mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
bf9662f6fa
MDEV-27277 added warnings on truncation during sorting for SELECTs but did not for DML operations. However, UPDATEs and DELETEs may also perform sorting and thus produce warnings. This commit fixes that
34345 lines
1.1 MiB
34345 lines
1.1 MiB
/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
|
|
Copyright (c) 2009, 2024, MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
/**
|
|
@file
|
|
|
|
@brief
|
|
mysql_select and join optimization
|
|
|
|
|
|
@defgroup Query_Optimizer Query Optimizer
|
|
@{
|
|
*/
|
|
|
|
#ifdef USE_PRAGMA_IMPLEMENTATION
|
|
#pragma implementation // gcc: Class implementation
|
|
#endif
|
|
|
|
#include "mariadb.h"
|
|
#include "sql_priv.h"
|
|
#include "unireg.h"
|
|
#include "sql_select.h"
|
|
#include "sql_cache.h" // query_cache_*
|
|
#include "sql_table.h" // primary_key_name
|
|
#include "probes_mysql.h"
|
|
#include "key.h" // key_copy, key_cmp, key_cmp_if_same
|
|
#include "lock.h" // mysql_unlock_some_tables,
|
|
// mysql_unlock_read_tables
|
|
#include "sql_show.h" // append_identifier
|
|
#include "sql_base.h" // setup_wild, setup_fields, fill_record
|
|
#include "sql_parse.h" // check_stack_overrun
|
|
#include "sql_partition.h" // make_used_partitions_str
|
|
#include "sql_test.h" // print_where, print_keyuse_array,
|
|
// print_sjm, print_plan, TEST_join
|
|
#include "records.h" // init_read_record, end_read_record
|
|
#include "filesort.h" // filesort_free_buffers
|
|
#include "filesort_utils.h" // get_qsort_sort_cost
|
|
#include "sql_union.h" // mysql_union
|
|
#include "opt_subselect.h"
|
|
#include "sql_derived.h"
|
|
#include "sql_statistics.h"
|
|
#include "sql_cte.h"
|
|
#include "sql_window.h"
|
|
#include "tztime.h"
|
|
|
|
#include "debug_sync.h" // DEBUG_SYNC
|
|
#include <m_ctype.h>
|
|
#include <my_bit.h>
|
|
#include <hash.h>
|
|
#include <ft_global.h>
|
|
#include "sys_vars_shared.h"
|
|
#include "sp_head.h"
|
|
#include "sp_rcontext.h"
|
|
#include "rowid_filter.h"
|
|
#include "select_handler.h"
|
|
#include "my_json_writer.h"
|
|
#include "opt_trace.h"
|
|
#include "derived_handler.h"
|
|
#include "create_tmp_table.h"
|
|
#include "optimizer_defaults.h"
|
|
#include "derived_handler.h"
|
|
|
|
/*
|
|
A key part number that means we're using a fulltext scan.
|
|
|
|
In order not to confuse it with regular equalities, we need to pick
|
|
a number that's greater than MAX_REF_PARTS.
|
|
|
|
Hash Join code stores field->field_index in KEYUSE::keypart, so the
|
|
number needs to be bigger than MAX_FIELDS, also.
|
|
|
|
CAUTION: sql_test.cc has its own definition of FT_KEYPART.
|
|
*/
|
|
#define FT_KEYPART (MAX_FIELDS+10)
|
|
|
|
/*
|
|
We assume that when we do hash join, only 10 % rows in the hash will
|
|
match the current found row.
|
|
*/
|
|
#define HASH_FANOUT 0.1
|
|
|
|
/*
|
|
The following is used to check that A <= B, but with some margin as the
|
|
calculation is done slightly differently (mathematically correct, but
|
|
double calculations are not exact).
|
|
This is only used when comparing read rows and output rows, which
|
|
means that we can assume that both values are >= 0 and B cannot be notably
|
|
smaller than A.
|
|
*/
|
|
|
|
#define crash_if_first_double_is_bigger(A,B) DBUG_ASSERT(((A) == 0.0 && (B) == 0.0) || (A)/(B) < 1.0000001)
|
|
|
|
#define double_to_rows(A) ((A) >= ((double)HA_ROWS_MAX) ? HA_ROWS_MAX : (ha_rows) (A))
|
|
|
|
#define double_to_ulonglong(A) ((A) >= ((double)ULONGLONG_MAX) ? ULONGLONG_MAX : (ulonglong) (A))
|
|
|
|
/* Used to ensure that costs are calculate the same way */
|
|
inline bool compare_cost(double a, double b)
|
|
{
|
|
DBUG_ASSERT(a >= 0.0 && b >= 0.0);
|
|
return (a >= b - b/10000000.0 && a <= b+b/10000000.0);
|
|
}
|
|
|
|
/*
  Return a/b expressed as a percentage, or 0.0 when b is zero so that
  the caller never divides by zero.
*/
inline double safe_filtered(double a, double b)
{
  if (b == 0)
    return 0.0;
  return a / b * 100.0;
}
|
|
|
|
/*
  Printable names of the join access types, indexed by position.
  NOTE(review): the order presumably has to match the join type enum
  declared elsewhere -- confirm before adding or reordering entries.
*/
const char *join_type_str[]=
{
  "UNKNOWN",             /*  0 */
  "system",              /*  1 */
  "const",               /*  2 */
  "eq_ref",              /*  3 */
  "ref",                 /*  4 */
  "MAYBE_REF",           /*  5 */
  "ALL",                 /*  6 */
  "range",               /*  7 */
  "index",               /*  8 */
  "fulltext",            /*  9 */
  "ref_or_null",         /* 10 */
  "unique_subquery",     /* 11 */
  "index_subquery",      /* 12 */
  "index_merge",         /* 13 */
  "hash_ALL",            /* 14 */
  "hash_range",          /* 15 */
  "hash_index",          /* 16 */
  "hash_index_merge"     /* 17 */
};
|
|
|
|
static const Lex_ident_column group_key= "group_key"_Lex_ident_column;
|
|
static const Lex_ident_column distinct_key= "distinct_key"_Lex_ident_column;
|
|
|
|
struct st_sargable_param;
|
|
|
|
static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &leaves,
|
|
DYNAMIC_ARRAY *keyuse);
|
|
static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
|
|
JOIN_TAB *join_tab,
|
|
uint tables, COND *conds,
|
|
table_map table_map, SELECT_LEX *select_lex,
|
|
SARGABLE_PARAM **sargables);
|
|
static int sort_keyuse(KEYUSE *a,KEYUSE *b);
|
|
static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables);
|
|
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
|
|
bool allow_full_scan, table_map used_tables);
|
|
static bool get_quick_record_count(THD *thd, SQL_SELECT *select,
|
|
TABLE *table,
|
|
const key_map *keys,ha_rows limit,
|
|
ha_rows *quick_count);
|
|
static void optimize_straight_join(JOIN *join, table_map join_tables);
|
|
static bool greedy_search(JOIN *join, table_map remaining_tables,
|
|
uint depth, uint use_cond_selectivity);
|
|
|
|
enum enum_best_search {
|
|
SEARCH_ABORT= -2,
|
|
SEARCH_ERROR= -1,
|
|
SEARCH_OK= 0,
|
|
SEARCH_FOUND_EDGE=1
|
|
};
|
|
|
|
static enum_best_search
|
|
best_extension_by_limited_search(JOIN *join,
|
|
table_map remaining_tables,
|
|
uint idx, double record_count,
|
|
double read_time, uint depth,
|
|
uint use_cond_selectivity,
|
|
table_map *processed_eq_ref_tables);
|
|
static uint determine_search_depth(JOIN* join);
|
|
C_MODE_START
|
|
static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
|
|
static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2);
|
|
static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2);
|
|
C_MODE_END
|
|
static uint cache_record_length(JOIN *join,uint index);
|
|
static store_key *get_store_key(THD *thd,
|
|
KEYUSE *keyuse, table_map used_tables,
|
|
KEY_PART_INFO *key_part, uchar *key_buff,
|
|
uint maybe_null);
|
|
static bool make_outerjoin_info(JOIN *join);
|
|
static Item*
|
|
make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables,
|
|
table_map sjm_tables, bool inside_or_clause);
|
|
static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item);
|
|
static void revise_cache_usage(JOIN_TAB *join_tab);
|
|
static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after);
|
|
static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables);
|
|
static void update_depend_map(JOIN *join);
|
|
static void update_depend_map_for_order(JOIN *join, ORDER *order);
|
|
static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond,
|
|
bool change_list, bool *simple_order);
|
|
static int return_zero_rows(JOIN *join, select_result *res,
|
|
List<TABLE_LIST> *tables,
|
|
List<Item> *fields, bool send_row,
|
|
ulonglong select_options, const char *info,
|
|
Item *having, List<Item> *all_fields);
|
|
static COND *build_equal_items(JOIN *join, COND *cond,
|
|
COND_EQUAL *inherited,
|
|
List<TABLE_LIST> *join_list,
|
|
bool ignore_on_conds,
|
|
COND_EQUAL **cond_equal_ref,
|
|
bool link_equal_fields= FALSE);
|
|
static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab,
|
|
COND *cond,
|
|
COND_EQUAL *cond_equal,
|
|
void *table_join_idx,
|
|
bool do_substitution);
|
|
static COND *simplify_joins(JOIN *join, List<TABLE_LIST> *join_list,
|
|
COND *conds, bool top, bool in_sj);
|
|
static bool check_interleaving_with_nj(JOIN_TAB *next);
|
|
static void restore_prev_nj_state(JOIN_TAB *last);
|
|
static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list);
|
|
static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
|
|
uint first_unused);
|
|
|
|
static COND *optimize_cond(JOIN *join, COND *conds,
|
|
List<TABLE_LIST> *join_list,
|
|
bool ignore_on_conds,
|
|
Item::cond_result *cond_value,
|
|
COND_EQUAL **cond_equal,
|
|
int flags= 0);
|
|
bool const_expression_in_where(COND *conds,Item *item, Item **comp_item);
|
|
static int do_select(JOIN *join, Procedure *procedure);
|
|
|
|
static enum_nested_loop_state evaluate_join_record(JOIN *, JOIN_TAB *, int);
|
|
static enum_nested_loop_state
|
|
evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab);
|
|
static enum_nested_loop_state
|
|
end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
|
|
static enum_nested_loop_state
|
|
end_write(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
|
|
static enum_nested_loop_state
|
|
end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
|
|
static enum_nested_loop_state
|
|
end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
|
|
|
|
static int join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos);
|
|
static int join_read_system(JOIN_TAB *tab);
|
|
static int join_read_const(JOIN_TAB *tab);
|
|
static int join_read_key(JOIN_TAB *tab);
|
|
static void join_read_key_unlock_row(st_join_table *tab);
|
|
static void join_const_unlock_row(JOIN_TAB *tab);
|
|
static int join_read_always_key(JOIN_TAB *tab);
|
|
static int join_read_last_key(JOIN_TAB *tab);
|
|
static int join_no_more_records(READ_RECORD *info);
|
|
static int join_read_next(READ_RECORD *info);
|
|
static int join_init_quick_read_record(JOIN_TAB *tab);
|
|
static quick_select_return test_if_quick_select(JOIN_TAB *tab);
|
|
static int test_if_use_dynamic_range_scan(JOIN_TAB *join_tab);
|
|
static int join_read_first(JOIN_TAB *tab);
|
|
static int join_read_next(READ_RECORD *info);
|
|
static int join_read_next_same(READ_RECORD *info);
|
|
static int join_read_last(JOIN_TAB *tab);
|
|
static int join_read_prev_same(READ_RECORD *info);
|
|
static int join_read_prev(READ_RECORD *info);
|
|
static int join_ft_read_first(JOIN_TAB *tab);
|
|
static int join_ft_read_next(READ_RECORD *info);
|
|
int join_read_always_key_or_null(JOIN_TAB *tab);
|
|
int join_read_next_same_or_null(READ_RECORD *info);
|
|
static COND *make_cond_for_table(THD *thd, Item *cond,table_map table,
|
|
table_map used_table,
|
|
int join_tab_idx_arg,
|
|
bool exclude_expensive_cond,
|
|
bool retain_ref_cond);
|
|
static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
|
|
Item *cond,
|
|
table_map tables,
|
|
table_map used_table,
|
|
int join_tab_idx_arg,
|
|
bool exclude_expensive_cond,
|
|
bool retain_ref_cond,
|
|
bool is_top_and_level);
|
|
|
|
static Item* part_of_refkey(TABLE *form,Field *field);
|
|
static bool test_if_cheaper_ordering(bool in_join_optimizer,
|
|
const JOIN_TAB *tab,
|
|
ORDER *order, TABLE *table,
|
|
key_map usable_keys, int key,
|
|
ha_rows select_limit,
|
|
int *new_key, int *new_key_direction,
|
|
ha_rows *new_select_limit,
|
|
double *new_read_time,
|
|
uint *new_used_key_parts= NULL,
|
|
uint *saved_best_key_parts= NULL);
|
|
static int test_if_order_by_key(JOIN *, ORDER *, TABLE *, uint, uint *);
|
|
static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,
|
|
ha_rows select_limit, bool no_changes,
|
|
const key_map *map,
|
|
bool *fatal_error);
|
|
static bool list_contains_unique_index(TABLE *table,
|
|
bool (*find_func) (Field *, void *), void *data);
|
|
static bool find_field_in_item_list (Field *field, void *data);
|
|
static bool find_field_in_order_list (Field *field, void *data);
|
|
int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort);
|
|
static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
|
|
SORT_FIELD *sortorder, ulong keylength,
|
|
Item *having);
|
|
static int remove_dup_with_hash_index(THD *thd,TABLE *table,
|
|
uint field_count, Field **first_field,
|
|
SORT_FIELD *sortorder,
|
|
ulong key_length,Item *having);
|
|
static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
|
|
static bool setup_new_fields(THD *thd, List<Item> &fields,
|
|
List<Item> &all_fields, ORDER *new_order);
|
|
static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
ORDER *order, List<Item> &fields,
|
|
List<Item> &all_fields,
|
|
bool *all_order_by_fields_used);
|
|
static bool test_if_subpart(ORDER *group_by, ORDER *order_by);
|
|
static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables,
|
|
table_map const_tables);
|
|
static void calc_group_buffer(JOIN *join, ORDER *group);
|
|
static bool make_group_fields(JOIN *main_join, JOIN *curr_join);
|
|
static bool alloc_group_fields(JOIN *join, ORDER *group);
|
|
static bool alloc_order_fields(JOIN *join, ORDER *group,
|
|
uint max_number_of_elements);
|
|
// Create list for using with temporary table
|
|
static bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
List<Item> &new_list1,
|
|
List<Item> &new_list2,
|
|
uint elements, List<Item> &items);
|
|
// Create list for using with temporary table
|
|
static bool change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
List<Item> &new_list1,
|
|
List<Item> &new_list2,
|
|
uint elements, List<Item> &items);
|
|
static void init_tmptable_sum_functions(Item_sum **func);
|
|
static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table);
|
|
static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
|
|
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
|
|
static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
|
|
static bool init_sum_functions(Item_sum **func, Item_sum **end);
|
|
static bool update_sum_func(Item_sum **func);
|
|
static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
|
|
bool distinct, const char *message=NullS);
|
|
static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
|
|
static uint make_join_orderinfo(JOIN *join);
|
|
static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array);
|
|
|
|
Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
|
|
bool *inherited_fl);
|
|
JOIN_TAB *first_depth_first_tab(JOIN* join);
|
|
JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab);
|
|
|
|
static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
|
|
uint n_top_tabs_count, JOIN_TAB *tab);
|
|
static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *,
|
|
List<Item> &, List<Item> &, bool, bool, bool);
|
|
|
|
static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
|
table_map rem_tables,
|
|
double *records_out);
|
|
void set_postjoin_aggr_write_func(JOIN_TAB *tab);
|
|
|
|
static Item **get_sargable_cond(JOIN *join, TABLE *table);
|
|
|
|
bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item);
|
|
|
|
void print_list_item(String *str, List_item *list,
|
|
enum_query_type query_type);
|
|
|
|
static
|
|
bool build_notnull_conds_for_range_scans(JOIN *join, COND *cond,
|
|
table_map allowed);
|
|
static
|
|
void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
|
|
TABLE_LIST *nest_tbl);
|
|
static void fix_items_after_optimize(THD *thd, SELECT_LEX *select_lex);
|
|
static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond);
|
|
static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit,
|
|
Item *cond);
|
|
static double prev_record_reads(const POSITION *positions, uint idx,
|
|
table_map found_ref, double record_count,
|
|
double *same_keys);
|
|
|
|
static
|
|
bool join_limit_shortcut_is_applicable(const JOIN *join);
|
|
POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost);
|
|
|
|
static
|
|
bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order,
|
|
key_map *usable_keys);
|
|
static
|
|
void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table,
|
|
Item_field *item_field,
|
|
key_map *col_keys);
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
/*
|
|
SHOW EXPLAIN testing: wait for, and serve n_calls APC requests.
|
|
*/
|
|
/*
  Wait until n_calls more APC requests have been processed by this thread
  (as counted by thd->apc_target.n_calls_processed), or until the thread
  is killed.

  @param thd      Thread that serves the APC requests
  @param n_calls  Number of additional processed requests to wait for
*/
void dbug_serve_apcs(THD *thd, int n_calls)
{
  const char *const original_proc_info= thd->proc_info;
  /* Value the processed-calls counter has to reach before we stop */
  const int target_processed= thd->apc_target.n_calls_processed + n_calls;

  for (;;)
  {
    if (!(thd->apc_target.n_calls_processed < target_processed))
      return;

    /* This is so that mysqltest knows we're ready to serve requests: */
    thd_proc_info(thd, "show_explain_trap");
    my_sleep(30000);
    /* Put the original proc_info back after every wait interval */
    thd_proc_info(thd, original_proc_info);

    if (unlikely(thd->check_killed(1)))
      return;
  }
}
|
|
|
|
|
|
/*
|
|
Debugging: check if @name=value, comparing as integer
|
|
|
|
Intended usage:
|
|
|
|
DBUG_EXECUTE_IF("show_explain_probe_2",
|
|
if (dbug_user_var_equals_int(thd, "select_id", select_id))
|
|
dbug_serve_apcs(thd, 1);
|
|
);
|
|
|
|
*/
|
|
|
|
/*
  Return TRUE when the user variable @name exists, is not NULL, and its
  integer value equals 'value'; FALSE otherwise.
*/
bool dbug_user_var_equals_int(THD *thd, const char *name, int value)
{
  LEX_CSTRING varname= { name, strlen(name)};
  user_var_entry *entry= get_variable(&thd->user_vars, &varname, FALSE);
  if (!entry)
    return FALSE;                               /* No such variable */

  bool is_null;
  const longlong current_value= entry->val_int(&is_null);
  return !is_null && current_value == value;
}
|
|
|
|
/*
|
|
Debugging : check if @name= value, comparing as string
|
|
|
|
Intended usage :
|
|
|
|
DBUG_EXECUTE_IF("log_slow_statement_end",
|
|
if (dbug_user_var_equals_str(thd, "show_explain_probe_query",
|
|
thd->query()))
|
|
dbug_serve_apcs(thd, 1);
|
|
);
|
|
*/
|
|
|
|
/*
  Return TRUE when the user variable @name exists, is not NULL, and its
  string value starts with 'value' (only the first strlen(value) bytes
  are compared, as before); FALSE otherwise.

  The pointer returned by val_str() is checked before it is used: it was
  previously dereferenced before null_value was tested, which is unsafe
  if val_str() returns NULL for a NULL-valued variable. The comparison is
  bounded by the length of the variable value, so it does not rely on
  the String buffer being NUL-terminated.
*/
bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value)
{
  user_var_entry *var;
  LEX_CSTRING varname= {name, strlen(name)};
  if ((var= get_variable(&thd->user_vars, &varname, FALSE)))
  {
    bool null_value;
    String str;
    String *var_value= var->val_str(&null_value, &str, 10);
    const size_t value_length= strlen(value);
    if (var_value && !null_value &&
        var_value->length() >= value_length &&
        (value_length == 0 ||
         !memcmp(var_value->ptr(), value, value_length)))
      return TRUE;
  }
  return FALSE;
}
|
|
#endif /* DBUG_OFF */
|
|
|
|
/*
|
|
Initialize POSITION structure.
|
|
*/
|
|
|
|
POSITION::POSITION()
{
  /* Pointer-like and index members start out as 0 */
  table= 0;
  key= 0;
  forced_index= 0;
  spl_plan= 0;
  range_rowid_filter_info= 0;
  type= JT_UNKNOWN;

  /* All numeric estimates start at zero */
  records_init= 0.0;
  records_out= 0.0;
  read_time= 0.0;
  cond_selectivity= 0.0;
  records_read= 0.0;
  prefix_record_count= 0.0;

  /* Join buffer flags */
  use_join_buffer= 0;
  firstmatch_with_join_buf= false;

  /* Table maps start out empty */
  dups_producing_tables= 0;
  ref_depend_map= 0;
  inner_tables_handled_with_other_sjs= 0;
  key_dependent= 0;

  /* No semi-join strategy chosen; all strategy pickers are emptied */
  sj_strategy= SJ_OPT_NONE;
  n_sj_tables= 0;
  dups_weedout_picker.set_empty();
  firstmatch_picker.set_empty();
  loosescan_picker.set_empty();
  sjmat_picker.set_empty();
}
|
|
|
|
|
|
/*
  Put the JOIN object into its initial, not-yet-optimized state.

  @param thd_arg             Thread handle; also the source of 'lock'
  @param fields_arg          Select list; copied into all_fields and
                             (unless it is the same object) fields_list
  @param select_options_arg  Option bits; SELECT_DISTINCT decides the
                             initial value of select_distinct
  @param result_arg          Object that will receive the result

  Every other member is set to its zero/empty/default value below.
*/
void JOIN::init(THD *thd_arg, List<Item> &fields_arg,
                ulonglong select_options_arg, select_result *result_arg)
{
  join_tab= 0;
  table= 0;
  table_count= 0;
  top_join_tab_count= 0;
  const_tables= 0;
  const_table_map= found_const_table_map= not_usable_rowid_map= 0;
  aggr_tables= 0;
  eliminated_tables= 0;
  join_list= 0;
  implicit_grouping= FALSE;
  sort_and_group= 0;
  first_record= 0;
  do_send_rows= 1;
  duplicate_rows= send_records= 0;
  found_records= accepted_rows= 0;
  fetch_limit= HA_POS_ERROR;
  thd= thd_arg;
  sum_funcs= sum_funcs2= 0;
  procedure= 0;
  having= tmp_having= having_history= 0;
  having_is_correlated= false;
  group_list_for_estimates= 0;
  select_options= select_options_arg;
  result= result_arg;
  lock= thd_arg->lock;
  select_lex= 0; //for safety
  /* Must come after select_options has been assigned above */
  select_distinct= MY_TEST(select_options & SELECT_DISTINCT);
  no_order= 0;
  simple_order= 0;
  simple_group= 0;
  ordered_index_usage= ordered_index_void;
  need_distinct= 0;
  skip_sort_order= 0;
  with_two_phase_optimization= 0;
  save_qep= 0;
  spl_opt_info= 0;
  ext_keyuses_for_splitting= 0;
  need_tmp= 0;
  hidden_group_fields= 0; /*safety*/
  error= 0;
  select= 0;
  return_tab= 0;
  ref_ptrs.reset();
  items0.reset();
  items1.reset();
  items2.reset();
  items3.reset();
  zero_result_cause= 0;
  optimization_state= JOIN::NOT_OPTIMIZED;
  have_query_plan= QEP_NOT_PRESENT_YET;
  initialized= 0;
  cleaned= 0;
  cond_equal= 0;
  having_equal= 0;
  exec_const_cond= 0;
  group_optimized_away= 0;
  no_rows_in_result_called= 0;
  positions= best_positions= 0;
  pushdown_query= 0;
  original_join_tab= 0;
  explain= NULL;
  tmp_table_keep_current_rowid= 0;
  allowed_top_level_tables= 0;

  all_fields= fields_arg;
  if (&fields_list != &fields_arg) /* Avoid valgrind-warning */
    fields_list= fields_arg;
  non_agg_fields.empty();
  bzero((char*) &keyuse,sizeof(keyuse));
  having_value= Item::COND_UNDEF;
  /* init() first, then override end_write_records */
  tmp_table_param.init();
  tmp_table_param.end_write_records= HA_POS_ERROR;
  rollup.state= ROLLUP::STATE_NONE;

  no_const_tables= FALSE;
  first_select= sub_select;
  group_sent= 0;

  outer_ref_cond= pseudo_bits_cond= NULL;
  in_to_exists_where= NULL;
  in_to_exists_having= NULL;
  emb_sjm_nest= NULL;
  sjm_lookup_tables= 0;
  sjm_scan_tables= 0;
  is_orig_degenerated= false;
  with_ties_order_count= 0;
  prepared= false;
  sql_cmd_dml= NULL;
}
|
|
|
|
|
|
/*
  Write a "table_dependencies" array to the optimizer trace.

  For each of the first table_count entries of join_tabs one object is
  written with the table name, "row_may_be_null", "map_bit" (the lowest
  bit set in the table's map) and "depends_on_map_bits" (every bit
  returned by iterating over the tab's 'dependent' map).

  The trace must already be started.
*/
static void trace_table_dependencies(THD *thd,
                                     JOIN_TAB *join_tabs, uint table_count)
{
  DBUG_ASSERT(thd->trace_started());
  Json_writer_object trace_wrapper(thd);
  Json_writer_array trace_dep(thd, "table_dependencies");

  for (uint tab_no= 0; tab_no < table_count; tab_no++)
  {
    JOIN_TAB *const tab= &join_tabs[tab_no];
    TABLE_LIST *table_ref= tab->tab_list;
    Json_writer_object trace_one_table(thd);
    trace_one_table.
      add_table_name(tab).
      add("row_may_be_null",
          (bool)table_ref->table->maybe_null);

    const table_map map= table_ref->get_map();
    DBUG_ASSERT(map < (1ULL << table_count));

    /* Report only the first (lowest) bit that is set in the map */
    uint bit_no= 0;
    while (bit_no < table_count)
    {
      if (map & (1ULL << bit_no))
      {
        trace_one_table.add("map_bit", bit_no);
        break;
      }
      bit_no++;
    }

    Json_writer_array depends_on(thd, "depends_on_map_bits");
    Table_map_iterator it(tab->dependent);
    for (uint dep_bit= it++;
         dep_bit != Table_map_iterator::BITMAP_END;
         dep_bit= it++)
      depends_on.add(static_cast<longlong>(dep_bit));
  }
}
|
|
|
|
|
|
/**
|
|
This handles SELECT with and without UNION.
|
|
*/
|
|
|
|
/*
  Execute the statement's first SELECT, either through mysql_union() or
  through mysql_select().

  @param thd                       Thread handle
  @param lex                       Parsed statement
  @param result                    Receiver of the result rows
  @param setup_tables_done_option  Extra option bits passed on to the
                                   executor

  @return true on error (also when thd->is_error() is set), false on
          success.
*/
bool handle_select(THD *thd, LEX *lex, select_result *result,
                   ulonglong setup_tables_done_option)
{
  bool res;
  SELECT_LEX *select_lex= lex->first_select_lex();
  DBUG_ENTER("handle_select");
  MYSQL_SELECT_START(thd->query());

  const bool use_union_path=
    select_lex->master_unit()->is_unit_op() ||
    select_lex->master_unit()->fake_select_lex;

  if (!use_union_path)
  {
    SELECT_LEX_UNIT *unit= &lex->unit;
    unit->set_limit(unit->global_parameters());
    /*
      The 'options' of mysql_select will be stored in the JOIN. As a new
      JOIN is created for every PS/SP execution, there is no need to reset
      this flag if setup_tables_done_option changes for the next
      re-execution.
    */
    const ulonglong select_options= select_lex->options |
                                    thd->variables.option_bits |
                                    setup_tables_done_option;
    res= mysql_select(thd,
                      select_lex->table_list.first,
                      select_lex->item_list,
                      select_lex->where,
                      select_lex->order_list.elements +
                      select_lex->group_list.elements,
                      select_lex->order_list.first,
                      select_lex->group_list.first,
                      select_lex->having,
                      lex->proc_list.first,
                      select_options,
                      result, unit, select_lex);
  }
  else
    res= mysql_union(thd, lex, result, &lex->unit, setup_tables_done_option);

  DBUG_PRINT("info",("res: %d is_error(): %d", res,
                     thd->is_error()));

  /* An error raised in the THD counts as a failure, too */
  if (thd->is_error())
    res= true;
  if (unlikely(res))
    result->abort_result_set();

  if (unlikely(thd->killed == ABORT_QUERY && !thd->no_errors))
  {
    /*
      If LIMIT ROWS EXAMINED interrupted query execution, issue a warning,
      continue with normal processing and produce an incomplete query result.
    */
    const bool abort_on_warning_backup= thd->abort_on_warning;
    thd->abort_on_warning= false;
    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                        ER_QUERY_RESULT_INCOMPLETE,
                        ER_THD(thd, ER_QUERY_RESULT_INCOMPLETE),
                        "LIMIT ROWS EXAMINED",
                        thd->lex->limit_rows_examined->val_uint());
    thd->abort_on_warning= abort_on_warning_backup;
    thd->reset_killed();
  }

  /* Disable LIMIT ROWS EXAMINED after query execution. */
  thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX;

  MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows);
  DBUG_RETURN(res);
}
|
|
|
|
|
|
/**
|
|
Fix fields referenced from inner selects.
|
|
|
|
@param thd Thread handle
|
|
@param all_fields List of all fields used in select
|
|
@param select Current select
|
|
@param ref_pointer_array Array of references to Items used in current select
|
|
@note The GROUP BY list is not a parameter: it is read from select->join->group_list
|
|
|
|
@details
|
|
The function serves 3 purposes
|
|
|
|
- adds fields referenced from inner query blocks to the current select list
|
|
|
|
- Decides which class to use to reference the items (Item_ref or
|
|
Item_direct_ref)
|
|
|
|
- fixes references (Item_ref objects) to these fields.
|
|
|
|
If a field isn't already on the select list and the ref_pointer_array
|
|
is provided then it is added to the all_fields list and the pointer to
|
|
it is saved in the ref_pointer_array.
|
|
|
|
The class to access the outer field is determined by the following rules:
|
|
|
|
-#. If the outer field isn't used under an aggregate function then the
|
|
Item_ref class should be used.
|
|
|
|
-#. If the outer field is used under an aggregate function and this
|
|
function is, in turn, aggregated in the query block where the outer
|
|
field was resolved or some query nested therein, then the
|
|
Item_direct_ref class should be used. Also it should be used if we are
|
|
grouping by a subquery that references this outer field.
|
|
|
|
The resolution is done here and not at the fix_fields() stage as
|
|
it can be done only after aggregate functions are fixed and pulled up to
|
|
selects where they are to be aggregated.
|
|
|
|
When the class is chosen it substitutes the original field in the
|
|
Item_outer_ref object.
|
|
|
|
After this we proceed with fixing references (Item_outer_ref objects) to
|
|
this field from inner subqueries.
|
|
|
|
@return Status
|
|
@retval true An error occurred.
|
|
@retval false OK.
|
|
*/
|
|
|
|
bool
|
|
fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
|
|
Ref_ptr_array ref_pointer_array)
|
|
{
|
|
Item_outer_ref *ref;
|
|
|
|
/*
|
|
Mark the references from the inner_refs_list that are occurred in
|
|
the group by expressions. Those references will contain direct
|
|
references to the referred fields. The markers are set in
|
|
the found_in_group_by field of the references from the list.
|
|
*/
|
|
List_iterator_fast <Item_outer_ref> ref_it(select->inner_refs_list);
|
|
for (ORDER *group= select->join->group_list; group; group= group->next)
|
|
{
|
|
(*group->item)->walk(&Item::check_inner_refs_processor, TRUE, &ref_it);
|
|
}
|
|
|
|
while ((ref= ref_it++))
|
|
{
|
|
bool direct_ref= false;
|
|
Item *item= ref->outer_ref;
|
|
Item **item_ref= ref->ref;
|
|
Item_ref *new_ref;
|
|
/*
|
|
TODO: this field item already might be present in the select list.
|
|
In this case instead of adding new field item we could use an
|
|
existing one. The change will lead to less operations for copying fields,
|
|
smaller temporary tables and less data passed through filesort.
|
|
*/
|
|
if (!ref_pointer_array.is_null() && !ref->found_in_select_list)
|
|
{
|
|
int el= all_fields.elements;
|
|
ref_pointer_array[el]= item;
|
|
/* Add the field item to the select list of the current select. */
|
|
all_fields.push_front(item, thd->mem_root);
|
|
/*
|
|
If it's needed reset each Item_ref item that refers this field with
|
|
a new reference taken from ref_pointer_array.
|
|
*/
|
|
item_ref= &ref_pointer_array[el];
|
|
}
|
|
|
|
if (ref->in_sum_func)
|
|
{
|
|
Item_sum *sum_func;
|
|
if (ref->in_sum_func->nest_level > select->nest_level)
|
|
direct_ref= TRUE;
|
|
else
|
|
{
|
|
for (sum_func= ref->in_sum_func; sum_func &&
|
|
sum_func->aggr_level >= select->nest_level;
|
|
sum_func= sum_func->in_sum_func)
|
|
{
|
|
if (sum_func->aggr_level == select->nest_level)
|
|
{
|
|
direct_ref= TRUE;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (ref->found_in_group_by)
|
|
direct_ref= TRUE;
|
|
|
|
new_ref= direct_ref ?
|
|
new (thd->mem_root) Item_direct_ref(thd, ref->context, item_ref, ref->table_name,
|
|
ref->field_name, ref->alias_name_used) :
|
|
new (thd->mem_root) Item_ref(thd, ref->context, item_ref, ref->table_name,
|
|
ref->field_name, ref->alias_name_used);
|
|
if (!new_ref)
|
|
return TRUE;
|
|
ref->outer_ref= new_ref;
|
|
ref->ref= &ref->outer_ref;
|
|
|
|
if (ref->fix_fields_if_needed(thd, 0))
|
|
return TRUE;
|
|
thd->lex->used_tables|= item->used_tables();
|
|
thd->lex->current_select->select_list_tables|= item->used_tables();
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
The following clauses are redundant for subqueries:
|
|
|
|
DISTINCT
|
|
GROUP BY if there are no aggregate functions and no HAVING
|
|
clause
|
|
|
|
Because redundant clauses are removed both from JOIN and
|
|
select_lex, the removal is permanent. Thus, it only makes sense to
|
|
call this function for normal queries and on first execution of
|
|
SP/PS
|
|
|
|
@param subq_select_lex select_lex that is part of a subquery
|
|
predicate. This object and the associated
|
|
join is modified.
|
|
*/
|
|
|
|
static
|
|
void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex)
|
|
{
|
|
DBUG_ENTER("remove_redundant_subquery_clauses");
|
|
Item_subselect *subq_predicate= subq_select_lex->master_unit()->item;
|
|
/*
|
|
The removal should happen for IN, ALL, ANY and EXISTS subqueries,
|
|
which means all but single row subqueries. Example single row
|
|
subqueries:
|
|
a) SELECT * FROM t1 WHERE t1.a = (<single row subquery>)
|
|
b) SELECT a, (<single row subquery) FROM t1
|
|
*/
|
|
if (subq_predicate->substype() == Item_subselect::SINGLEROW_SUBS)
|
|
DBUG_VOID_RETURN;
|
|
|
|
/* A subquery that is not single row should be one of IN/ALL/ANY/EXISTS. */
|
|
DBUG_ASSERT (subq_predicate->substype() == Item_subselect::EXISTS_SUBS ||
|
|
subq_predicate->is_in_predicate());
|
|
|
|
if (subq_select_lex->options & SELECT_DISTINCT)
|
|
{
|
|
subq_select_lex->join->select_distinct= false;
|
|
subq_select_lex->options&= ~SELECT_DISTINCT;
|
|
DBUG_PRINT("info", ("DISTINCT removed"));
|
|
}
|
|
|
|
/*
|
|
Remove GROUP BY if there are no aggregate functions and no HAVING
|
|
clause
|
|
*/
|
|
if (subq_select_lex->group_list.elements &&
|
|
!subq_select_lex->with_sum_func && !subq_select_lex->join->having)
|
|
{
|
|
/*
|
|
Temporary workaround for MDEV-28621: Do not remove GROUP BY expression
|
|
if it has any subqueries in it.
|
|
*/
|
|
bool have_subquery= false;
|
|
for (ORDER *ord= subq_select_lex->group_list.first; ord; ord= ord->next)
|
|
{
|
|
if ((*ord->item)->with_subquery())
|
|
{
|
|
have_subquery= true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!have_subquery)
|
|
{
|
|
for (ORDER *ord= subq_select_lex->group_list.first; ord; ord= ord->next)
|
|
{
|
|
/*
|
|
Do not remove the item if it is used in select list and then referred
|
|
from GROUP BY clause by its name or number. Example:
|
|
|
|
select (select ... ) as SUBQ ... group by SUBQ
|
|
|
|
Here SUBQ cannot be removed.
|
|
*/
|
|
if (!ord->in_field_list)
|
|
{
|
|
/*
|
|
Not necessary due to workaround for MDEV-28621:
|
|
(*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL);
|
|
*/
|
|
/*
|
|
Remove from the JOIN::all_fields list any reference to the elements
|
|
of the eliminated GROUP BY list unless it is 'in_field_list'.
|
|
This is needed in order not to confuse JOIN::make_aggr_tables_info()
|
|
when it constructs different structure for execution phase.
|
|
*/
|
|
List_iterator<Item> li(subq_select_lex->join->all_fields);
|
|
Item *item;
|
|
while ((item= li++))
|
|
{
|
|
if (item == *ord->item)
|
|
li.remove();
|
|
}
|
|
}
|
|
}
|
|
subq_select_lex->join->group_list= NULL;
|
|
subq_select_lex->group_list.empty();
|
|
DBUG_PRINT("info", ("GROUP BY removed"));
|
|
}
|
|
}
|
|
|
|
/*
|
|
TODO: This would prevent processing quries with ORDER BY ... LIMIT
|
|
therefore we disable this optimization for now.
|
|
Remove GROUP BY if there are no aggregate functions and no HAVING
|
|
clause
|
|
if (subq_select_lex->group_list.elements &&
|
|
!subq_select_lex->with_sum_func && !subq_select_lex->join->having)
|
|
{
|
|
subq_select_lex->join->group_list= NULL;
|
|
subq_select_lex->group_list.empty();
|
|
}
|
|
*/
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
Function to setup clauses without sum functions.
|
|
*/
|
|
static inline int
|
|
setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
TABLE_LIST *tables,
|
|
List<TABLE_LIST> &leaves,
|
|
List<Item> &fields,
|
|
List<Item> &all_fields,
|
|
COND **conds,
|
|
ORDER *order,
|
|
ORDER *group,
|
|
List<Window_spec> &win_specs,
|
|
List<Item_window_func> &win_funcs,
|
|
bool *hidden_group_fields)
|
|
{
|
|
int res;
|
|
enum_parsing_place save_place;
|
|
st_select_lex *const select= thd->lex->current_select;
|
|
nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
|
|
/*
|
|
Need to stave the value, so we can turn off only any new non_agg_field_used
|
|
additions coming from the WHERE
|
|
*/
|
|
const bool saved_non_agg_field_used= select->non_agg_field_used();
|
|
DBUG_ENTER("setup_without_group");
|
|
|
|
thd->lex->allow_sum_func.clear_bit(select->nest_level);
|
|
res= setup_conds(thd, tables, leaves, conds);
|
|
|
|
/* it's not wrong to have non-aggregated columns in a WHERE */
|
|
select->set_non_agg_field_used(saved_non_agg_field_used);
|
|
|
|
thd->lex->allow_sum_func.set_bit(select->nest_level);
|
|
|
|
save_place= thd->lex->current_select->context_analysis_place;
|
|
thd->lex->current_select->context_analysis_place= IN_ORDER_BY;
|
|
res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields,
|
|
order);
|
|
thd->lex->allow_sum_func.clear_bit(select->nest_level);
|
|
thd->lex->current_select->context_analysis_place= IN_GROUP_BY;
|
|
res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields,
|
|
group, hidden_group_fields);
|
|
thd->lex->current_select->context_analysis_place= save_place;
|
|
thd->lex->allow_sum_func.set_bit(select->nest_level);
|
|
res= res || setup_windows(thd, ref_pointer_array, tables, fields, all_fields,
|
|
win_specs, win_funcs);
|
|
thd->lex->allow_sum_func= save_allow_sum_func;
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
/**
  Initialize these conditions from the session variable
  thd->variables.vers_asof_timestamp.

  The type is copied from the variable and the start unit is set to
  VERS_TIMESTAMP. For any type other than SYSTEM_TIME_UNSPECIFIED and
  SYSTEM_TIME_ALL (asserted to be SYSTEM_TIME_AS_OF) a datetime literal is
  created on thd->mem_root as the start point; otherwise the start item is
  cleared. The end point is emptied on success.

  @return false on success, true if the literal could not be allocated
*/
bool vers_select_conds_t::init_from_sysvar(THD *thd)
{
  vers_asof_timestamp_t &sysvar= thd->variables.vers_asof_timestamp;

  type= (vers_system_time_t) sysvar.type;
  delete_history= false;
  start.unit= VERS_TIMESTAMP;

  if (type == SYSTEM_TIME_UNSPECIFIED || type == SYSTEM_TIME_ALL)
    start.item= NULL;
  else
  {
    DBUG_ASSERT(type == SYSTEM_TIME_AS_OF);
    Datetime as_of(sysvar.unix_time, sysvar.second_part,
                   thd->variables.time_zone);

    start.item= new (thd->mem_root)
      Item_datetime_literal(thd, &as_of, TIME_SECOND_PART_DIGITS);
    if (!start.item)
      return true;
  }

  end.empty();
  return false;
}
|
|
|
|
/**
  Append the textual FOR SYSTEM_TIME clause that corresponds to orig_type
  to str. Nothing is appended for SYSTEM_TIME_UNSPECIFIED and
  SYSTEM_TIME_HISTORY.
*/
void vers_select_conds_t::print(String *str, enum_query_type query_type) const
{
  switch (orig_type) {
  case SYSTEM_TIME_UNSPECIFIED:
  case SYSTEM_TIME_HISTORY:
    // nothing to add
    return;
  case SYSTEM_TIME_ALL:
    str->append(STRING_WITH_LEN(" FOR SYSTEM_TIME ALL"));
    return;
  case SYSTEM_TIME_AS_OF:
    start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME AS OF "));
    return;
  case SYSTEM_TIME_BEFORE:
    start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME BEFORE "));
    return;
  case SYSTEM_TIME_FROM_TO:
    start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME FROM "));
    end.print(str, query_type, STRING_WITH_LEN(" TO "));
    return;
  case SYSTEM_TIME_BETWEEN:
    start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME BETWEEN "));
    end.print(str, query_type, STRING_WITH_LEN(" AND "));
    return;
  }
}
|
|
|
|
static
|
|
Item* period_get_condition(THD *thd, TABLE_LIST *table, SELECT_LEX *select,
|
|
vers_select_conds_t *conds, bool timestamp)
|
|
{
|
|
DBUG_ASSERT(table);
|
|
DBUG_ASSERT(table->table);
|
|
#define newx new (thd->mem_root)
|
|
TABLE_SHARE *share= table->table->s;
|
|
const TABLE_SHARE::period_info_t *period= conds->period;
|
|
|
|
const LEX_CSTRING &fstart= period->start_field(share)->field_name;
|
|
const LEX_CSTRING &fend= period->end_field(share)->field_name;
|
|
|
|
conds->field_start= newx Item_field(thd, &select->context,
|
|
table->db, table->alias,
|
|
thd->strmake_lex_cstring(fstart));
|
|
conds->field_end= newx Item_field(thd, &select->context,
|
|
table->db, table->alias,
|
|
thd->strmake_lex_cstring(fend));
|
|
|
|
Item *cond1= NULL, *cond2= NULL, *cond3= NULL, *curr= NULL;
|
|
if (timestamp)
|
|
{
|
|
MYSQL_TIME max_time;
|
|
switch (conds->type)
|
|
{
|
|
case SYSTEM_TIME_UNSPECIFIED:
|
|
case SYSTEM_TIME_HISTORY:
|
|
{
|
|
thd->variables.time_zone->gmt_sec_to_TIME(&max_time, TIMESTAMP_MAX_VALUE);
|
|
max_time.second_part= TIME_MAX_SECOND_PART;
|
|
Datetime dt(&max_time);
|
|
curr= newx Item_datetime_literal(thd, &dt, TIME_SECOND_PART_DIGITS);
|
|
if (conds->type == SYSTEM_TIME_UNSPECIFIED)
|
|
cond1= newx Item_func_eq(thd, conds->field_end, curr);
|
|
else
|
|
cond1= newx Item_func_lt(thd, conds->field_end, curr);
|
|
break;
|
|
}
|
|
case SYSTEM_TIME_AS_OF:
|
|
cond1= newx Item_func_le(thd, conds->field_start, conds->start.item);
|
|
cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item);
|
|
break;
|
|
case SYSTEM_TIME_FROM_TO:
|
|
cond1= newx Item_func_lt(thd, conds->field_start, conds->end.item);
|
|
cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item);
|
|
cond3= newx Item_func_lt(thd, conds->start.item, conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_BETWEEN:
|
|
cond1= newx Item_func_le(thd, conds->field_start, conds->end.item);
|
|
cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item);
|
|
cond3= newx Item_func_le(thd, conds->start.item, conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_BEFORE:
|
|
cond1= newx Item_func_history(thd, conds->field_end);
|
|
cond2= newx Item_func_lt(thd, conds->field_end, conds->start.item);
|
|
break;
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
DBUG_ASSERT(table->table->s && table->table->s->db_plugin);
|
|
|
|
Item *trx_id0= conds->start.item;
|
|
Item *trx_id1= conds->end.item;
|
|
if (conds->start.item && conds->start.unit == VERS_TIMESTAMP)
|
|
{
|
|
bool backwards= conds->type != SYSTEM_TIME_AS_OF;
|
|
trx_id0= newx Item_func_trt_id(thd, conds->start.item,
|
|
TR_table::FLD_TRX_ID, backwards);
|
|
}
|
|
if (conds->end.item && conds->end.unit == VERS_TIMESTAMP)
|
|
{
|
|
trx_id1= newx Item_func_trt_id(thd, conds->end.item,
|
|
TR_table::FLD_TRX_ID, false);
|
|
}
|
|
|
|
switch (conds->type)
|
|
{
|
|
case SYSTEM_TIME_UNSPECIFIED:
|
|
case SYSTEM_TIME_HISTORY:
|
|
curr= newx Item_int(thd, ULONGLONG_MAX);
|
|
if (conds->type == SYSTEM_TIME_UNSPECIFIED)
|
|
cond1= newx Item_func_eq(thd, conds->field_end, curr);
|
|
else
|
|
cond1= newx Item_func_lt(thd, conds->field_end, curr);
|
|
break;
|
|
DBUG_ASSERT(!conds->start.item);
|
|
DBUG_ASSERT(!conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_AS_OF:
|
|
cond1= newx Item_func_trt_trx_sees_eq(thd, trx_id0, conds->field_start);
|
|
cond2= newx Item_func_trt_trx_sees(thd, conds->field_end, trx_id0);
|
|
DBUG_ASSERT(!conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_FROM_TO:
|
|
cond1= newx Item_func_trt_trx_sees(thd, trx_id1, conds->field_start);
|
|
cond2= newx Item_func_trt_trx_sees_eq(thd, conds->field_end, trx_id0);
|
|
cond3= newx Item_func_lt(thd, conds->start.item, conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_BETWEEN:
|
|
cond1= newx Item_func_trt_trx_sees_eq(thd, trx_id1, conds->field_start);
|
|
cond2= newx Item_func_trt_trx_sees_eq(thd, conds->field_end, trx_id0);
|
|
cond3= newx Item_func_le(thd, conds->start.item, conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_BEFORE:
|
|
cond1= newx Item_func_history(thd, conds->field_end);
|
|
cond2= newx Item_func_trt_trx_sees(thd, trx_id0, conds->field_end);
|
|
break;
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
}
|
|
}
|
|
|
|
if (cond1)
|
|
{
|
|
cond1= and_items(thd, cond2, cond1);
|
|
cond1= and_items(thd, cond3, cond1);
|
|
}
|
|
return cond1;
|
|
}
|
|
|
|
static
|
|
bool skip_setup_conds(THD *thd)
|
|
{
|
|
return (!thd->stmt_arena->is_conventional()
|
|
&& !thd->stmt_arena->is_stmt_prepare_or_first_sp_execute())
|
|
|| thd->lex->is_view_context_analysis();
|
|
}
|
|
|
|
/**
  Check the period named in each table's period_conditions and, unless
  skip_setup_conds() says otherwise, AND the resulting period condition
  into this select's WHERE.

  The statement arena is activated (if needed) for the duration of the call
  and restored on every exit path.

  @param thd     current thread
  @param tables  table list; asserted to consist of a single opened table

  @return 0 on success, -1 if a table has no period with the requested name
          (ER_PERIOD_NOT_FOUND is raised)
*/
int SELECT_LEX::period_setup_conds(THD *thd, TABLE_LIST *tables)
{
  DBUG_ENTER("SELECT_LEX::period_setup_conds");
  const bool update_conds= !skip_setup_conds(thd);

  Query_arena backup;
  Query_arena *arena= thd->activate_stmt_arena_if_needed(&backup);

  DBUG_ASSERT(!tables->next_local && tables->table);

  int rc= 0;
  Item *period_cond= NULL;
  for (TABLE_LIST *tl= tables; tl; tl= tl->next_local)
  {
    if (!tl->table)
      continue;

    vers_select_conds_t &conds= tl->period_conditions;
    if (!tl->table->s->period.name.streq(conds.name))
    {
      my_error(ER_PERIOD_NOT_FOUND, MYF(0), conds.name.str);
      rc= -1;
      break;
    }

    if (!update_conds)
      continue;

    conds.period= &tl->table->s->period;
    period_cond= and_items(thd, period_cond,
                           period_get_condition(thd, tl, this, &conds, true));
  }

  /* WHERE is only extended when every table passed the name check. */
  if (rc == 0 && update_conds)
    where= and_items(thd, where, period_cond);

  if (arena)
    thd->restore_active_arena(arena, &backup);

  DBUG_RETURN(rc);
}
|
|
|
|
/**
  System versioning: process the FOR SYSTEM_TIME specification of every
  versioned table of this select and attach the matching condition.

  For each versioned table the effective specification is taken, in this
  order, from the table itself, from the nearest enclosing derived table
  that has one, or (for SQLCOM_SELECT only) from the session variable. The
  condition produced by period_get_condition() is ANDed to the table's ON
  expression for the SELECT-like commands listed in the switch below,
  otherwise to WHERE (and table->where).

  @param thd     current thread
  @param tables  first table of the select (walked through next_local)

  @return 0 on success, -1 on error (an error has been raised, or
          init_from_sysvar()/check_units() failed)
*/
int SELECT_LEX::vers_setup_conds(THD *thd, TABLE_LIST *tables)
{
  DBUG_ENTER("SELECT_LEX::vers_setup_conds");
  const bool update_conds= !skip_setup_conds(thd);

  /* Count versioned tables once; reject FOR SYSTEM_TIME on the others. */
  if (!versioned_tables)
  {
    for (TABLE_LIST *tl= tables; tl; tl= tl->next_local)
    {
      if (tl->table && tl->table->versioned())
      {
        versioned_tables++;
        continue;
      }
      if (tl->vers_conditions.is_set() &&
          (tl->is_non_derived() || !tl->vers_conditions.used))
      {
        my_error(ER_VERS_NOT_VERSIONED, MYF(0), tl->alias.str);
        DBUG_RETURN(-1);
      }
    }
  }

  if (versioned_tables == 0)
    DBUG_RETURN(0);

  /* For prepared statements we create items on statement arena,
     because they must outlive execution phase for multiple executions. */
  Query_arena_stmt on_stmt_arena(thd);

  /* Look for the nearest enclosing derived table with a system_time. */
  SELECT_LEX *outer_slex= outer_select();
  TABLE_LIST *outer_table= NULL;

  if (outer_slex)
  {
    TABLE_LIST *derived= master_unit()->derived;
    /* The inner SELECT may not be a derived table (derived == NULL). */
    while (derived && outer_slex && !derived->vers_conditions.is_set())
    {
      derived= outer_slex->master_unit()->derived;
      outer_slex= outer_slex->outer_select();
    }
    if (derived && outer_slex)
    {
      DBUG_ASSERT(derived->vers_conditions.is_set());
      outer_table= derived;
    }
  }

  bool is_select= false;
  bool use_sysvar= false;
  switch (thd->lex->sql_command)
  {
  case SQLCOM_SELECT:
    use_sysvar= true;
    /* fall through */
  case SQLCOM_CREATE_TABLE:
  case SQLCOM_INSERT_SELECT:
  case SQLCOM_REPLACE_SELECT:
  case SQLCOM_DELETE_MULTI:
  case SQLCOM_UPDATE_MULTI:
    is_select= true;
    break;
  default:
    break;
  }

  for (TABLE_LIST *table= tables; table; table= table->next_local)
  {
    if (!table->table || table->is_view() || !table->table->versioned())
      continue;

    vers_select_conds_t &vers_conditions= table->vers_conditions;

#ifdef WITH_PARTITION_STORAGE_ENGINE
    /*
      If the history is stored in partitions, then partitions
      themselves are not versioned.
    */
    if (table->partition_names && table->table->part_info->vers_info)
    {
      if (vers_conditions.was_set())
      {
        my_error(ER_VERS_QUERY_IN_PARTITION, MYF(0), table->alias.str);
        DBUG_RETURN(-1);
      }
      if (!vers_conditions.is_set())
        vers_conditions.set_all();
    }
#endif

    if (outer_table && !vers_conditions.is_set())
    {
      /* Propagate system_time from the nearest outer SELECT_LEX. */
      vers_conditions= outer_table->vers_conditions;
      outer_table->vers_conditions.used= true;
    }

    /* Propagate system_time from the session variable. */
    if (!vers_conditions.is_set() && use_sysvar &&
        vers_conditions.init_from_sysvar(thd))
      DBUG_RETURN(-1);

    if (vers_conditions.is_set())
    {
      if (vers_conditions.was_set() &&
          table->lock_type >= TL_FIRST_WRITE &&
          !vers_conditions.delete_history)
      {
        my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), table->alias.str);
        DBUG_RETURN(-1);
      }

      /* FOR SYSTEM_TIME ALL: no condition is attached for this table. */
      if (vers_conditions.type == SYSTEM_TIME_ALL)
        continue;
    }

    const bool timestamps_only= table->table->versioned(VERS_TIMESTAMP);

    if (vers_conditions.is_set() && vers_conditions.type != SYSTEM_TIME_HISTORY)
    {
      thd->where= THD_WHERE::FOR_SYSTEM_TIME;
      /* TODO: do resolve fix_length_and_dec(), fix_fields(). This requires
         storing vers_conditions as Item and make some magic related to
         vers_system_time_t/VERS_TRX_ID at stage of fix_fields()
         (this is large refactoring). */
      if (vers_conditions.check_units(thd))
        DBUG_RETURN(-1);
      if (timestamps_only && (vers_conditions.start.unit == VERS_TRX_ID ||
                              vers_conditions.end.unit == VERS_TRX_ID))
      {
        my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->table_name.str);
        DBUG_RETURN(-1);
      }
    }

    if (!update_conds)
      continue;

    vers_conditions.period= &table->table->s->vers;
    Item *cond= period_get_condition(thd, table, this, &vers_conditions,
                                     timestamps_only);
    if (is_select)
      table->on_expr= and_items(thd, table->on_expr, cond);
    else
    {
      if (join)
      {
        where= and_items(thd, join->conds, cond);
        join->conds= where;
      }
      else
        where= and_items(thd, where, cond);
      table->where= and_items(thd, table->where, cond);
    }

    table->vers_conditions.set_all();
  } // for (table= tables; ...)

  DBUG_RETURN(0);
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Check fields, find best join, do the select and output fields.
|
|
mysql_select assumes that all tables are already opened
|
|
*****************************************************************************/
|
|
|
|
/*
|
|
Check if we have a field reference. If yes, we have to use
|
|
mixed_implicit_grouping.
|
|
*/
|
|
|
|
static bool check_list_for_field(List<Item> *items)
|
|
{
|
|
List_iterator_fast <Item> select_it(*items);
|
|
Item *select_el;
|
|
|
|
while ((select_el= select_it++))
|
|
{
|
|
if (select_el->with_field())
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
  Same check for an ORDER/GROUP list: true if the item of any element
  reports with_field(), false otherwise (including for a NULL list).
*/
static bool check_list_for_field(ORDER *order)
{
  ORDER *cur= order;
  while (cur)
  {
    if ((*cur->item)->with_field())
      return true;
    cur= cur->next;
  }
  return false;
}
|
|
|
|
|
|
/**
|
|
Prepare of whole select (including sub queries in future).
|
|
|
|
@todo
|
|
Add check of calculation of GROUP functions and fields:
|
|
SELECT COUNT(*)+table.col1 from table1;
|
|
|
|
@retval
|
|
-1 on error
|
|
@retval
|
|
0 on success
|
|
*/
|
|
int
|
|
JOIN::prepare(TABLE_LIST *tables_init, COND *conds_init, uint og_num,
|
|
ORDER *order_init, bool skip_order_by,
|
|
ORDER *group_init, Item *having_init,
|
|
ORDER *proc_param_init, SELECT_LEX *select_lex_arg,
|
|
SELECT_LEX_UNIT *unit_arg)
|
|
{
|
|
DBUG_ENTER("JOIN::prepare");
|
|
|
|
// to prevent double initialization on EXPLAIN
|
|
if (optimization_state != JOIN::NOT_OPTIMIZED)
|
|
DBUG_RETURN(0);
|
|
|
|
conds= conds_init;
|
|
order= order_init;
|
|
group_list= group_init;
|
|
having= having_init;
|
|
proc_param= proc_param_init;
|
|
tables_list= tables_init;
|
|
select_lex= select_lex_arg;
|
|
DBUG_PRINT("info", ("select %p (%u) = JOIN %p",
|
|
select_lex, select_lex->select_number, this));
|
|
select_lex->join= this;
|
|
join_list= &select_lex->top_join_list;
|
|
union_part= unit_arg->is_unit_op();
|
|
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_prepare(thd, "join_preparation");
|
|
trace_prepare.add_select_number(select_lex->select_number);
|
|
Json_writer_array trace_steps(thd, "steps");
|
|
|
|
// simple check that we got usable conds
|
|
dbug_print_item(conds);
|
|
|
|
/* Fix items that requires the join structure to exist */
|
|
fix_items_after_optimize(thd, select_lex);
|
|
|
|
/*
|
|
It is hack which force creating EXPLAIN object always on runt-time arena
|
|
(because very top JOIN::prepare executes always with runtime arena, but
|
|
constant subquery like (SELECT 'x') can be called with statement arena
|
|
during prepare phase of top SELECT).
|
|
*/
|
|
if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_PREPARE))
|
|
create_explain_query_if_not_exists(thd->lex, thd->mem_root);
|
|
|
|
if (select_lex->handle_derived(thd->lex, DT_PREPARE))
|
|
DBUG_RETURN(-1);
|
|
|
|
thd->lex->current_select->context_analysis_place= NO_MATTER;
|
|
thd->lex->current_select->is_item_list_lookup= 1;
|
|
/*
|
|
If we have already executed SELECT, then it have not sense to prevent
|
|
its table from update (see unique_table())
|
|
Affects only materialized derived tables.
|
|
*/
|
|
/* Check that all tables, fields, conds and order are ok */
|
|
if (!(select_options & OPTION_SETUP_TABLES_DONE) &&
|
|
setup_tables_and_check_access(thd, &select_lex->context, join_list,
|
|
tables_list, select_lex->leaf_tables,
|
|
FALSE, SELECT_ACL, SELECT_ACL, FALSE))
|
|
DBUG_RETURN(-1);
|
|
|
|
/* System Versioning: handle FOR SYSTEM_TIME clause. */
|
|
if (select_lex->vers_setup_conds(thd, tables_list) < 0)
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
|
|
mixed_implicit_grouping will be set to TRUE if the SELECT list
|
|
mixes elements with and without grouping, and there is no GROUP BY
|
|
clause.
|
|
Mixing non-aggregated fields with aggregate functions in the
|
|
SELECT list or HAVING is a MySQL extension that is allowed only if
|
|
the ONLY_FULL_GROUP_BY sql mode is not set.
|
|
*/
|
|
mixed_implicit_grouping= false;
|
|
if ((~thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) &&
|
|
select_lex->with_sum_func && !group_list)
|
|
{
|
|
if (check_list_for_field(&fields_list) ||
|
|
check_list_for_field(order))
|
|
{
|
|
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
|
|
|
|
mixed_implicit_grouping= true; // mark for future
|
|
|
|
while (TABLE_LIST *tbl= li++)
|
|
{
|
|
/*
|
|
If the query uses implicit grouping where the select list
|
|
contains both aggregate functions and non-aggregate fields,
|
|
any non-aggregated field may produce a NULL value. Set all
|
|
fields of each table as nullable before semantic analysis to
|
|
take into account this change of nullability.
|
|
|
|
Note: this loop doesn't touch tables inside merged
|
|
semi-joins, because subquery-to-semijoin conversion has not
|
|
been done yet. This is intended.
|
|
*/
|
|
if (tbl->table)
|
|
tbl->table->maybe_null= 1;
|
|
}
|
|
}
|
|
}
|
|
table_count= select_lex->leaf_tables.elements;
|
|
|
|
uint real_og_num= og_num;
|
|
if (skip_order_by &&
|
|
select_lex != select_lex->master_unit()->global_parameters())
|
|
real_og_num+= select_lex->order_list.elements;
|
|
|
|
DBUG_ASSERT(select_lex->hidden_bit_fields == 0);
|
|
if (setup_wild(thd, tables_list, fields_list, &all_fields, select_lex, false))
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
|
|
If the select_lex is immediately contained within a derived table
|
|
AND this derived table is a CTE
|
|
WITH supplied column names
|
|
AND we have the correct number of elements in both lists
|
|
(mismatches found in mysql_derived_prepare/rename_columns_of_derived_unit)
|
|
THEN NOW is the time to take a copy of these item_names for
|
|
later restoration if required.
|
|
*/
|
|
TABLE_LIST *derived= select_lex->master_unit()->derived;
|
|
|
|
if (derived &&
|
|
derived->with &&
|
|
derived->with->column_list.elements &&
|
|
(derived->with->column_list.elements == select_lex->item_list.elements))
|
|
{
|
|
if (select_lex->save_item_list_names(thd))
|
|
DBUG_RETURN(-1);
|
|
}
|
|
|
|
if (thd->lex->current_select->first_cond_optimization)
|
|
{
|
|
if ( conds && ! thd->lex->current_select->merged_into)
|
|
select_lex->select_n_reserved= conds->exists2in_reserved_items();
|
|
else
|
|
select_lex->select_n_reserved= 0;
|
|
}
|
|
|
|
if (select_lex->setup_ref_array(thd, real_og_num))
|
|
DBUG_RETURN(-1);
|
|
|
|
ref_ptrs= ref_ptr_array_slice(0);
|
|
|
|
enum_parsing_place save_place=
|
|
thd->lex->current_select->context_analysis_place;
|
|
thd->lex->current_select->context_analysis_place= SELECT_LIST;
|
|
|
|
{
|
|
List_iterator_fast<TABLE_LIST> it(select_lex->leaf_tables);
|
|
while (TABLE_LIST *tbl= it++)
|
|
{
|
|
if (tbl->table_function &&
|
|
tbl->table_function->setup(thd, tbl, select_lex_arg))
|
|
DBUG_RETURN(-1);
|
|
}
|
|
}
|
|
|
|
if (setup_fields(thd, ref_ptrs, fields_list, select_lex->item_list_usage,
|
|
&all_fields, &select_lex->pre_fix, 1))
|
|
DBUG_RETURN(-1);
|
|
thd->lex->current_select->context_analysis_place= save_place;
|
|
|
|
if (setup_without_group(thd, ref_ptrs, tables_list,
|
|
select_lex->leaf_tables, fields_list,
|
|
all_fields, &conds, order, group_list,
|
|
select_lex->window_specs,
|
|
select_lex->window_funcs,
|
|
&hidden_group_fields))
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
|
|
Permanently remove redundant parts from the query if
|
|
1) This is a subquery
|
|
2) This is the first time this query is optimized (since the
|
|
transformation is permanent
|
|
3) Not normalizing a view. Removal should take place when a
|
|
query involving a view is optimized, not when the view
|
|
is created
|
|
*/
|
|
if (select_lex->master_unit()->item && // 1)
|
|
select_lex->first_cond_optimization && // 2)
|
|
!thd->lex->is_view_context_analysis()) // 3)
|
|
{
|
|
remove_redundant_subquery_clauses(select_lex);
|
|
}
|
|
|
|
/* Resolve the ORDER BY that was skipped, then remove it. */
|
|
if (skip_order_by && select_lex !=
|
|
select_lex->master_unit()->global_parameters())
|
|
{
|
|
nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
|
|
thd->lex->allow_sum_func.set_bit(select_lex->nest_level);
|
|
thd->where= THD_WHERE::ORDER_CLAUSE;
|
|
for (ORDER *order= select_lex->order_list.first; order; order= order->next)
|
|
{
|
|
/* Don't add the order items to all fields. Just resolve them to ensure
|
|
the query is valid, we'll drop them immediately after. */
|
|
if (find_order_in_list(thd, ref_ptrs, tables_list, order,
|
|
fields_list, all_fields, false, false, false))
|
|
DBUG_RETURN(-1);
|
|
}
|
|
thd->lex->allow_sum_func= save_allow_sum_func;
|
|
select_lex->order_list.empty();
|
|
}
|
|
|
|
if (having)
|
|
{
|
|
nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
|
|
thd->where= THD_WHERE::HAVING_CLAUSE;
|
|
thd->lex->allow_sum_func.set_bit(select_lex_arg->nest_level);
|
|
select_lex->having_fix_field= 1;
|
|
/*
|
|
Wrap alone field in HAVING clause in case it will be outer field
|
|
of subquery which need persistent pointer on it, but having
|
|
could be changed by optimizer
|
|
*/
|
|
if (having->type() == Item::REF_ITEM &&
|
|
((Item_ref *)having)->ref_type() == Item_ref::REF)
|
|
wrap_ident(thd, &having);
|
|
bool having_fix_rc= having->fix_fields_if_needed_for_bool(thd, &having);
|
|
select_lex->having_fix_field= 0;
|
|
|
|
if (unlikely(having_fix_rc || thd->is_error()))
|
|
DBUG_RETURN(-1); /* purecov: inspected */
|
|
thd->lex->allow_sum_func= save_allow_sum_func;
|
|
|
|
if (having->with_window_func())
|
|
{
|
|
my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0));
|
|
DBUG_RETURN(-1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
After setting up window functions, we may have discovered additional
|
|
used tables from the PARTITION BY and ORDER BY list. Update all items
|
|
that contain window functions.
|
|
*/
|
|
if (select_lex->have_window_funcs())
|
|
{
|
|
List_iterator_fast<Item> it(select_lex->item_list);
|
|
Item *item;
|
|
while ((item= it++))
|
|
{
|
|
if (item->with_window_func())
|
|
item->update_used_tables();
|
|
}
|
|
}
|
|
|
|
With_clause *with_clause=select_lex->get_with_clause();
|
|
if (with_clause && with_clause->prepare_unreferenced_elements(thd))
|
|
DBUG_RETURN(1);
|
|
|
|
With_element *with_elem= select_lex->get_with_element();
|
|
if (with_elem &&
|
|
select_lex->check_unrestricted_recursive(
|
|
thd->variables.only_standard_compliant_cte))
|
|
DBUG_RETURN(-1);
|
|
if (!(select_lex->changed_elements & TOUCHED_SEL_COND))
|
|
select_lex->check_subqueries_with_recursive_references();
|
|
|
|
int res= check_and_do_in_subquery_rewrites(this);
|
|
|
|
select_lex->fix_prepare_information(thd, &conds, &having);
|
|
|
|
if (res)
|
|
DBUG_RETURN(res);
|
|
|
|
if (order)
|
|
{
|
|
bool requires_sorting= FALSE;
|
|
/*
|
|
WITH TIES forces the results to be sorted, even if it's not sanely
|
|
sortable.
|
|
*/
|
|
if (select_lex->limit_params.with_ties)
|
|
requires_sorting= true;
|
|
|
|
/*
|
|
Go through each ORDER BY item and perform the following:
|
|
1. Detect if none of the items contain meaningful data, which means we
|
|
can drop the sorting altogether.
|
|
2. Split any columns with aggregation functions or window functions into
|
|
their base components and store them as separate fields.
|
|
(see split_sum_func) for more details.
|
|
*/
|
|
for (ORDER *ord= order; ord; ord= ord->next)
|
|
{
|
|
Item *item= *ord->item;
|
|
/*
|
|
Disregard sort order if there's only
|
|
zero length NOT NULL fields (e.g. {VAR}CHAR(0) NOT NULL") or
|
|
zero length NOT NULL string functions there.
|
|
Such tuples don't contain any data to sort.
|
|
*/
|
|
if (!requires_sorting &&
|
|
/* Not a zero length NOT NULL field */
|
|
((item->type() != Item::FIELD_ITEM ||
|
|
((Item_field *) item)->field->maybe_null() ||
|
|
((Item_field *) item)->field->sort_length()) &&
|
|
/* AND not a zero length NOT NULL string function. */
|
|
(item->type() != Item::FUNC_ITEM ||
|
|
item->maybe_null() ||
|
|
item->result_type() != STRING_RESULT ||
|
|
item->max_length)))
|
|
requires_sorting= TRUE;
|
|
|
|
if ((item->with_sum_func() && item->type() != Item::SUM_FUNC_ITEM) ||
|
|
item->with_window_func())
|
|
item->split_sum_func(thd, ref_ptrs, all_fields, SPLIT_SUM_SELECT);
|
|
}
|
|
/* Drop the ORDER BY clause if none of the columns contain any data that
|
|
can produce a meaningful sorted set. */
|
|
if (!requires_sorting)
|
|
order= NULL;
|
|
}
|
|
else
|
|
{
|
|
/* The current select does not have an ORDER BY */
|
|
if (select_lex->limit_params.with_ties)
|
|
{
|
|
my_error(ER_WITH_TIES_NEEDS_ORDER, MYF(0));
|
|
DBUG_RETURN(-1);
|
|
}
|
|
}
|
|
|
|
if (having && (having->with_sum_func() || having->with_rownum_func()))
|
|
having->split_sum_func2(thd, ref_ptrs, all_fields,
|
|
&having, SPLIT_SUM_SKIP_REGISTERED);
|
|
if (select_lex->inner_sum_func_list)
|
|
{
|
|
Item_sum *end=select_lex->inner_sum_func_list;
|
|
Item_sum *item_sum= end;
|
|
do
|
|
{
|
|
item_sum= item_sum->next;
|
|
item_sum->split_sum_func2(thd, ref_ptrs,
|
|
all_fields, item_sum->ref_by, 0);
|
|
} while (item_sum != end);
|
|
}
|
|
|
|
if (select_lex->inner_refs_list.elements &&
|
|
fix_inner_refs(thd, all_fields, select_lex, ref_ptrs))
|
|
DBUG_RETURN(-1);
|
|
|
|
if (group_list)
|
|
{
|
|
/*
|
|
Because HEAP tables can't index BIT fields we need to use an
|
|
additional hidden field for grouping because later it will be
|
|
converted to a LONG field. Original field will remain of the
|
|
BIT type and will be returned to a client.
|
|
*/
|
|
for (ORDER *ord= group_list; ord; ord= ord->next)
|
|
{
|
|
if ((*ord->item)->type() == Item::FIELD_ITEM &&
|
|
(*ord->item)->field_type() == MYSQL_TYPE_BIT)
|
|
{
|
|
Item_field *field= new (thd->mem_root) Item_field(thd, *(Item_field**)ord->item);
|
|
if (!field)
|
|
DBUG_RETURN(-1);
|
|
int el= all_fields.elements;
|
|
ref_ptrs[el]= field;
|
|
all_fields.push_front(field, thd->mem_root);
|
|
ord->item= &ref_ptrs[el];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Check if there are references to un-aggregated columns when computing
|
|
aggregate functions with implicit grouping (there is no GROUP BY).
|
|
*/
|
|
if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && !group_list &&
|
|
!(select_lex->master_unit()->item &&
|
|
select_lex->master_unit()->item->is_in_predicate() &&
|
|
select_lex->master_unit()->item->get_IN_subquery()->
|
|
test_set_strategy(SUBS_MAXMIN_INJECTED)) &&
|
|
select_lex->non_agg_field_used() &&
|
|
select_lex->agg_func_used())
|
|
{
|
|
my_message(ER_MIX_OF_GROUP_FUNC_AND_FIELDS,
|
|
ER_THD(thd, ER_MIX_OF_GROUP_FUNC_AND_FIELDS), MYF(0));
|
|
DBUG_RETURN(-1);
|
|
}
|
|
{
|
|
/* Caclulate the number of groups */
|
|
send_group_parts= 0;
|
|
for (ORDER *group_tmp= group_list ; group_tmp ; group_tmp= group_tmp->next)
|
|
send_group_parts++;
|
|
}
|
|
|
|
procedure= setup_procedure(thd, proc_param, result, fields_list, &error);
|
|
if (unlikely(error))
|
|
goto err; /* purecov: inspected */
|
|
if (procedure)
|
|
{
|
|
if (setup_new_fields(thd, fields_list, all_fields,
|
|
procedure->param_fields))
|
|
goto err; /* purecov: inspected */
|
|
if (procedure->group)
|
|
{
|
|
if (!test_if_subpart(procedure->group,group_list))
|
|
{ /* purecov: inspected */
|
|
my_message(ER_DIFF_GROUPS_PROC, ER_THD(thd, ER_DIFF_GROUPS_PROC),
|
|
MYF(0)); /* purecov: inspected */
|
|
goto err; /* purecov: inspected */
|
|
}
|
|
}
|
|
if (order && (procedure->flags & PROC_NO_SORT))
|
|
{ /* purecov: inspected */
|
|
my_message(ER_ORDER_WITH_PROC, ER_THD(thd, ER_ORDER_WITH_PROC),
|
|
MYF(0)); /* purecov: inspected */
|
|
goto err; /* purecov: inspected */
|
|
}
|
|
if (thd->lex->derived_tables)
|
|
{
|
|
/*
|
|
Queries with derived tables and PROCEDURE are not allowed.
|
|
Many of such queries are disallowed grammatically, but there
|
|
are still some complex cases:
|
|
SELECT 1 FROM (SELECT 1) a PROCEDURE ANALYSE()
|
|
*/
|
|
my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE",
|
|
thd->lex->derived_tables & DERIVED_VIEW ?
|
|
"view" : "subquery");
|
|
goto err;
|
|
}
|
|
if (thd->lex->sql_command != SQLCOM_SELECT)
|
|
{
|
|
// EXPLAIN SELECT * FROM t1 PROCEDURE ANALYSE()
|
|
my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT");
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
Json_writer_object trace_wrapper(thd);
|
|
opt_trace_print_expanded_query(thd, select_lex, &trace_wrapper);
|
|
}
|
|
|
|
if (!procedure && result && result->prepare(fields_list, unit_arg))
|
|
goto err; /* purecov: inspected */
|
|
|
|
select_lex->where_cond_after_prepare= conds;
|
|
|
|
unit= unit_arg;
|
|
if (prepare_stage2())
|
|
goto err;
|
|
prepared= true;
|
|
|
|
DBUG_RETURN(0); // All OK
|
|
|
|
err:
|
|
delete procedure; /* purecov: inspected */
|
|
procedure= 0;
|
|
DBUG_RETURN(-1); /* purecov: inspected */
|
|
}
|
|
|
|
|
|
/**
|
|
Second phase of prepare where we collect some statistic.
|
|
|
|
@details
|
|
We made this part separate to be able to recalculate some statistics after
|
|
transforming subqueries during the optimization phase.
|
|
*/
|
|
|
|
bool JOIN::prepare_stage2()
|
|
{
|
|
bool res= TRUE;
|
|
DBUG_ENTER("JOIN::prepare_stage2");
|
|
|
|
/* Init join struct */
|
|
count_field_types(select_lex, &tmp_table_param, all_fields, 0);
|
|
this->group= group_list != 0;
|
|
|
|
if (tmp_table_param.sum_func_count && !group_list)
|
|
{
|
|
implicit_grouping= TRUE;
|
|
// Result will contain zero or one row - ordering is meaningless
|
|
order= NULL;
|
|
}
|
|
|
|
#ifdef RESTRICTED_GROUP
|
|
if (implicit_grouping)
|
|
{
|
|
my_message(ER_WRONG_SUM_SELECT,ER_THD(thd, ER_WRONG_SUM_SELECT),MYF(0));
|
|
goto err;
|
|
}
|
|
#endif
|
|
if (select_lex->olap == ROLLUP_TYPE && rollup_init())
|
|
goto err;
|
|
if (alloc_func_list() ||
|
|
make_sum_func_list(all_fields, fields_list, false))
|
|
goto err;
|
|
|
|
res= FALSE;
|
|
err:
|
|
DBUG_RETURN(res); /* purecov: inspected */
|
|
}
|
|
|
|
|
|
bool JOIN::build_explain()
|
|
{
|
|
DBUG_ENTER("JOIN::build_explain");
|
|
have_query_plan= QEP_AVAILABLE;
|
|
|
|
/*
|
|
explain data must be created on the Explain_query::mem_root. Because it's
|
|
just a memroot, not an arena, explain data must not contain any Items
|
|
*/
|
|
MEM_ROOT *old_mem_root= thd->mem_root;
|
|
Item *old_free_list __attribute__((unused))= thd->free_list;
|
|
thd->mem_root= thd->lex->explain->mem_root;
|
|
bool res= save_explain_data(thd->lex->explain, false /* can overwrite */,
|
|
need_tmp,
|
|
!skip_sort_order && !no_order && (order || group_list),
|
|
select_distinct);
|
|
thd->mem_root= old_mem_root;
|
|
DBUG_ASSERT(thd->free_list == old_free_list); // no Items were created
|
|
if (res)
|
|
DBUG_RETURN(1);
|
|
uint select_nr= select_lex->select_number;
|
|
JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
|
|
for (uint i= 0; i < aggr_tables; i++, curr_tab++)
|
|
{
|
|
if (select_nr == FAKE_SELECT_LEX_ID)
|
|
{
|
|
/* this is a fake_select_lex of a union */
|
|
select_nr= select_lex->master_unit()->first_select()->select_number;
|
|
curr_tab->tracker= thd->lex->explain->get_union(select_nr)->
|
|
get_tmptable_read_tracker();
|
|
}
|
|
else if (select_nr < INT_MAX)
|
|
{
|
|
Explain_select *tmp= thd->lex->explain->get_select(select_nr);
|
|
if (tmp)
|
|
curr_tab->tracker= tmp->get_using_temporary_read_tracker();
|
|
}
|
|
}
|
|
if (is_in_subquery())
|
|
{
|
|
Item_in_subselect *subq= unit->item->get_IN_subquery();
|
|
subq->init_subq_materialization_tracker(thd);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
int JOIN::optimize()
|
|
{
|
|
int res= 0;
|
|
join_optimization_state init_state= optimization_state;
|
|
if (select_lex->pushdown_select)
|
|
{
|
|
if (optimization_state == JOIN::OPTIMIZATION_DONE)
|
|
return 0;
|
|
DBUG_ASSERT(optimization_state == JOIN::NOT_OPTIMIZED);
|
|
|
|
// Do same as JOIN::optimize_inner does:
|
|
fields= &select_lex->item_list;
|
|
|
|
if (!(select_options & SELECT_DESCRIBE))
|
|
{
|
|
/* Prepare to execute the query pushed into a foreign engine */
|
|
res= select_lex->pushdown_select->prepare();
|
|
}
|
|
with_two_phase_optimization= false;
|
|
}
|
|
else if (optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE)
|
|
res= optimize_stage2();
|
|
else
|
|
{
|
|
// to prevent double initialization on EXPLAIN
|
|
if (optimization_state != JOIN::NOT_OPTIMIZED)
|
|
return FALSE;
|
|
optimization_state= JOIN::OPTIMIZATION_IN_PROGRESS;
|
|
res= optimize_inner();
|
|
}
|
|
if (!with_two_phase_optimization ||
|
|
init_state == JOIN::OPTIMIZATION_PHASE_1_DONE)
|
|
{
|
|
if (!res && have_query_plan != QEP_DELETED)
|
|
res= build_explain();
|
|
optimization_state= JOIN::OPTIMIZATION_DONE;
|
|
}
|
|
|
|
/*
|
|
Store the cost of this query into a user variable
|
|
TODO: calculate a correct cost for a query with subqueries and UNIONs.
|
|
*/
|
|
if (select_lex->select_number == 1)
|
|
thd->status_var.last_query_cost= best_read;
|
|
return res;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Create range filters objects needed in execution for all join tables
|
|
|
|
@details
|
|
For each join table from the chosen execution plan such that a range filter
|
|
is used when joining this table the function creates a Rowid_filter object
|
|
for this range filter. In order to do this the function first constructs
|
|
a quick select to scan the range for this range filter. Then it creates
|
|
a container for the range filter and finally constructs a Range_rowid_filter
|
|
object a pointer to which is set in the field JOIN_TAB::rowid_filter of
|
|
the joined table.
|
|
|
|
@retval false Ok
|
|
@retval true Error, query should abort
|
|
*/
|
|
|
|
bool JOIN::make_range_rowid_filters()
|
|
{
|
|
DBUG_ENTER("make_range_rowid_filters");
|
|
|
|
/*
|
|
Do not build range filters with detected impossible WHERE.
|
|
Anyway conditions cannot be used anymore to extract ranges for filters.
|
|
*/
|
|
if (const_table_map != found_const_table_map)
|
|
DBUG_RETURN(0);
|
|
|
|
JOIN_TAB *tab;
|
|
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (!tab->range_rowid_filter_info)
|
|
continue;
|
|
|
|
DBUG_ASSERT(!(tab->ref.key >= 0 &&
|
|
tab->ref.key == (int) tab->range_rowid_filter_info->get_key_no()));
|
|
DBUG_ASSERT(!(tab->ref.key == -1 && tab->quick &&
|
|
tab->quick->index == tab->range_rowid_filter_info->get_key_no()));
|
|
|
|
int err;
|
|
SQL_SELECT *sel= NULL;
|
|
Rowid_filter_container *filter_container= NULL;
|
|
Item **sargable_cond= get_sargable_cond(this, tab->table);
|
|
sel= make_select(tab->table, const_table_map, const_table_map,
|
|
*sargable_cond, (SORT_INFO*) 0, 1, &err);
|
|
if (!sel)
|
|
continue;
|
|
|
|
key_map filter_map;
|
|
filter_map.clear_all();
|
|
filter_map.set_bit(tab->range_rowid_filter_info->get_key_no());
|
|
filter_map.merge(tab->table->with_impossible_ranges);
|
|
quick_select_return rc;
|
|
/*
|
|
EQ_FUNC and EQUAL_FUNC already sent unusable key notes (if any)
|
|
during update_ref_and_keys(). Have only other functions raise notes
|
|
from can_optimize_scalar_range().
|
|
*/
|
|
rc= sel->test_quick_select(thd, filter_map, (table_map) 0,
|
|
(ha_rows) HA_POS_ERROR, true, false, true,
|
|
true, Item_func::BITMAP_EXCEPT_ANY_EQUALITY);
|
|
if (rc == SQL_SELECT::ERROR || thd->is_error() || thd->check_killed())
|
|
{
|
|
delete sel;
|
|
DBUG_RETURN(true); /* Fatal error */
|
|
}
|
|
/*
|
|
If SUBS_IN_TO_EXISTS strtrategy is chosen for the subquery then
|
|
additional conditions are injected into WHERE/ON/HAVING and it may
|
|
happen that the call of test_quick_select() discovers impossible range.
|
|
*/
|
|
if (rc == SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
{
|
|
const_table_map|= tab->table->map;
|
|
goto no_filter;
|
|
}
|
|
DBUG_ASSERT(sel->quick);
|
|
filter_container=
|
|
tab->range_rowid_filter_info->create_container();
|
|
if (filter_container)
|
|
{
|
|
tab->rowid_filter=
|
|
new (thd->mem_root) Range_rowid_filter(tab->table,
|
|
tab->range_rowid_filter_info,
|
|
filter_container, sel);
|
|
if (tab->rowid_filter)
|
|
{
|
|
tab->need_to_build_rowid_filter= true;
|
|
continue;
|
|
}
|
|
}
|
|
no_filter:
|
|
delete sel;
|
|
}
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Allocate memory for the rowid containers of the used range filters
|
|
|
|
@details
|
|
For each join table from the chosen execution plan such that a range filter
|
|
is used when joining this table the function allocates memory for the
|
|
rowid container employed by the filter. On success it lets the table engine
|
|
know which rowid filter will be used when accessing the table rows.
|
|
|
|
@retval
|
|
false OK
|
|
true Error, query should abort
|
|
*/
|
|
|
|
bool
|
|
JOIN::init_range_rowid_filters()
|
|
{
|
|
JOIN_TAB *tab;
|
|
DBUG_ENTER("init_range_rowid_filters");
|
|
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
tab->need_to_build_rowid_filter= false; // Safety
|
|
if (!tab->rowid_filter)
|
|
continue;
|
|
if (tab->rowid_filter->get_container()->alloc())
|
|
{
|
|
tab->clear_range_rowid_filter();
|
|
continue;
|
|
}
|
|
tab->table->file->rowid_filter_push(tab->rowid_filter);
|
|
tab->need_to_build_rowid_filter= true;
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/**
|
|
global select optimisation.
|
|
|
|
@note
|
|
error code saved in field 'error'
|
|
|
|
@retval
|
|
0 success
|
|
@retval
|
|
1 error
|
|
*/
|
|
|
|
int
|
|
JOIN::optimize_inner()
|
|
{
|
|
/*
  First stage of the optimization of this JOIN.

  In order of appearance the function: runs the transformations guarded by
  select_lex->first_cond_optimization (DT_MERGE handling of derived tables,
  transform_in_predicates_into_in_subq(), exists2in_processor walk,
  convert_join_subqueries_to_semijoins(), simplify_joins()), optimizes
  WHERE and HAVING with optimize_cond(), performs the condition pushdowns
  enabled by optimizer switches, short-cuts queries whose result is known
  to be empty, tries opt_sum_query() for implicit grouping, and calls
  make_join_statistics(). It ends at the setup_subq_exit label, where it
  either stops with OPTIMIZATION_PHASE_1_DONE or calls optimize_stage2().

  Returns 0 on success and a non-zero value on error.
*/
DBUG_ENTER("JOIN::optimize_inner");
|
|
subq_exit_fl= false;
|
|
best_read= 0.0;
|
|
|
|
DEBUG_SYNC(thd, "before_join_optimize");
|
|
THD_STAGE_INFO(thd, stage_optimizing);
|
|
#ifndef DBUG_OFF
|
|
dbug_join_tab_array_size= 0;
|
|
#endif
|
|
|
|
// rownum used somewhere in query, no limits and it is derived
|
|
if (unlikely(thd->lex->with_rownum &&
|
|
select_lex->first_cond_optimization &&
|
|
select_lex->master_unit()->derived))
|
|
optimize_upper_rownum_func();
|
|
|
|
/* do_send_rows is 1 unless the select limit of the unit is 0 */
do_send_rows = (unit->lim.get_select_limit()) ? 1 : 0;
|
|
|
|
set_allowed_join_cache_types();
|
|
need_distinct= TRUE;
|
|
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_prepare(thd, "join_optimization");
|
|
trace_prepare.add_select_number(select_lex->select_number);
|
|
Json_writer_array trace_steps(thd, "steps");
|
|
|
|
/*
|
|
Needed in case optimizer short-cuts,
|
|
set properly in make_aggr_tables_info()
|
|
*/
|
|
fields= &select_lex->item_list;
|
|
|
|
if (select_lex->first_cond_optimization)
|
|
{
|
|
//Do it only for the first execution
|
|
/* Merge all mergeable derived tables/views in this SELECT. */
|
|
if (select_lex->handle_derived(thd->lex, DT_MERGE))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
if (select_lex->first_cond_optimization &&
|
|
transform_in_predicates_into_in_subq(thd))
|
|
DBUG_RETURN(1);
|
|
|
|
/*
|
|
Update used tables after all handling derived table procedures
|
|
After this call, select_lex->select_list_tables contains the table
|
|
bits of all items in the select list (but not bits from WHERE clause or
|
|
other items).
|
|
*/
|
|
select_lex->update_used_tables();
|
|
|
|
/*
|
|
In fact we transform underlying subqueries after their 'prepare' phase and
|
|
before 'optimize' from upper query 'optimize' to allow semijoin
|
|
conversion to happen (which is done in the same way).
|
|
*/
|
|
if (select_lex->first_cond_optimization &&
|
|
conds && conds->walk(&Item::exists2in_processor, 0, thd))
|
|
DBUG_RETURN(1);
|
|
/*
|
|
TODO
|
|
make view to decide if it is possible to write to WHERE directly or make Semi-Joins able to process ON condition if it is possible
|
|
for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local)
|
|
{
|
|
if (tbl->on_expr &&
|
|
tbl->on_expr->walk(&Item::exists2in_processor, 0, thd))
|
|
DBUG_RETURN(1);
|
|
}
|
|
*/
|
|
|
|
if (transform_max_min_subquery())
|
|
DBUG_RETURN(1); /* purecov: inspected */
|
|
|
|
if (select_lex->first_cond_optimization)
|
|
{
|
|
/* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
|
|
if (convert_join_subqueries_to_semijoins(this))
|
|
DBUG_RETURN(1); /* purecov: inspected */
|
|
/* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
|
|
select_lex->update_used_tables();
|
|
}
|
|
|
|
eval_select_list_used_tables();
|
|
|
|
if (select_lex->options & OPTION_SCHEMA_TABLE &&
|
|
optimize_schema_tables_memory_usage(select_lex->leaf_tables))
|
|
DBUG_RETURN(1);
|
|
|
|
if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
  row_limit is HA_POS_ERROR when DISTINCT, ORDER BY or GROUP BY is present;
  otherwise it is the select limit of the unit.
*/
row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
|
|
unit->lim.get_select_limit());
|
|
/* select_limit is used to decide if we are likely to scan the whole table */
|
|
select_limit= unit->lim.get_select_limit();
|
|
if (having || (select_options & OPTION_FOUND_ROWS))
|
|
select_limit= HA_POS_ERROR;
|
|
#ifdef HAVE_REF_TO_FIELDS // Not done yet
|
|
/* Add HAVING to WHERE if possible */
|
|
if (having && !group_list && !sum_func_count)
|
|
{
|
|
if (!conds)
|
|
{
|
|
conds= having;
|
|
having= 0;
|
|
}
|
|
else if ((conds=new (thd->mem_root) Item_cond_and(conds,having)))
|
|
{
|
|
/*
|
|
Item_cond_and can't be fixed after creation, so we do not check
|
|
conds->fixed()
|
|
*/
|
|
conds->fix_fields(thd, &conds);
|
|
conds->change_ref_to_fields(thd, tables_list);
|
|
conds->top_level_item();
|
|
having= 0;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
SELECT_LEX *sel= select_lex;
|
|
if (sel->first_cond_optimization)
|
|
{
|
|
/*
|
|
The following code will allocate the new items in a permanent
|
|
MEMROOT for prepared statements and stored procedures.
|
|
|
|
But first we need to ensure that thd->lex->explain is allocated
|
|
in the execution arena
|
|
*/
|
|
create_explain_query_if_not_exists(thd->lex, thd->mem_root);
|
|
|
|
Query_arena *arena, backup;
|
|
arena= thd->activate_stmt_arena_if_needed(&backup);
|
|
|
|
/* Clear the flag so that the blocks guarded by it are not entered again */
sel->first_cond_optimization= 0;
|
|
|
|
/* Convert all outer joins to inner joins if possible */
|
|
conds= simplify_joins(this, join_list, conds, TRUE, FALSE);
|
|
|
|
add_table_function_dependencies(join_list, table_map(-1));
|
|
|
|
if (thd->is_error() ||
|
|
(!select_lex->leaf_tables_saved && select_lex->save_leaf_tables(thd)))
|
|
{
|
|
/* Restore the active arena (if one was activated) before returning */
if (arena)
|
|
thd->restore_active_arena(arena, &backup);
|
|
DBUG_RETURN(1);
|
|
}
|
|
select_lex->leaf_tables_saved= true;
|
|
build_bitmap_for_nested_joins(join_list, 0);
|
|
|
|
sel->prep_where= conds ? conds->copy_andor_structure(thd) : 0;
|
|
|
|
sel->where= conds;
|
|
|
|
select_lex->update_used_tables();
|
|
|
|
if (arena)
|
|
thd->restore_active_arena(arena, &backup);
|
|
}
|
|
|
|
if (!allowed_top_level_tables)
|
|
calc_allowed_top_level_tables(select_lex);
|
|
|
|
if (optimize_constant_subqueries())
|
|
DBUG_RETURN(1);
|
|
|
|
if (conds && conds->with_subquery())
|
|
(void) conds->walk(&Item::cleanup_is_expensive_cache_processor,
|
|
0, (void *) 0);
|
|
if (having && having->with_subquery())
|
|
(void) having->walk(&Item::cleanup_is_expensive_cache_processor,
|
|
0, (void *) 0);
|
|
|
|
List<Item> eq_list;
|
|
|
|
if (setup_degenerate_jtbm_semi_joins(this, join_list, eq_list))
|
|
DBUG_RETURN(1);
|
|
|
|
if (eq_list.elements != 0)
|
|
{
|
|
Item *new_cond;
|
|
|
|
/* A single item is used as is; several items are combined with AND */
if (eq_list.elements == 1)
|
|
new_cond= eq_list.pop();
|
|
else
|
|
new_cond= new (thd->mem_root) Item_cond_and(thd, eq_list);
|
|
|
|
if (new_cond &&
|
|
((new_cond->fix_fields(thd, &new_cond) ||
|
|
!(conds= and_items(thd, conds, new_cond)) ||
|
|
conds->fix_fields(thd, &conds))))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
eq_list.empty();
|
|
|
|
if (select_lex->cond_pushed_into_where)
|
|
{
|
|
conds= and_conds(thd, conds, select_lex->cond_pushed_into_where);
|
|
if (conds && conds->fix_fields(thd, &conds))
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (select_lex->cond_pushed_into_having)
|
|
{
|
|
having= and_conds(thd, having, select_lex->cond_pushed_into_having);
|
|
if (having)
|
|
{
|
|
/*
  Both flags are set only around the fix_fields() call below.
  NOTE(review): when fix_fields() fails the function returns with both
  flags still set - confirm that this is harmless for callers.
*/
select_lex->having_fix_field= 1;
|
|
select_lex->having_fix_field_for_pushed_cond= 1;
|
|
if (having->fix_fields(thd, &having))
|
|
DBUG_RETURN(1);
|
|
select_lex->having_fix_field= 0;
|
|
select_lex->having_fix_field_for_pushed_cond= 0;
|
|
}
|
|
}
|
|
|
|
bool ignore_on_expr= false;
|
|
/*
|
|
PS/SP note: on_expr of versioned table can not be reallocated
|
|
(see build_equal_items() below) because it can be not rebuilt
|
|
at second invocation.
|
|
*/
|
|
if (!thd->stmt_arena->is_conventional() &&
|
|
thd->mem_root != thd->stmt_arena->mem_root)
|
|
for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local)
|
|
if (tbl->table && tbl->on_expr && tbl->table->versioned())
|
|
{
|
|
ignore_on_expr= true;
|
|
break;
|
|
}
|
|
|
|
transform_in_predicates_into_equalities(thd);
|
|
if (thd->lex->are_date_funcs_used())
|
|
transform_date_conds_into_sargable();
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_SARGABLE_CASEFOLD))
|
|
{
|
|
transform_all_conds_and_on_exprs(
|
|
thd, &Item::varchar_upper_cmp_transformer);
|
|
}
|
|
|
|
conds= optimize_cond(this, conds, join_list, ignore_on_expr,
|
|
&cond_value, &cond_equal, OPT_LINK_EQUAL_FIELDS);
|
|
|
|
if (thd->is_error())
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from optimize_cond"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (select_lex->with_rownum && ! order && ! group_list &&
|
|
!select_distinct && conds && select_lex == unit->global_parameters() &&
|
|
select_lex->first_rownum_optimization)
|
|
{
|
|
optimize_rownum(thd, unit, conds);
|
|
select_lex->first_rownum_optimization= false;
|
|
}
|
|
|
|
having= optimize_cond(this, having, join_list, TRUE,
|
|
&having_value, &having_equal);
|
|
|
|
if (thd->is_error())
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from optimize_cond"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/* Do not push into WHERE from HAVING if cond_value == Item::COND_FALSE */
|
|
|
|
if (thd->lex->sql_command == SQLCOM_SELECT &&
|
|
optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING) &&
|
|
cond_value != Item::COND_FALSE)
|
|
{
|
|
having=
|
|
select_lex->pushdown_from_having_into_where(thd, having);
|
|
if (select_lex->attach_to_conds.elements != 0)
|
|
{
|
|
conds= and_new_conditions_to_optimized_cond(thd, conds, &cond_equal,
|
|
select_lex->attach_to_conds,
|
|
&cond_value);
|
|
sel->attach_to_conds.empty();
|
|
Json_writer_object wrapper(thd);
|
|
Json_writer_object pushd(thd, "condition_pushdown_from_having");
|
|
pushd.add("conds", conds);
|
|
pushd.add("having", having);
|
|
}
|
|
}
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY))
|
|
{
|
|
TABLE_LIST *tbl;
|
|
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
|
|
while ((tbl= li++))
|
|
if (tbl->jtbm_subselect)
|
|
{
|
|
if (tbl->jtbm_subselect->pushdown_cond_for_in_subquery(thd, conds))
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
|
|
if (setup_jtbm_semi_joins(this, join_list, eq_list))
|
|
DBUG_RETURN(1);
|
|
|
|
if (eq_list.elements != 0)
|
|
{
|
|
conds= and_new_conditions_to_optimized_cond(thd, conds, &cond_equal,
|
|
eq_list, &cond_value);
|
|
|
|
if (!conds &&
|
|
cond_value != Item::COND_FALSE && cond_value != Item::COND_TRUE)
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED))
|
|
{
|
|
TABLE_LIST *tbl;
|
|
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
|
|
while ((tbl= li++))
|
|
{
|
|
/*
|
|
Only materialized derived tables are processed in this loop:
|
|
conditions are pushed into them and their optimize phase is run.
|
|
*/
|
|
if (tbl->is_materialized_derived())
|
|
{
|
|
JOIN *join= tbl->get_unit()->first_select()->join;
|
|
if (join &&
|
|
join->optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE &&
|
|
join->with_two_phase_optimization)
|
|
continue;
|
|
/*
|
|
Do not push conditions from where into materialized inner tables
|
|
of outer joins: this is not valid.
|
|
*/
|
|
if (!tbl->is_inner_table_of_outer_join())
|
|
{
|
|
if (pushdown_cond_for_derived(thd, conds, tbl))
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (mysql_handle_single_derived(thd->lex, tbl, DT_OPTIMIZE))
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Run optimize phase for all derived tables/views used in this SELECT. */
|
|
if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
|
|
DBUG_RETURN(1);
|
|
}
|
|
{
|
|
if (select_lex->where)
|
|
{
|
|
select_lex->cond_value= cond_value;
|
|
if (sel->where != conds && cond_value == Item::COND_OK)
|
|
thd->change_item_tree(&sel->where, conds);
|
|
}
|
|
if (select_lex->having)
|
|
{
|
|
select_lex->having_value= having_value;
|
|
if (sel->having != having && having_value == Item::COND_OK)
|
|
thd->change_item_tree(&sel->having, having);
|
|
}
|
|
/*
  Short-cut: WHERE or HAVING evaluated to Item::COND_FALSE, or the select
  limit is 0 and OPTION_FOUND_ROWS is not set. zero_result_cause records
  which of these cases applies.
*/
if (cond_value == Item::COND_FALSE || having_value == Item::COND_FALSE ||
|
|
(!unit->lim.get_select_limit() &&
|
|
!(select_options & OPTION_FOUND_ROWS)))
|
|
{ /* Impossible cond */
|
|
if (unit->lim.get_select_limit())
|
|
{
|
|
DBUG_PRINT("info", (having_value == Item::COND_FALSE ?
|
|
"Impossible HAVING" : "Impossible WHERE"));
|
|
zero_result_cause= having_value == Item::COND_FALSE ?
|
|
"Impossible HAVING" : "Impossible WHERE";
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info", ("Zero limit"));
|
|
zero_result_cause= "Zero limit";
|
|
}
|
|
table_count= top_join_tab_count= 0;
|
|
handle_implicit_grouping_with_window_funcs();
|
|
error= 0;
|
|
subq_exit_fl= true;
|
|
goto setup_subq_exit;
|
|
}
|
|
}
|
|
|
|
#ifdef WITH_PARTITION_STORAGE_ENGINE
|
|
{
|
|
TABLE_LIST *tbl;
|
|
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
|
|
while ((tbl= li++))
|
|
{
|
|
Item **prune_cond= get_sargable_cond(this, tbl->table);
|
|
tbl->table->all_partitions_pruned_away=
|
|
prune_partitions(thd, tbl->table, *prune_cond);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
Try to optimize count(*), MY_MIN() and MY_MAX() to const fields if
|
|
there is implicit grouping (aggregate functions but no
|
|
group_list). In this case, the result set shall only contain one
|
|
row.
|
|
*/
|
|
if (tables_list && implicit_grouping)
|
|
{
|
|
int res;
|
|
/*
|
|
opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
|
|
to the WHERE conditions,
|
|
or 1 if all items were resolved (optimized away),
|
|
or 0, or an error number HA_ERR_...
|
|
|
|
If all items were resolved by opt_sum_query, there is no need to
|
|
open any tables.
|
|
*/
|
|
|
|
/*
|
|
The following resetting and restoring of sum_funcs is needed to
|
|
go around a bug in spider where it assumes that
|
|
make_sum_func_list() has not been called yet and do logical
|
|
choices based on this if special handling of min/max functions should
|
|
be done. We disable this special handling while we are trying to find
|
|
out if we can replace MIN/MAX values with constants.
|
|
*/
|
|
Item_sum **save_func_sums= sum_funcs, *tmp_sum_funcs= 0;
|
|
sum_funcs= &tmp_sum_funcs;
|
|
res= opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds);
|
|
sum_funcs= save_func_sums;
|
|
|
|
if (res)
|
|
{
|
|
DBUG_ASSERT(res >= 0);
|
|
if (res == HA_ERR_KEY_NOT_FOUND)
|
|
{
|
|
DBUG_PRINT("info",("No matching min/max row"));
|
|
zero_result_cause= "No matching min/max row";
|
|
table_count= top_join_tab_count= 0;
|
|
error=0;
|
|
subq_exit_fl= true;
|
|
handle_implicit_grouping_with_window_funcs();
|
|
goto setup_subq_exit;
|
|
}
|
|
if (res > 1)
|
|
{
|
|
error= res;
|
|
DBUG_PRINT("error",("Error from opt_sum_query"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
DBUG_PRINT("info",("Select tables optimized away"));
|
|
if (!select_lex->have_window_funcs())
|
|
zero_result_cause= "Select tables optimized away";
|
|
tables_list= 0; // All tables resolved
|
|
select_lex->min_max_opt_list.empty();
|
|
const_tables= top_join_tab_count= table_count;
|
|
handle_implicit_grouping_with_window_funcs();
|
|
/*
|
|
Extract all table-independent conditions and replace the WHERE
|
|
clause with them. All other conditions were computed by opt_sum_query
|
|
and the MIN/MAX/COUNT function(s) have been replaced by constants,
|
|
so there is no need to compute the whole WHERE clause again.
|
|
Notice that make_cond_for_table() will always succeed to remove all
|
|
computed conditions, because opt_sum_query() is applicable only to
|
|
conjunctions.
|
|
Preserve conditions for EXPLAIN.
|
|
*/
|
|
if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
|
|
{
|
|
COND *table_independent_conds=
|
|
make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, -1,
|
|
FALSE, FALSE);
|
|
if (!table_independent_conds && thd->is_error())
|
|
DBUG_RETURN(1);
|
|
DBUG_EXECUTE("where",
|
|
print_where(table_independent_conds,
|
|
"where after opt_sum_query()",
|
|
QT_ORDINARY););
|
|
conds= table_independent_conds;
|
|
}
|
|
}
|
|
}
|
|
if (!tables_list)
|
|
{
|
|
DBUG_PRINT("info",("No tables"));
|
|
error= 0;
|
|
subq_exit_fl= true;
|
|
goto setup_subq_exit;
|
|
}
|
|
error= -1; // Error is sent to client
|
|
/* get_sort_by_table() call used to be here: */
|
|
MEM_UNDEFINED(&sort_by_table, sizeof(sort_by_table));
|
|
|
|
/*
|
|
We have to remove constants and duplicates from group_list before
|
|
calling make_join_statistics() as this may call get_best_group_min_max()
|
|
which needs a simplified group_list.
|
|
*/
|
|
if (group_list && table_count == 1)
|
|
{
|
|
group_list= remove_const(this, group_list, conds,
|
|
rollup.state == ROLLUP::STATE_NONE,
|
|
&simple_group);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (!group_list)
|
|
{
|
|
/* The output has only one row */
|
|
order=0;
|
|
simple_order=1;
|
|
group_optimized_away= 1;
|
|
select_distinct=0;
|
|
}
|
|
}
|
|
|
|
/* Calculate how to do the join */
|
|
THD_STAGE_INFO(thd, stage_statistics);
|
|
result->prepare_to_read_rows();
|
|
if (unlikely(make_join_statistics(this, select_lex->leaf_tables,
|
|
&keyuse)) ||
|
|
unlikely(thd->is_error()))
|
|
{
|
|
DBUG_PRINT("error",("Error: make_join_statistics() failed"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/*
|
|
If a splittable materialized derived/view dt_i is embedded into
|
|
into another splittable materialized derived/view dt_o then
|
|
splitting plans for dt_i and dt_o are evaluated independently.
|
|
First the optimizer looks for the best splitting plan sp_i for dt_i.
|
|
It happens when non-splitting plans for dt_o are evaluated.
|
|
The cost of sp_i is considered as the cost of materialization of dt_i
|
|
when evaluating any splitting plan for dt_o.
|
|
*/
|
|
if (fix_all_splittings_in_plan())
|
|
DBUG_RETURN(1);
|
|
|
|
/*
  Reached by falling through and by the zero-result/no-tables short-cuts
  above (those set subq_exit_fl first). With two-phase optimization the
  state becomes OPTIMIZATION_PHASE_1_DONE; otherwise optimize_stage2() is
  run here.
*/
setup_subq_exit:
|
|
with_two_phase_optimization= check_two_phase_optimization(thd);
|
|
if (with_two_phase_optimization)
|
|
optimization_state= JOIN::OPTIMIZATION_PHASE_1_DONE;
|
|
else
|
|
{
|
|
if (optimize_stage2())
|
|
DBUG_RETURN(1);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
int JOIN::optimize_stage2()
|
|
{
|
|
ulonglong select_opts_for_readinfo;
|
|
uint no_jbuf_after;
|
|
JOIN_TAB *tab;
|
|
DBUG_ENTER("JOIN::optimize_stage2");
|
|
|
|
if (subq_exit_fl)
|
|
goto setup_subq_exit;
|
|
|
|
if (unlikely(thd->check_killed()))
|
|
DBUG_RETURN(1);
|
|
|
|
/* Generate an execution plan from the found optimal join order. */
|
|
if (get_best_combination())
|
|
DBUG_RETURN(1);
|
|
|
|
if (make_range_rowid_filters())
|
|
DBUG_RETURN(1);
|
|
|
|
if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
|
|
DBUG_RETURN(1);
|
|
|
|
/*
|
|
We have to call drop_unused_derived_keys() even if we don't have any
|
|
generated keys (enabled with OPTIMIZER_SWITCH_DERIVED_WITH_KEYS)
|
|
as we may still have unique constraints we have to get rid of.
|
|
*/
|
|
drop_unused_derived_keys();
|
|
|
|
if (rollup.state != ROLLUP::STATE_NONE)
|
|
{
|
|
if (rollup_process_const_fields())
|
|
{
|
|
DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Remove distinct if only const tables */
|
|
select_distinct= select_distinct && (const_tables != table_count);
|
|
}
|
|
|
|
THD_STAGE_INFO(thd, stage_preparing);
|
|
if (result->initialize_tables(this))
|
|
{
|
|
DBUG_PRINT("error",("Error: initialize_tables() failed"));
|
|
DBUG_RETURN(1); // error == -1
|
|
}
|
|
if (const_table_map != found_const_table_map &&
|
|
!(select_options & SELECT_DESCRIBE))
|
|
{
|
|
// There is at least one empty const table
|
|
zero_result_cause= "no matching row in const table";
|
|
DBUG_PRINT("error",("Error: %s", zero_result_cause));
|
|
error= 0;
|
|
handle_implicit_grouping_with_window_funcs();
|
|
goto setup_subq_exit;
|
|
}
|
|
if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
|
|
join_record_count > (double) thd->variables.max_join_size &&
|
|
!(select_options & SELECT_DESCRIBE))
|
|
{ /* purecov: inspected */
|
|
my_message(ER_TOO_BIG_SELECT, ER_THD(thd, ER_TOO_BIG_SELECT), MYF(0));
|
|
error= -1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (const_tables && !thd->locked_tables_mode &&
|
|
!(select_options & SELECT_NO_UNLOCK))
|
|
{
|
|
/*
|
|
Unlock all tables, except sequences, as accessing these may still
|
|
require table updates. It's safe to ignore result code as all
|
|
tables where opened for read only.
|
|
*/
|
|
(void) mysql_unlock_some_tables(thd, table, const_tables,
|
|
GET_LOCK_SKIP_SEQUENCES);
|
|
}
|
|
if (!conds && outer_join)
|
|
{
|
|
/* Handle the case where we have an OUTER JOIN without a WHERE */
|
|
conds= (Item*) Item_true;
|
|
}
|
|
|
|
if (impossible_where)
|
|
{
|
|
zero_result_cause=
|
|
"Impossible WHERE noticed after reading const tables";
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
handle_implicit_grouping_with_window_funcs();
|
|
goto setup_subq_exit;
|
|
}
|
|
|
|
select= make_select(*table, const_table_map,
|
|
const_table_map, conds, (SORT_INFO*) 0, 1, &error);
|
|
if (unlikely(error))
|
|
{ /* purecov: inspected */
|
|
error= -1; /* purecov: inspected */
|
|
DBUG_PRINT("error",("Error: make_select() failed"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
reset_nj_counters(this, join_list);
|
|
if (make_outerjoin_info(this))
|
|
{
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/*
|
|
Among the equal fields belonging to the same multiple equality
|
|
choose the one that is to be retrieved first and substitute
|
|
all references to these in where condition for a reference for
|
|
the selected field.
|
|
*/
|
|
if (conds)
|
|
{
|
|
conds= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, conds,
|
|
cond_equal, map2table, true);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from substitute_for_best_equal"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
conds->update_used_tables();
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
trace_condition(thd, "WHERE", "substitute_best_equal", conds);
|
|
|
|
DBUG_EXECUTE("where",
|
|
print_where(conds,
|
|
"after substitute_best_equal",
|
|
QT_ORDINARY););
|
|
}
|
|
if (having)
|
|
{
|
|
having= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, having,
|
|
having_equal, map2table, false);
|
|
if (thd->is_error())
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from substitute_for_best_equal"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (having)
|
|
{
|
|
having->update_used_tables();
|
|
if (unlikely(thd->trace_started()))
|
|
trace_condition(thd, "HAVING", "substitute_best_equal", having);
|
|
}
|
|
|
|
DBUG_EXECUTE("having",
|
|
print_where(having,
|
|
"after substitute_best_equal",
|
|
QT_ORDINARY););
|
|
}
|
|
|
|
/*
|
|
Perform the optimization on fields evaluation mentioned above
|
|
for all on expressions.
|
|
*/
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (*tab->on_expr_ref)
|
|
{
|
|
*tab->on_expr_ref= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB,
|
|
*tab->on_expr_ref,
|
|
tab->cond_equal,
|
|
map2table, true);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from substitute_for_best_equal"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
(*tab->on_expr_ref)->update_used_tables();
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
trace_condition(thd, "ON expr", "substitute_best_equal",
|
|
(*tab->on_expr_ref), tab->table->alias.c_ptr());
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Perform the optimization on fields evaliation mentioned above
|
|
for all used ref items.
|
|
*/
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
uint key_copy_index=0;
|
|
for (uint i=0; i < tab->ref.key_parts; i++)
|
|
{
|
|
Item **ref_item_ptr= tab->ref.items+i;
|
|
Item *ref_item= *ref_item_ptr;
|
|
if (!ref_item->used_tables() && !(select_options & SELECT_DESCRIBE))
|
|
continue;
|
|
COND_EQUAL *equals= cond_equal;
|
|
JOIN_TAB *first_inner= tab->first_inner;
|
|
while (equals)
|
|
{
|
|
ref_item= substitute_for_best_equal_field(thd, tab, ref_item,
|
|
equals, map2table, true);
|
|
if (unlikely(thd->is_error()))
|
|
DBUG_RETURN(1);
|
|
|
|
if (first_inner)
|
|
{
|
|
equals= first_inner->cond_equal;
|
|
first_inner= first_inner->first_upper;
|
|
}
|
|
else
|
|
equals= 0;
|
|
}
|
|
ref_item->update_used_tables();
|
|
if (*ref_item_ptr != ref_item)
|
|
{
|
|
*ref_item_ptr= ref_item;
|
|
Item *item= ref_item->real_item();
|
|
store_key *key_copy= tab->ref.key_copy[key_copy_index];
|
|
if (key_copy->type() == store_key::FIELD_STORE_KEY)
|
|
{
|
|
if (item->basic_const_item())
|
|
{
|
|
/* It is constant propagated here */
|
|
tab->ref.key_copy[key_copy_index]=
|
|
new store_key_const_item(*tab->ref.key_copy[key_copy_index],
|
|
item);
|
|
}
|
|
else if (item->const_item())
|
|
{
|
|
tab->ref.key_copy[key_copy_index]=
|
|
new store_key_item(*tab->ref.key_copy[key_copy_index],
|
|
item, TRUE);
|
|
}
|
|
else
|
|
{
|
|
store_key_field *field_copy= ((store_key_field *)key_copy);
|
|
DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
|
|
field_copy->change_source_field((Item_field *) item);
|
|
}
|
|
}
|
|
}
|
|
key_copy_index++;
|
|
}
|
|
}
|
|
|
|
if (conds && const_table_map != found_const_table_map &&
|
|
(select_options & SELECT_DESCRIBE))
|
|
conds= (Item*) Item_false;
|
|
|
|
/* Cache constant expressions in WHERE, HAVING, ON clauses. */
|
|
cache_const_exprs();
|
|
|
|
if (setup_semijoin_loosescan(this))
|
|
DBUG_RETURN(1);
|
|
|
|
if (make_join_select(this, select, conds))
|
|
{
|
|
if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
zero_result_cause=
|
|
"Impossible WHERE noticed after reading const tables";
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
handle_implicit_grouping_with_window_funcs();
|
|
goto setup_subq_exit;
|
|
}
|
|
|
|
error= -1; /* if goto err */
|
|
|
|
/* Optimize distinct away if possible */
|
|
{
|
|
ORDER *org_order= order;
|
|
order=remove_const(this, order,conds,1, &simple_order);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/*
|
|
If we are using ORDER BY NULL or ORDER BY const_expression,
|
|
return result in any order (even if we are using a GROUP BY)
|
|
*/
|
|
if (!order && org_order)
|
|
skip_sort_order= 1;
|
|
}
|
|
|
|
/*
|
|
For FETCH ... WITH TIES save how many items order by had, after we've
|
|
removed constant items that have no relevance on the final sorting.
|
|
*/
|
|
if (unit->lim.is_with_ties())
|
|
{
|
|
DBUG_ASSERT(with_ties_order_count == 0);
|
|
for (ORDER *it= order; it; it= it->next)
|
|
with_ties_order_count+= 1;
|
|
}
|
|
|
|
|
|
/*
|
|
Check if we can optimize away GROUP BY/DISTINCT.
|
|
We can do that if there are no aggregate functions, the
|
|
fields in DISTINCT clause (if present) and/or columns in GROUP BY
|
|
(if present) contain direct references to all key parts of
|
|
an unique index (in whatever order) and if the key parts of the
|
|
unique index cannot contain NULLs.
|
|
Note that the unique keys for DISTINCT and GROUP BY should not
|
|
be the same (as long as they are unique).
|
|
|
|
The FROM clause must contain a single non-constant table.
|
|
*/
|
|
if (table_count - const_tables == 1 && (group || select_distinct) &&
|
|
!tmp_table_param.sum_func_count &&
|
|
(!join_tab[const_tables].select ||
|
|
!join_tab[const_tables].select->quick ||
|
|
join_tab[const_tables].select->quick->get_type() !=
|
|
QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) &&
|
|
!select_lex->have_window_funcs())
|
|
{
|
|
if (group && rollup.state == ROLLUP::STATE_NONE &&
|
|
list_contains_unique_index(join_tab[const_tables].table,
|
|
find_field_in_order_list,
|
|
(void *) group_list))
|
|
{
|
|
/*
|
|
We have found that grouping can be removed since groups correspond to
|
|
only one row anyway, but we still have to guarantee correct result
|
|
order. The line below effectively rewrites the query from GROUP BY
|
|
<fields> to ORDER BY <fields>. There are three exceptions:
|
|
- if skip_sort_order is set (see above), then we can simply skip
|
|
GROUP BY;
|
|
- if we are in a subquery, we don't have to maintain order unless there
|
|
is a limit clause in the subquery.
|
|
- we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
|
|
with the GROUP BY ones, i.e. either one is a prefix of another.
|
|
We only check if the ORDER BY is a prefix of GROUP BY. In this case
|
|
test_if_subpart() copies the ASC/DESC attributes from the original
|
|
ORDER BY fields.
|
|
If GROUP BY is a prefix of ORDER BY, then it is safe to leave
|
|
'order' as is.
|
|
*/
|
|
if (!order || test_if_subpart(group_list, order))
|
|
{
|
|
if (skip_sort_order ||
|
|
(select_lex->master_unit()->item && select_limit == HA_POS_ERROR)) // This is a subquery
|
|
order= NULL;
|
|
else
|
|
order= group_list;
|
|
}
|
|
/*
|
|
If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
|
|
rewritten to IGNORE INDEX FOR ORDER BY(fields).
|
|
*/
|
|
join_tab->table->keys_in_use_for_order_by=
|
|
join_tab->table->keys_in_use_for_group_by;
|
|
group_list= 0;
|
|
group= 0;
|
|
}
|
|
if (select_distinct &&
|
|
list_contains_unique_index(join_tab[const_tables].table,
|
|
find_field_in_item_list,
|
|
(void *) &fields_list))
|
|
{
|
|
select_distinct= 0;
|
|
}
|
|
}
|
|
if (group || tmp_table_param.sum_func_count)
|
|
{
|
|
if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE
|
|
&& !select_lex->have_window_funcs())
|
|
select_distinct=0;
|
|
}
|
|
else if (select_distinct && table_count - const_tables == 1 &&
|
|
rollup.state == ROLLUP::STATE_NONE &&
|
|
!select_lex->have_window_funcs())
|
|
{
|
|
/*
|
|
We are only using one table. In this case we change DISTINCT to a
|
|
GROUP BY query if:
|
|
- The GROUP BY can be done through indexes (no sort) and the ORDER
|
|
BY only uses selected fields.
|
|
(In this case we can later optimize away GROUP BY and ORDER BY)
|
|
- We are scanning the whole table without LIMIT
|
|
This can happen if:
|
|
- We are using CALC_FOUND_ROWS
|
|
- We are using an ORDER BY that can't be optimized away.
|
|
|
|
We don't want to use this optimization when we are using LIMIT
|
|
because in this case we can just create a temporary table that
|
|
holds LIMIT rows and stop when this table is full.
|
|
*/
|
|
bool all_order_fields_used;
|
|
|
|
tab= &join_tab[const_tables];
|
|
if (order)
|
|
{
|
|
bool fatal_err;
|
|
skip_sort_order=
|
|
test_if_skip_sort_order(tab, order, select_limit,
|
|
true, // no_changes
|
|
&tab->table->keys_in_use_for_order_by,
|
|
&fatal_err);
|
|
if (fatal_err)
|
|
DBUG_RETURN(1);
|
|
}
|
|
if ((group_list=create_distinct_group(thd, select_lex->ref_pointer_array,
|
|
order, fields_list, all_fields,
|
|
&all_order_fields_used)))
|
|
{
|
|
bool fatal_err= 0;
|
|
const bool skip_group=
|
|
skip_sort_order &&
|
|
test_if_skip_sort_order(tab, group_list, select_limit,
|
|
true, // no_changes
|
|
&tab->table->keys_in_use_for_group_by,
|
|
&fatal_err);
|
|
if (fatal_err)
|
|
DBUG_RETURN(1);
|
|
|
|
count_field_types(select_lex, &tmp_table_param, all_fields, 0);
|
|
if ((skip_group && all_order_fields_used) ||
|
|
select_limit == HA_POS_ERROR ||
|
|
(order && !skip_sort_order))
|
|
{
|
|
/* Change DISTINCT to GROUP BY */
|
|
select_distinct= 0;
|
|
no_order= !order;
|
|
if (all_order_fields_used)
|
|
{
|
|
if (order && skip_sort_order)
|
|
{
|
|
/*
|
|
Force MySQL to read the table in sorted order to get result in
|
|
ORDER BY order.
|
|
*/
|
|
tmp_table_param.quick_group=0;
|
|
}
|
|
order=0;
|
|
}
|
|
group=1; // For end_write_group
|
|
}
|
|
else
|
|
group_list= 0;
|
|
}
|
|
else if (thd->is_error()) // End of memory
|
|
DBUG_RETURN(1);
|
|
}
|
|
simple_group= rollup.state == ROLLUP::STATE_NONE;
|
|
if (group)
|
|
{
|
|
/*
|
|
Update simple_group and group_list as we now have more information, like
|
|
which tables or columns are constant.
|
|
*/
|
|
group_list= remove_const(this, group_list, conds,
|
|
rollup.state == ROLLUP::STATE_NONE,
|
|
&simple_group);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (!group_list)
|
|
{
|
|
/* The output has only one row */
|
|
order=0;
|
|
simple_order=1;
|
|
select_distinct= 0;
|
|
group_optimized_away= 1;
|
|
}
|
|
}
|
|
|
|
calc_group_buffer(this, group_list);
|
|
send_group_parts= tmp_table_param.group_parts; /* Save org parts */
|
|
if (procedure && procedure->group)
|
|
{
|
|
group_list= procedure->group= remove_const(this, procedure->group, conds,
|
|
1, &simple_group);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
calc_group_buffer(this, group_list);
|
|
}
|
|
|
|
/*
|
|
We can ignore ORDER BY if it's a prefix of the GROUP BY list
|
|
(as MariaDB is by default sorting on GROUP BY) or
|
|
if there is no GROUP BY and aggregate functions are used
|
|
(as the result will only contain one row).
|
|
*/
|
|
if (order && (test_if_subpart(group_list, order) ||
|
|
(!group_list && tmp_table_param.sum_func_count)))
|
|
order=0;
|
|
|
|
// Can't use sort on head table if using join buffering
|
|
if (full_join || hash_join)
|
|
{
|
|
TABLE *stable= (sort_by_table == (TABLE *) 1 ?
|
|
join_tab[const_tables].table : sort_by_table);
|
|
/*
|
|
FORCE INDEX FOR ORDER BY can be used to prevent join buffering when
|
|
sorting on the first table.
|
|
*/
|
|
if (!stable || (!stable->force_index_order &&
|
|
!map2table[stable->tablenr]->keep_current_rowid))
|
|
{
|
|
if (group_list)
|
|
simple_group= 0;
|
|
if (order)
|
|
simple_order= 0;
|
|
}
|
|
}
|
|
|
|
need_tmp= test_if_need_tmp_table();
|
|
|
|
/*
|
|
If window functions are present then we can't have simple_order set to
|
|
TRUE as the window function needs a temp table for computation.
|
|
ORDER BY is computed after the window function computation is done, so
|
|
the sort will be done on the temp table.
|
|
*/
|
|
if (select_lex->have_window_funcs())
|
|
simple_order= FALSE;
|
|
|
|
/*
|
|
If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
|
|
whose columns are required to be returned in a sorted order, then
|
|
the proper value for no_jbuf_after should be yielded by a call to
|
|
the make_join_orderinfo function.
|
|
Yet the current implementation of FORCE INDEX hints does not
|
|
allow us to do it in a clean manner.
|
|
*/
|
|
no_jbuf_after= 1 ? table_count : make_join_orderinfo(this);
|
|
|
|
// Don't use join buffering when we use MATCH
|
|
select_opts_for_readinfo=
|
|
(select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
|
|
(select_lex->ftfunc_list->elements ? SELECT_NO_JOIN_CACHE : 0);
|
|
|
|
if (select_lex->options & OPTION_SCHEMA_TABLE &&
|
|
optimize_schema_tables_reads(this))
|
|
DBUG_RETURN(1);
|
|
|
|
if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
|
|
DBUG_RETURN(1);
|
|
|
|
/* Perform FULLTEXT search before all regular searches */
|
|
if (!(select_options & SELECT_DESCRIBE))
|
|
if (init_ftfuncs(thd, select_lex, MY_TEST(order)))
|
|
DBUG_RETURN(1);
|
|
|
|
/*
|
|
It's necessary to check const part of HAVING cond as
|
|
there is a chance that some cond parts may become
|
|
const items after make_join_statistics(for example
|
|
when Item is a reference to cost table field from
|
|
outer join).
|
|
This check is performed only for those conditions
|
|
which do not use aggregate functions. In such case
|
|
temporary table may not be used and const condition
|
|
elements may be lost during further having
|
|
condition transformation.
|
|
*/
|
|
if (having && const_table_map && !having->with_sum_func())
|
|
{
|
|
having->update_used_tables();
|
|
if (having->const_item() && !having->is_expensive())
|
|
{
|
|
if (!having->val_int())
|
|
{
|
|
having= Item_false;
|
|
zero_result_cause= "Impossible HAVING noticed after reading const tables";
|
|
error= 0;
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
goto setup_subq_exit;
|
|
}
|
|
having= Item_true;
|
|
}
|
|
}
|
|
|
|
if (optimize_unflattened_subqueries())
|
|
DBUG_RETURN(1);
|
|
|
|
int res;
|
|
if ((res= rewrite_to_index_subquery_engine(this)) != -1)
|
|
DBUG_RETURN(res);
|
|
if (setup_subquery_caches())
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
|
|
Need to tell handlers that to play it safe, it should fetch all
|
|
columns of the primary key of the tables: this is because MySQL may
|
|
build row pointers for the rows, and for all columns of the primary key
|
|
the read set has not necessarily been set by the server code.
|
|
*/
|
|
if (need_tmp || select_distinct || group_list || order)
|
|
{
|
|
for (uint i= 0; i < table_count; i++)
|
|
{
|
|
if (!(table[i]->map & const_table_map))
|
|
table[i]->prepare_for_position();
|
|
}
|
|
}
|
|
|
|
DBUG_EXECUTE("info",TEST_join(this););
|
|
|
|
if (!only_const_tables())
|
|
{
|
|
JOIN_TAB *tab= &join_tab[const_tables];
|
|
|
|
if (order && !need_tmp)
|
|
{
|
|
/*
|
|
Force using of tmp table if sorting by a SP or UDF function due to
|
|
their expensive and probably non-deterministic nature.
|
|
*/
|
|
for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
|
|
{
|
|
Item *item= *tmp_order->item;
|
|
if (item->is_expensive())
|
|
{
|
|
/* Force tmp table without sort */
|
|
need_tmp=1; simple_order=simple_group=0;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Because filesort always does a full table scan or a quick range scan
|
|
we must add the removed reference to the select for the table.
|
|
We only need to do this when we have a simple_order or simple_group
|
|
as in other cases the join is done before the sort.
|
|
*/
|
|
if ((order || group_list) &&
|
|
tab->type != JT_ALL &&
|
|
tab->type != JT_RANGE &&
|
|
tab->type != JT_NEXT &&
|
|
tab->type != JT_FT &&
|
|
tab->type != JT_REF_OR_NULL &&
|
|
((order && simple_order) || (group_list && simple_group)))
|
|
{
|
|
if (add_ref_to_table_cond(thd,tab)) {
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
/*
|
|
Investigate whether we may use an ordered index as part of either
|
|
DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be
|
|
used for only the first of any of these terms to be executed. This
|
|
is reflected in the order which we check for test_if_skip_sort_order()
|
|
below. However we do not check for DISTINCT here, as it would have
|
|
been transformed to a GROUP BY at this stage if it is a candidate for
|
|
ordered index optimization.
|
|
If a decision was made to use an ordered index, the availability
|
|
of such an access path is stored in 'ordered_index_usage' for later
|
|
use by 'execute' or 'explain'
|
|
*/
|
|
DBUG_ASSERT(ordered_index_usage == ordered_index_void);
|
|
|
|
if (group_list) // GROUP BY honoured first
|
|
// (DISTINCT was rewritten to GROUP BY if skippable)
|
|
{
|
|
/*
|
|
When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
|
|
and thus force sorting on disk unless a group min-max optimization
|
|
is going to be used as it is applied now only for one table queries
|
|
with covering indexes.
|
|
*/
|
|
if (!(select_options & SELECT_BIG_RESULT) ||
|
|
(tab->select &&
|
|
tab->select->quick &&
|
|
tab->select->quick->get_type() ==
|
|
QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
|
|
{
|
|
if (simple_group && // GROUP BY is possibly skippable
|
|
!select_distinct) // .. if not preceded by a DISTINCT
|
|
{
|
|
/*
|
|
Calculate a possible 'limit' of table rows for 'GROUP BY':
|
|
A specified 'LIMIT' is relative to the final resultset.
|
|
'need_tmp' implies that there will be more postprocessing
|
|
so the specified 'limit' should not be enforced yet.
|
|
*/
|
|
bool fatal_err;
|
|
const ha_rows limit = need_tmp ? HA_POS_ERROR : select_limit;
|
|
if (test_if_skip_sort_order(tab, group_list, limit, false,
|
|
&tab->table->keys_in_use_for_group_by,
|
|
&fatal_err))
|
|
{
|
|
ordered_index_usage= ordered_index_group_by;
|
|
}
|
|
if (fatal_err)
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/*
|
|
If we are going to use semi-join LooseScan, it will depend
|
|
on the selected index scan to be used. If index is not used
|
|
for the GROUP BY, we risk that sorting is put on the LooseScan
|
|
table. In order to avoid this, force use of temporary table.
|
|
TODO: Explain the quick_group part of the test below.
|
|
*/
|
|
if ((ordered_index_usage != ordered_index_group_by) &&
|
|
((tmp_table_param.quick_group && !procedure) ||
|
|
(tab->emb_sj_nest &&
|
|
best_positions[const_tables].sj_strategy == SJ_OPT_LOOSE_SCAN)))
|
|
{
|
|
need_tmp=1;
|
|
simple_order= simple_group= false; // Force tmp table without sort
|
|
}
|
|
}
|
|
}
|
|
else if (order && // ORDER BY wo/ preceding GROUP BY
|
|
(simple_order || skip_sort_order)) // which is possibly skippable
|
|
{
|
|
bool fatal_err;
|
|
if (test_if_skip_sort_order(tab, order, select_limit, false,
|
|
&tab->table->keys_in_use_for_order_by,
|
|
&fatal_err))
|
|
{
|
|
ordered_index_usage= ordered_index_order_by;
|
|
}
|
|
if (fatal_err)
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
|
|
if (having)
|
|
having_is_correlated= MY_TEST(having->used_tables() & OUTER_REF_TABLE_BIT);
|
|
tmp_having= having;
|
|
|
|
if (unlikely(thd->is_error()))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
/*
|
|
The loose index scan access method guarantees that all grouping or
|
|
duplicate row elimination (for distinct) is already performed
|
|
during data retrieval, and that all MIN/MAX functions are already
|
|
computed for each group. Thus all MIN/MAX functions should be
|
|
treated as regular functions, and there is no need to perform
|
|
grouping in the main execution loop.
|
|
Notice that currently loose index scan is applicable only for
|
|
single table queries, thus it is sufficient to test only the first
|
|
join_tab element of the plan for its access method.
|
|
*/
|
|
if (join_tab->is_using_loose_index_scan())
|
|
{
|
|
tmp_table_param.precomputed_group_by= TRUE;
|
|
if (join_tab->is_using_agg_loose_index_scan())
|
|
{
|
|
need_distinct= FALSE;
|
|
tmp_table_param.precomputed_group_by= FALSE;
|
|
}
|
|
}
|
|
|
|
if (make_aggr_tables_info())
|
|
DBUG_RETURN(1);
|
|
|
|
init_join_cache_and_keyread();
|
|
|
|
if (init_range_rowid_filters())
|
|
DBUG_RETURN(1);
|
|
|
|
error= 0;
|
|
|
|
if (select_options & SELECT_DESCRIBE)
|
|
goto derived_exit;
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
setup_subq_exit:
|
|
/* Choose an execution strategy for this JOIN. */
|
|
if (!tables_list || !table_count)
|
|
{
|
|
choose_tableless_subquery_plan();
|
|
|
|
/* The output has atmost one row */
|
|
if (group_list)
|
|
{
|
|
group_list= NULL;
|
|
group_optimized_away= 1;
|
|
rollup.state= ROLLUP::STATE_NONE;
|
|
}
|
|
order= NULL;
|
|
simple_order= TRUE;
|
|
select_distinct= FALSE;
|
|
|
|
if (select_lex->have_window_funcs())
|
|
{
|
|
if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB))))
|
|
DBUG_RETURN(1);
|
|
#ifndef DBUG_OFF
|
|
dbug_join_tab_array_size= 1;
|
|
#endif
|
|
need_tmp= 1;
|
|
}
|
|
if (make_aggr_tables_info())
|
|
DBUG_RETURN(1);
|
|
|
|
/*
|
|
It could be that we've only done optimization stage 1 for
|
|
some of the derived tables, and never did stage 2.
|
|
Do it now, otherwise Explain data structure will not be complete.
|
|
*/
|
|
if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
|
|
DBUG_RETURN(1);
|
|
}
|
|
/*
|
|
Even with zero matching rows, subqueries in the HAVING clause may
|
|
need to be evaluated if there are aggregate functions in the query.
|
|
*/
|
|
if (optimize_unflattened_subqueries())
|
|
DBUG_RETURN(1);
|
|
error= 0;
|
|
|
|
derived_exit:
|
|
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/**
|
|
Add having condition as a where clause condition of the given temp table.
|
|
|
|
@param tab Table to which having condition is added.
|
|
|
|
@returns false if success, true if error.
|
|
*/
|
|
|
|
bool JOIN::add_having_as_table_cond(JOIN_TAB *tab)
|
|
{
|
|
tmp_having->update_used_tables();
|
|
table_map used_tables= tab->table->map | OUTER_REF_TABLE_BIT;
|
|
|
|
/* If tmp table is not used then consider conditions of const table also */
|
|
if (!need_tmp)
|
|
used_tables|= const_table_map;
|
|
|
|
DBUG_ENTER("JOIN::add_having_as_table_cond");
|
|
|
|
Item* sort_table_cond= make_cond_for_table(thd, tmp_having, used_tables,
|
|
(table_map) 0, 0, false, false);
|
|
if (sort_table_cond)
|
|
{
|
|
if (!tab->select)
|
|
{
|
|
if (!(tab->select= new SQL_SELECT))
|
|
DBUG_RETURN(true);
|
|
tab->select->head= tab->table;
|
|
}
|
|
if (!tab->select->cond)
|
|
tab->select->cond= sort_table_cond;
|
|
else
|
|
{
|
|
if (!(tab->select->cond=
|
|
new (thd->mem_root) Item_cond_and(thd,
|
|
tab->select->cond,
|
|
sort_table_cond)))
|
|
DBUG_RETURN(true);
|
|
}
|
|
if (tab->pre_idx_push_select_cond)
|
|
{
|
|
if (sort_table_cond->type() == Item::COND_ITEM)
|
|
sort_table_cond= sort_table_cond->copy_andor_structure(thd);
|
|
if (!(tab->pre_idx_push_select_cond=
|
|
new (thd->mem_root) Item_cond_and(thd,
|
|
tab->pre_idx_push_select_cond,
|
|
sort_table_cond)))
|
|
DBUG_RETURN(true);
|
|
}
|
|
if (tab->select->cond)
|
|
tab->select->cond->fix_fields_if_needed(thd, 0);
|
|
if (tab->pre_idx_push_select_cond)
|
|
tab->pre_idx_push_select_cond->fix_fields_if_needed(thd, 0);
|
|
tab->select->pre_idx_push_select_cond= tab->pre_idx_push_select_cond;
|
|
tab->set_select_cond(tab->select->cond, __LINE__);
|
|
tab->select_cond->top_level_item();
|
|
DBUG_EXECUTE("where",print_where(tab->select->cond,
|
|
"select and having",
|
|
QT_ORDINARY););
|
|
|
|
having= make_cond_for_table(thd, tmp_having, ~ (table_map) 0,
|
|
~used_tables, 0, false, false);
|
|
if (!having && thd->is_error())
|
|
DBUG_RETURN(true);
|
|
DBUG_EXECUTE("where",
|
|
print_where(having, "having after sort", QT_ORDINARY););
|
|
}
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(true);
|
|
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
|
|
/**
  Add rowid items for the tables preceding 'cur' that keep their current
  rowid.

  For every JOIN_TAB in [join_tab, cur) with keep_current_rowid set, an
  Item_temptable_rowid is created, fixed, appended to 'table_fields' and
  counted in cur->tmp_table_param->func_count.

  @param cur           Join tab up to which (exclusive) tabs are examined;
                       its tmp_table_param receives the function count.
  @param table_fields  List the new rowid items are appended to.

  @returns false on success, true on error (item allocation, fix_fields()
           or list append failed).
*/

bool JOIN::add_fields_for_current_rowid(JOIN_TAB *cur, List<Item> *table_fields)
{
  /*
    this will not walk into semi-join materialization nests but this is ok
    because we will never need to save current rowids for those.
  */
  for (JOIN_TAB *tab=join_tab; tab < cur; tab++)
  {
    if (!tab->keep_current_rowid)
      continue;
    Item *item= new (thd->mem_root) Item_temptable_rowid(tab->table);
    /* The mem_root allocation may fail: never dereference a NULL item */
    if (unlikely(!item) || unlikely(item->fix_fields(thd, 0)))
      return true;
    /* Count the item only once it really is in the list */
    if (unlikely(table_fields->push_back(item, thd->mem_root)))
      return true;
    cur->tmp_table_param->func_count++;
  }
  return false;
}
|
|
|
|
|
|
/**
|
|
Set info for aggregation tables
|
|
|
|
@details
|
|
This function finalizes execution plan by taking following actions:
|
|
.) aggregation temporary tables are created, but not instantiated
|
|
(this is done during execution).
|
|
JOIN_TABs for aggregation tables are set appropriately
|
|
(see JOIN::create_postjoin_aggr_table).
|
|
.) prepare fields lists (fields, all_fields, ref_pointer_array slices) for
|
|
each required stage of execution. These fields lists are set for
|
|
working tables' tabs and for the tab of last table in the join.
|
|
.) info for sorting/grouping/dups removal is prepared and saved in
|
|
appropriate tabs. Here is an example:
|
|
|
|
@returns
|
|
false - Ok
|
|
true - Error
|
|
*/
|
|
|
|
bool JOIN::make_aggr_tables_info()
|
|
{
|
|
List<Item> *curr_all_fields= &all_fields;
|
|
List<Item> *curr_fields_list= &fields_list;
|
|
// Avoid UB (applying .. offset to nullptr) when join_tab is nullptr
|
|
JOIN_TAB *curr_tab= join_tab ? join_tab + const_tables : nullptr;
|
|
TABLE *exec_tmp_table= NULL;
|
|
bool distinct= false;
|
|
const bool has_group_by= this->group;
|
|
bool keep_row_order= thd->lex->with_rownum && (group_list || order);
|
|
bool is_having_added_as_table_cond= false;
|
|
DBUG_ENTER("JOIN::make_aggr_tables_info");
|
|
|
|
|
|
sort_and_group_aggr_tab= NULL;
|
|
|
|
if (group_optimized_away)
|
|
implicit_grouping= true;
|
|
|
|
bool implicit_grouping_with_window_funcs= implicit_grouping &&
|
|
select_lex->have_window_funcs();
|
|
bool implicit_grouping_without_tables= implicit_grouping &&
|
|
!tables_list;
|
|
|
|
/*
|
|
Setup last table to provide fields and all_fields lists to the next
|
|
node in the plan.
|
|
*/
|
|
if (join_tab && top_join_tab_count && tables_list)
|
|
{
|
|
join_tab[top_join_tab_count - 1].fields= &fields_list;
|
|
join_tab[top_join_tab_count - 1].all_fields= &all_fields;
|
|
}
|
|
|
|
/*
|
|
All optimization is done. Check if we can use the storage engines
|
|
group by handler to evaluate the group by.
|
|
Some storage engines, like spider can also do joins, group by and
|
|
distinct in the engine, so we do this for all queries, not only
|
|
GROUP BY queries.
|
|
*/
|
|
if (tables_list && top_join_tab_count && !only_const_tables() && !procedure)
|
|
{
|
|
/*
|
|
At the moment we only support push down for queries where
|
|
all tables are in the same storage engine
|
|
*/
|
|
TABLE_LIST *tbl= tables_list;
|
|
handlerton *ht= tbl && tbl->table ? tbl->table->file->partition_ht() : 0;
|
|
for (tbl= tbl->next_local; ht && tbl; tbl= tbl->next_local)
|
|
{
|
|
if (!tbl->table || tbl->table->file->partition_ht() != ht)
|
|
ht= 0;
|
|
}
|
|
|
|
if (ht && ht->create_group_by)
|
|
{
|
|
/*
|
|
Check if the storage engine can intercept the query
|
|
|
|
JOIN::optimize_stage2() might convert DISTINCT into GROUP BY and then
|
|
optimize away GROUP BY (group_list). In such a case, we need to notify
|
|
a storage engine supporting a group by handler of the existence of the
|
|
original DISTINCT. Thus, we set select_distinct || group_optimized_away
|
|
to Query::distinct.
|
|
*/
|
|
Query query= {&all_fields, select_distinct || group_optimized_away,
|
|
tables_list, conds,
|
|
group_list, order ? order : group_list, having,
|
|
&select_lex->master_unit()->lim};
|
|
group_by_handler *gbh= ht->create_group_by(thd, &query);
|
|
|
|
if (gbh)
|
|
{
|
|
if (!(pushdown_query= new (thd->mem_root) Pushdown_query(select_lex,
|
|
gbh)))
|
|
DBUG_RETURN(1);
|
|
/*
|
|
We must store rows in the tmp table if we need to do an ORDER BY
|
|
or DISTINCT and the storage handler can't handle it.
|
|
*/
|
|
need_tmp= query.order_by || query.group_by || query.distinct;
|
|
distinct= query.distinct;
|
|
keep_row_order= query.order_by || query.group_by;
|
|
|
|
order= query.order_by;
|
|
|
|
aggr_tables++;
|
|
curr_tab= join_tab + exec_join_tab_cnt();
|
|
bzero((void*)curr_tab, sizeof(JOIN_TAB));
|
|
curr_tab->ref.key= -1;
|
|
curr_tab->join= this;
|
|
|
|
if (!(curr_tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param)))
|
|
DBUG_RETURN(1);
|
|
curr_tab->tmp_table_param->func_count= all_fields.elements;
|
|
TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param,
|
|
all_fields,
|
|
NULL, distinct,
|
|
TRUE, select_options, HA_ROWS_MAX,
|
|
&empty_clex_str, !need_tmp,
|
|
keep_row_order);
|
|
if (!table)
|
|
DBUG_RETURN(1);
|
|
|
|
if (!(curr_tab->aggr= new (thd->mem_root) AGGR_OP(curr_tab)))
|
|
DBUG_RETURN(1);
|
|
curr_tab->aggr->set_write_func(::end_send);
|
|
curr_tab->table= table;
|
|
/*
|
|
Setup reference fields, used by summary functions and group by fields,
|
|
to point to the temporary table.
|
|
The actual switching to the temporary tables fields for HAVING
|
|
and ORDER BY is done in do_select() by calling
|
|
set_items_ref_array(items1).
|
|
*/
|
|
init_items_ref_array();
|
|
items1= ref_ptr_array_slice(2);
|
|
//items1= items0 + all_fields.elements;
|
|
if (change_to_use_tmp_fields(thd, items1,
|
|
tmp_fields_list1, tmp_all_fields1,
|
|
fields_list.elements, all_fields))
|
|
DBUG_RETURN(1);
|
|
|
|
/* Give storage engine access to temporary table */
|
|
gbh->table= table;
|
|
pushdown_query->store_data_in_temp_table= need_tmp;
|
|
pushdown_query->having= having;
|
|
|
|
/*
|
|
Group by and having is calculated by the group_by handler.
|
|
Reset the group by and having
|
|
*/
|
|
DBUG_ASSERT(query.group_by == NULL);
|
|
group= 0; group_list= 0;
|
|
having= tmp_having= 0;
|
|
/*
|
|
Select distinct is handled by handler or by creating an unique index
|
|
over all fields in the temporary table
|
|
*/
|
|
select_distinct= 0;
|
|
order= query.order_by;
|
|
tmp_table_param.field_count+= tmp_table_param.sum_func_count;
|
|
tmp_table_param.sum_func_count= 0;
|
|
|
|
fields= curr_fields_list;
|
|
|
|
//todo: new:
|
|
curr_tab->ref_array= &items1;
|
|
curr_tab->all_fields= &tmp_all_fields1;
|
|
curr_tab->fields= &tmp_fields_list1;
|
|
|
|
DBUG_RETURN(thd->is_error());
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
The loose index scan access method guarantees that all grouping or
|
|
duplicate row elimination (for distinct) is already performed
|
|
during data retrieval, and that all MIN/MAX functions are already
|
|
computed for each group. Thus all MIN/MAX functions should be
|
|
treated as regular functions, and there is no need to perform
|
|
grouping in the main execution loop.
|
|
Notice that currently loose index scan is applicable only for
|
|
single table queries, thus it is sufficient to test only the first
|
|
join_tab element of the plan for its access method.
|
|
*/
|
|
if (join_tab && top_join_tab_count && tables_list &&
|
|
join_tab->is_using_loose_index_scan())
|
|
tmp_table_param.precomputed_group_by=
|
|
!join_tab->is_using_agg_loose_index_scan();
|
|
|
|
group_list_for_estimates= group_list;
|
|
/* Create a tmp table if distinct or if the sort is too complicated */
|
|
if (need_tmp)
|
|
{
|
|
aggr_tables++;
|
|
curr_tab= join_tab + exec_join_tab_cnt();
|
|
DBUG_ASSERT(curr_tab - join_tab < dbug_join_tab_array_size);
|
|
bzero((void*)curr_tab, sizeof(JOIN_TAB));
|
|
curr_tab->ref.key= -1;
|
|
if (only_const_tables())
|
|
first_select= sub_select_postjoin_aggr;
|
|
|
|
/*
|
|
Create temporary table on first execution of this join.
|
|
(Will be reused if this is a subquery that is executed several times.)
|
|
*/
|
|
init_items_ref_array();
|
|
|
|
ORDER *tmp_group= (ORDER *) 0;
|
|
if (!simple_group && !procedure && !(test_flags & TEST_NO_KEY_GROUP))
|
|
tmp_group= group_list;
|
|
|
|
tmp_table_param.hidden_field_count=
|
|
all_fields.elements - fields_list.elements;
|
|
|
|
distinct= select_distinct && !group_list &&
|
|
!select_lex->have_window_funcs();
|
|
keep_row_order= thd->lex->with_rownum && (group_list || order);
|
|
bool save_sum_fields= (group_list && simple_group) ||
|
|
implicit_grouping_with_window_funcs;
|
|
if (create_postjoin_aggr_table(curr_tab,
|
|
&all_fields, tmp_group,
|
|
save_sum_fields,
|
|
distinct, keep_row_order))
|
|
DBUG_RETURN(true);
|
|
exec_tmp_table= curr_tab->table;
|
|
|
|
if (exec_tmp_table->distinct)
|
|
optimize_distinct();
|
|
|
|
/* Change sum_fields reference to calculated fields in tmp_table */
|
|
items1= ref_ptr_array_slice(2);
|
|
if ((sort_and_group || curr_tab->table->group ||
|
|
tmp_table_param.precomputed_group_by) &&
|
|
!implicit_grouping_without_tables)
|
|
{
|
|
if (change_to_use_tmp_fields(thd, items1,
|
|
tmp_fields_list1, tmp_all_fields1,
|
|
fields_list.elements, all_fields))
|
|
DBUG_RETURN(true);
|
|
}
|
|
else
|
|
{
|
|
if (change_refs_to_tmp_fields(thd, items1,
|
|
tmp_fields_list1, tmp_all_fields1,
|
|
fields_list.elements, all_fields))
|
|
DBUG_RETURN(true);
|
|
}
|
|
curr_all_fields= &tmp_all_fields1;
|
|
curr_fields_list= &tmp_fields_list1;
|
|
// Need to set them now for correct group_fields setup, reset at the end.
|
|
set_items_ref_array(items1);
|
|
curr_tab->ref_array= &items1;
|
|
curr_tab->all_fields= &tmp_all_fields1;
|
|
curr_tab->fields= &tmp_fields_list1;
|
|
set_postjoin_aggr_write_func(curr_tab);
|
|
|
|
/*
|
|
If having is not handled here, it will be checked before the row is sent
|
|
to the client.
|
|
*/
|
|
if (tmp_having &&
|
|
(sort_and_group || (exec_tmp_table->distinct && !group_list) ||
|
|
select_lex->have_window_funcs()))
|
|
{
|
|
/*
|
|
If there is no select distinct and there are no window functions
|
|
then move the having to table conds of tmp table.
|
|
NOTE : We cannot apply having after distinct or window functions
|
|
If columns of having are not part of select distinct,
|
|
then distinct may remove rows which can satisfy having.
|
|
In the case of window functions we *must* make sure to not
|
|
store any rows which don't match HAVING within the temp table,
|
|
as rows will end up being used during their computation.
|
|
*/
|
|
if (!select_distinct && !select_lex->have_window_funcs() &&
|
|
add_having_as_table_cond(curr_tab))
|
|
DBUG_RETURN(true);
|
|
is_having_added_as_table_cond= tmp_having != having;
|
|
|
|
/*
|
|
Having condition which we are not able to add as tmp table conds are
|
|
kept as before. And, this will be applied before storing the rows in
|
|
tmp table.
|
|
*/
|
|
curr_tab->having= having;
|
|
having= NULL; // Already done
|
|
}
|
|
|
|
tmp_table_param.func_count= 0;
|
|
tmp_table_param.field_count+= tmp_table_param.func_count;
|
|
if (sort_and_group || curr_tab->table->group)
|
|
{
|
|
tmp_table_param.field_count+= tmp_table_param.sum_func_count;
|
|
tmp_table_param.sum_func_count= 0;
|
|
}
|
|
|
|
if (exec_tmp_table->group)
|
|
{ // Already grouped
|
|
if (!order && !no_order && !skip_sort_order)
|
|
order= group_list; /* order by group */
|
|
group_list= NULL;
|
|
}
|
|
|
|
/*
|
|
If we have different sort & group then we must sort the data by group
|
|
and copy it to another tmp table.
|
|
|
|
This code is also used if we are using distinct something
|
|
we haven't been able to store in the temporary table yet
|
|
like SEC_TO_TIME(SUM(...)).
|
|
|
|
3. Also, this is used when
|
|
- the query has Window functions,
|
|
- the GROUP BY operation is done with OrderedGroupBy algorithm.
|
|
In this case, the first temptable will contain pre-GROUP-BY data. Force
|
|
the creation of the second temporary table. Post-GROUP-BY dataset will be
|
|
written there, and then Window Function processing code will be able to
|
|
process it.
|
|
*/
|
|
if ((group_list &&
|
|
(!test_if_subpart(group_list, order) || select_distinct)) ||
|
|
(select_distinct && tmp_table_param.using_outer_summary_function) ||
|
|
(group_list && !tmp_table_param.quick_group && // (3)
|
|
select_lex->have_window_funcs())) // (3)
|
|
{ /* Must copy to another table */
|
|
DBUG_PRINT("info",("Creating group table"));
|
|
|
|
calc_group_buffer(this, group_list);
|
|
count_field_types(select_lex, &tmp_table_param, tmp_all_fields1,
|
|
select_distinct && !group_list);
|
|
tmp_table_param.hidden_field_count=
|
|
tmp_all_fields1.elements - tmp_fields_list1.elements;
|
|
|
|
curr_tab++;
|
|
aggr_tables++;
|
|
DBUG_ASSERT(curr_tab - join_tab < dbug_join_tab_array_size);
|
|
bzero((void*)curr_tab, sizeof(JOIN_TAB));
|
|
curr_tab->ref.key= -1;
|
|
|
|
/* group data to new table */
|
|
/*
|
|
If the access method is loose index scan then all MIN/MAX
|
|
functions are precomputed, and should be treated as regular
|
|
functions. See extended comment above.
|
|
*/
|
|
if (join_tab->is_using_loose_index_scan())
|
|
tmp_table_param.precomputed_group_by= TRUE;
|
|
|
|
tmp_table_param.hidden_field_count=
|
|
curr_all_fields->elements - curr_fields_list->elements;
|
|
ORDER *dummy= NULL; //TODO can use table->group here also
|
|
|
|
if (create_postjoin_aggr_table(curr_tab, curr_all_fields, dummy, true,
|
|
distinct, keep_row_order))
|
|
DBUG_RETURN(true);
|
|
|
|
if (group_list)
|
|
{
|
|
if (!only_const_tables()) // No need to sort a single row
|
|
{
|
|
if (add_sorting_to_table(curr_tab - 1, group_list))
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
if (make_group_fields(this, this))
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
// Setup sum funcs only when necessary, otherwise we might break info
|
|
// for the first table
|
|
if (group_list || tmp_table_param.sum_func_count)
|
|
{
|
|
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
|
DBUG_RETURN(true);
|
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
|
!join_tab->is_using_agg_loose_index_scan()))
|
|
DBUG_RETURN(true);
|
|
group_list= NULL;
|
|
if (setup_sum_funcs(thd, sum_funcs))
|
|
DBUG_RETURN(true);
|
|
}
|
|
// No sum funcs anymore
|
|
DBUG_ASSERT(items2.is_null());
|
|
|
|
items2= ref_ptr_array_slice(3);
|
|
if (change_to_use_tmp_fields(thd, items2,
|
|
tmp_fields_list2, tmp_all_fields2,
|
|
fields_list.elements, tmp_all_fields1))
|
|
DBUG_RETURN(true);
|
|
|
|
curr_fields_list= &tmp_fields_list2;
|
|
curr_all_fields= &tmp_all_fields2;
|
|
set_items_ref_array(items2);
|
|
curr_tab->ref_array= &items2;
|
|
curr_tab->all_fields= &tmp_all_fields2;
|
|
curr_tab->fields= &tmp_fields_list2;
|
|
set_postjoin_aggr_write_func(curr_tab);
|
|
|
|
tmp_table_param.field_count+= tmp_table_param.sum_func_count;
|
|
tmp_table_param.sum_func_count= 0;
|
|
}
|
|
if (curr_tab->table->distinct)
|
|
select_distinct= false; /* Each row is unique */
|
|
|
|
if (select_distinct && !group_list)
|
|
{
|
|
if (having)
|
|
{
|
|
curr_tab->having= having;
|
|
having->update_used_tables();
|
|
}
|
|
/*
|
|
We only need DISTINCT operation if the join is not degenerate.
|
|
If it is, we must not request DISTINCT processing, because
|
|
remove_duplicates() assumes there is a preceding computation step (and
|
|
in the degenerate join, there's none)
|
|
*/
|
|
if (top_join_tab_count && tables_list)
|
|
curr_tab->distinct= true;
|
|
|
|
having= NULL;
|
|
select_distinct= false;
|
|
}
|
|
/* Clean tmp_table_param for the next tmp table. */
|
|
tmp_table_param.field_count= tmp_table_param.sum_func_count=
|
|
tmp_table_param.func_count= 0;
|
|
|
|
tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
|
|
first_record= sort_and_group=0;
|
|
|
|
if (!group_optimized_away || implicit_grouping_with_window_funcs)
|
|
{
|
|
group= false;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
If grouping has been optimized away, a temporary table is
|
|
normally not needed unless we're explicitly requested to create
|
|
one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).
|
|
|
|
In this case (grouping was optimized away), temp_table was
|
|
created without a grouping expression and JOIN::exec() will not
|
|
perform the necessary grouping (by the use of end_send_group()
|
|
or end_write_group()) if JOIN::group is set to false.
|
|
*/
|
|
// the temporary table was explicitly requested
|
|
DBUG_ASSERT(select_options & OPTION_BUFFER_RESULT);
|
|
// the temporary table does not have a grouping expression
|
|
DBUG_ASSERT(!curr_tab->table->group);
|
|
}
|
|
calc_group_buffer(this, group_list);
|
|
count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false);
|
|
}
|
|
|
|
if (group ||
|
|
(implicit_grouping && !implicit_grouping_with_window_funcs) ||
|
|
tmp_table_param.sum_func_count)
|
|
{
|
|
if (make_group_fields(this, this))
|
|
DBUG_RETURN(true);
|
|
|
|
DBUG_ASSERT(items3.is_null());
|
|
|
|
if (items0.is_null())
|
|
init_items_ref_array();
|
|
items3= ref_ptr_array_slice(4);
|
|
setup_copy_fields(thd, &tmp_table_param,
|
|
items3, tmp_fields_list3, tmp_all_fields3,
|
|
curr_fields_list->elements, *curr_all_fields);
|
|
|
|
curr_fields_list= &tmp_fields_list3;
|
|
curr_all_fields= &tmp_all_fields3;
|
|
set_items_ref_array(items3);
|
|
if (join_tab)
|
|
{
|
|
JOIN_TAB *last_tab= join_tab + top_join_tab_count + aggr_tables - 1;
|
|
// Set grouped fields on the last table
|
|
last_tab->ref_array= &items3;
|
|
last_tab->all_fields= &tmp_all_fields3;
|
|
last_tab->fields= &tmp_fields_list3;
|
|
}
|
|
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
|
DBUG_RETURN(true);
|
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
|
!join_tab ||
|
|
!join_tab-> is_using_agg_loose_index_scan()))
|
|
DBUG_RETURN(true);
|
|
if (unlikely(setup_sum_funcs(thd, sum_funcs) || thd->is_error()))
|
|
DBUG_RETURN(true);
|
|
}
|
|
if (group_list || order)
|
|
{
|
|
DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
|
|
THD_STAGE_INFO(thd, stage_sorting_result);
|
|
/* If we have already done the group, add HAVING to sorted table */
|
|
if (tmp_having && !is_having_added_as_table_cond &&
|
|
!group_list && !sort_and_group)
|
|
{
|
|
if (add_having_as_table_cond(curr_tab))
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
if (group)
|
|
select_limit= HA_POS_ERROR;
|
|
else if (!need_tmp)
|
|
{
|
|
/*
|
|
We can abort sorting after thd->select_limit rows if there are no
|
|
filter conditions for any tables after the sorted one.
|
|
Filter conditions come in several forms:
|
|
1. as a condition item attached to the join_tab, or
|
|
2. as a keyuse attached to the join_tab (ref access).
|
|
*/
|
|
for (uint i= const_tables + 1; i < top_join_tab_count; i++)
|
|
{
|
|
JOIN_TAB *const tab= join_tab + i;
|
|
if (tab->select_cond || // 1
|
|
(tab->keyuse && !tab->first_inner)) // 2
|
|
{
|
|
/* We have to sort all rows */
|
|
select_limit= HA_POS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
Here we add sorting stage for ORDER BY/GROUP BY clause, if the
|
|
optimiser chose FILESORT to be faster than INDEX SCAN or there is
|
|
no suitable index present.
|
|
OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
|
|
*/
|
|
DBUG_PRINT("info",("Sorting for order by/group by"));
|
|
ORDER *order_arg= group_list ? group_list : order;
|
|
if (top_join_tab_count + aggr_tables > const_tables &&
|
|
ordered_index_usage !=
|
|
(group_list ? ordered_index_group_by : ordered_index_order_by) &&
|
|
curr_tab->type != JT_CONST &&
|
|
curr_tab->type != JT_EQ_REF) // Don't sort 1 row
|
|
{
|
|
// Sort either first non-const table or the last tmp table
|
|
JOIN_TAB *sort_tab= curr_tab;
|
|
|
|
if (add_sorting_to_table(sort_tab, order_arg))
|
|
DBUG_RETURN(true);
|
|
/*
|
|
filesort_limit: Return only this many rows from filesort().
|
|
We can use select_limit_cnt only if we have no group_by and 1 table.
|
|
This allows us to use Bounded_queue for queries like:
|
|
"select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
|
|
m_select_limit == HA_POS_ERROR (we need a full table scan)
|
|
unit->select_limit_cnt == 1 (we only need one row in the result set)
|
|
*/
|
|
sort_tab->filesort->limit=
|
|
(has_group_by || (join_tab + top_join_tab_count > curr_tab + 1)) ?
|
|
select_limit : unit->lim.get_select_limit();
|
|
|
|
if (unit->lim.is_with_ties())
|
|
sort_tab->filesort->limit= HA_POS_ERROR;
|
|
}
|
|
if (!only_const_tables() &&
|
|
!join_tab[const_tables].filesort &&
|
|
!(select_options & SELECT_DESCRIBE))
|
|
{
|
|
/*
|
|
If no IO cache exists for the first table then we are using an
|
|
INDEX SCAN and no filesort. Thus we should not remove the sorted
|
|
attribute on the INDEX SCAN.
|
|
*/
|
|
skip_sort_order= true;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Window functions computation step should be attached to the last join_tab
|
|
that's doing aggregation.
|
|
The last join_tab reads the data from the temp. table. It also may do
|
|
- sorting
|
|
- duplicate value removal
|
|
Both of these operations are done after window function computation step.
|
|
*/
|
|
if (select_lex->window_funcs.elements)
|
|
{
|
|
curr_tab= join_tab + total_join_tab_cnt();
|
|
if (!(curr_tab->window_funcs_step= new Window_funcs_computation))
|
|
DBUG_RETURN(true);
|
|
if (curr_tab->window_funcs_step->setup(thd, &select_lex->window_funcs,
|
|
curr_tab))
|
|
DBUG_RETURN(true);
|
|
/* Count that we're using window functions. */
|
|
status_var_increment(thd->status_var.feature_window_functions);
|
|
}
|
|
if (select_lex->custom_agg_func_used())
|
|
status_var_increment(thd->status_var.feature_custom_aggregate_functions);
|
|
|
|
/*
|
|
Allocate Cached_items of ORDER BY for FETCH FIRST .. WITH TIES.
|
|
The order list might have been modified prior to this, but we are
|
|
only interested in the initial order by columns, after all const
|
|
elements are removed.
|
|
*/
|
|
if (unit->lim.is_with_ties())
|
|
{
|
|
/*
|
|
When ORDER BY is eliminated, we make use of the GROUP BY list.
|
|
We've already counted how many elements from ORDER BY
|
|
must be evaluated as part of WITH TIES so we use that.
|
|
*/
|
|
ORDER *order_src = order ? order : group_list;
|
|
if (alloc_order_fields(this, order_src,
|
|
with_ties_order_count))
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
fields= curr_fields_list;
|
|
// Reset before execution
|
|
set_items_ref_array(items0);
|
|
if (join_tab)
|
|
join_tab[exec_join_tab_cnt() + aggr_tables - 1].next_select=
|
|
setup_end_select_func(this);
|
|
group= has_group_by;
|
|
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
|
|
ORDER *table_group,
|
|
bool save_sum_fields,
|
|
bool distinct,
|
|
bool keep_row_order)
|
|
{
|
|
DBUG_ENTER("JOIN::create_postjoin_aggr_table");
|
|
THD_STAGE_INFO(thd, stage_creating_tmp_table);
|
|
|
|
/*
|
|
Pushing LIMIT to the post-join temporary table creation is not applicable
|
|
when there is ORDER BY or GROUP BY or there is no GROUP BY, but
|
|
there are aggregate functions, because in all these cases we need
|
|
all result rows.
|
|
|
|
We also can not push limit if the limit is WITH TIES, as we do not know
|
|
how many rows we will actually have. This can happen if ORDER BY was
|
|
a constant and removed (during remove_const), thus we have an "unlimited"
|
|
WITH TIES.
|
|
*/
|
|
ha_rows table_rows_limit= ((order == NULL || skip_sort_order) &&
|
|
!table_group &&
|
|
!select_lex->with_sum_func &&
|
|
!unit->lim.is_with_ties()) ? select_limit
|
|
: HA_POS_ERROR;
|
|
|
|
if (!(tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param)))
|
|
DBUG_RETURN(true);
|
|
if (tmp_table_keep_current_rowid)
|
|
add_fields_for_current_rowid(tab, table_fields);
|
|
tab->tmp_table_param->skip_create_table= true;
|
|
TABLE* table= create_tmp_table(thd, tab->tmp_table_param, *table_fields,
|
|
table_group, distinct,
|
|
save_sum_fields, select_options,
|
|
table_rows_limit,
|
|
&empty_clex_str, true, keep_row_order);
|
|
if (!table)
|
|
DBUG_RETURN(true);
|
|
tmp_table_param.using_outer_summary_function=
|
|
tab->tmp_table_param->using_outer_summary_function;
|
|
tab->join= this;
|
|
DBUG_ASSERT(tab > tab->join->join_tab || !top_join_tab_count ||
|
|
!tables_list);
|
|
tab->table= table;
|
|
if (tab > join_tab)
|
|
(tab - 1)->next_select= sub_select_postjoin_aggr;
|
|
|
|
/* if group or order on first table, sort first */
|
|
if ((group_list && simple_group) ||
|
|
(implicit_grouping && select_lex->have_window_funcs()))
|
|
{
|
|
DBUG_PRINT("info",("Sorting for group"));
|
|
THD_STAGE_INFO(thd, stage_sorting_for_group);
|
|
|
|
if (ordered_index_usage != ordered_index_group_by &&
|
|
!only_const_tables() &&
|
|
(join_tab + const_tables)->type != JT_CONST && // Don't sort 1 row
|
|
!implicit_grouping &&
|
|
add_sorting_to_table(join_tab + const_tables, group_list))
|
|
goto err;
|
|
|
|
if (alloc_group_fields(this, group_list))
|
|
goto err;
|
|
if (make_sum_func_list(all_fields, fields_list, true))
|
|
goto err;
|
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
|
!(tables_list &&
|
|
join_tab->is_using_agg_loose_index_scan())))
|
|
goto err;
|
|
if (setup_sum_funcs(thd, sum_funcs))
|
|
goto err;
|
|
group_list= NULL;
|
|
}
|
|
else
|
|
{
|
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
|
!join_tab->is_using_agg_loose_index_scan()))
|
|
goto err;
|
|
if (setup_sum_funcs(thd, sum_funcs))
|
|
goto err;
|
|
|
|
if (!group_list && !table->distinct && order && simple_order &&
|
|
tab == join_tab + const_tables)
|
|
{
|
|
DBUG_PRINT("info",("Sorting for order"));
|
|
THD_STAGE_INFO(thd, stage_sorting_for_order);
|
|
|
|
if (ordered_index_usage != ordered_index_order_by &&
|
|
!only_const_tables() &&
|
|
add_sorting_to_table(join_tab + const_tables, order))
|
|
goto err;
|
|
order= NULL;
|
|
}
|
|
}
|
|
if (!(tab->aggr= new (thd->mem_root) AGGR_OP(tab)))
|
|
goto err;
|
|
table->reginfo.join_tab= tab;
|
|
DBUG_RETURN(false);
|
|
|
|
err:
|
|
if (table != NULL)
|
|
free_tmp_table(thd, table);
|
|
tab->table= NULL;
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
|
|
void
|
|
JOIN::optimize_distinct()
|
|
{
|
|
for (JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1; ;)
|
|
{
|
|
if (select_lex->select_list_tables & last_join_tab->table->map ||
|
|
last_join_tab->use_join_cache)
|
|
break;
|
|
last_join_tab->shortcut_for_distinct= true;
|
|
if (last_join_tab == join_tab)
|
|
break;
|
|
--last_join_tab;
|
|
}
|
|
|
|
/* Optimize "select distinct b from t1 order by key_part_1 limit #" */
|
|
if (order && skip_sort_order && !unit->lim.is_with_ties())
|
|
{
|
|
/* Should already have been optimized away */
|
|
DBUG_ASSERT(ordered_index_usage == ordered_index_order_by);
|
|
if (ordered_index_usage == ordered_index_order_by)
|
|
{
|
|
order= NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Add Filesort object to the given table to sort it with filesort
|
|
|
|
@param tab the JOIN_TAB object to attach created Filesort object to
|
|
@param order List of expressions to sort the table by
|
|
|
|
@note This function moves tab->select, if any, to filesort->select
|
|
|
|
@return false on success, true on OOM
|
|
*/
|
|
|
|
bool
|
|
JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order)
|
|
{
|
|
tab->filesort=
|
|
new (thd->mem_root) Filesort(order, HA_ROWS_MAX, tab->keep_current_rowid,
|
|
tab->select);
|
|
if (!tab->filesort)
|
|
return true;
|
|
|
|
TABLE *table= tab->table;
|
|
if ((tab == join_tab + const_tables) &&
|
|
table->pos_in_table_list->is_sjm_scan_table())
|
|
{
|
|
tab->filesort->set_all_read_bits= TRUE;
|
|
tab->filesort->unpack= unpack_to_base_table_fields;
|
|
}
|
|
|
|
/*
|
|
Select was moved to filesort->select to force join_init_read_record to use
|
|
sorted result instead of reading table through select.
|
|
*/
|
|
if (tab->select)
|
|
{
|
|
tab->select= NULL;
|
|
tab->set_select_cond(NULL, __LINE__);
|
|
}
|
|
tab->read_first_record= join_init_read_record;
|
|
return false;
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
Setup expression caches for subqueries that need them
|
|
|
|
@details
|
|
The function wraps correlated subquery expressions that return one value
|
|
into objects of the class Item_cache_wrapper setting up an expression
|
|
cache for each of them. The result values of the subqueries are to be
|
|
cached together with the corresponding sets of the parameters - outer
|
|
references of the subqueries.
|
|
|
|
@retval FALSE OK
|
|
@retval TRUE Error
|
|
*/
|
|
|
|
bool JOIN::setup_subquery_caches()
|
|
{
|
|
DBUG_ENTER("JOIN::setup_subquery_caches");
|
|
|
|
/*
|
|
We have to check all this condition together because items created in
|
|
one of this clauses can be moved to another one by optimizer
|
|
*/
|
|
if (select_lex->expr_cache_may_be_used[IN_WHERE] ||
|
|
select_lex->expr_cache_may_be_used[IN_HAVING] ||
|
|
select_lex->expr_cache_may_be_used[IN_ON] ||
|
|
select_lex->expr_cache_may_be_used[NO_MATTER])
|
|
{
|
|
JOIN_TAB *tab;
|
|
if (conds &&
|
|
!(conds= conds->transform(thd, &Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (tab->select_cond &&
|
|
!(tab->select_cond=
|
|
tab->select_cond->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
if (tab->cache_select && tab->cache_select->cond)
|
|
if (!(tab->cache_select->cond=
|
|
tab->cache_select->
|
|
cond->transform(thd, &Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
if (having &&
|
|
!(having= having->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
if (tmp_having)
|
|
{
|
|
DBUG_ASSERT(having == NULL);
|
|
if (!(tmp_having=
|
|
tmp_having->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
}
|
|
if (select_lex->expr_cache_may_be_used[SELECT_LIST] ||
|
|
select_lex->expr_cache_may_be_used[IN_GROUP_BY] ||
|
|
select_lex->expr_cache_may_be_used[NO_MATTER])
|
|
{
|
|
List_iterator<Item> li(all_fields);
|
|
Item *item;
|
|
while ((item= li++))
|
|
{
|
|
Item *new_item;
|
|
if (!(new_item=
|
|
item->transform(thd, &Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
if (new_item != item)
|
|
{
|
|
thd->change_item_tree(li.ref(), new_item);
|
|
}
|
|
}
|
|
for (ORDER *tmp_group= group_list; tmp_group ; tmp_group= tmp_group->next)
|
|
{
|
|
if (!(*tmp_group->item=
|
|
(*tmp_group->item)->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
}
|
|
if (select_lex->expr_cache_may_be_used[NO_MATTER])
|
|
{
|
|
for (ORDER *ord= order; ord; ord= ord->next)
|
|
{
|
|
if (!(*ord->item=
|
|
(*ord->item)->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
}
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
Shrink join buffers used for preceding tables to reduce the occupied space
|
|
|
|
SYNOPSIS
|
|
shrink_join_buffers()
|
|
jt table up to which the buffers are to be shrunk
|
|
curr_space the size of the space used by the buffers for tables 1..jt
|
|
needed_space the size of the space that has to be used by these buffers
|
|
|
|
DESCRIPTION
|
|
The function makes an attempt to shrink all join buffers used for the
|
|
tables starting from the first up to jt to reduce the total size of the
|
|
space occupied by the buffers used for tables 1,...,jt from curr_space
|
|
to needed_space.
|
|
The function assumes that the buffer for the table jt has not been
|
|
allocated yet.
|
|
|
|
RETURN
|
|
FALSE if all buffers have been successfully shrunk
|
|
TRUE otherwise
|
|
*/
|
|
|
|
bool JOIN::shrink_join_buffers(JOIN_TAB *jt,
                               ulonglong curr_space,
                               ulonglong needed_space)
{
  /* Shrink the buffer of every tab that precedes jt and has a join cache */
  for (JOIN_TAB *cur= first_linear_tab(this, WITHOUT_BUSH_ROOTS,
                                       WITHOUT_CONST_TABLES);
       cur != jt;
       cur= next_linear_tab(this, cur, WITHOUT_BUSH_ROOTS))
  {
    JOIN_CACHE *buf= cur->cache;
    if (!buf)
      continue;

    if (needed_space < buf->get_min_join_buffer_size())
      return TRUE;

    if (buf->shrink_join_buffer_in_ratio(curr_space, needed_space))
    {
      revise_cache_usage(cur);
      return TRUE;
    }

    /* Deduct this buffer's size from both running totals */
    const size_t shrunk_size= buf->get_join_buffer_size();
    curr_space-= shrunk_size;
    if (needed_space < shrunk_size)
    {
      /*
        Safety: give up if the reduced join buffers have already used up
        all of the available space.
      */
      DBUG_ASSERT(0);
      return TRUE;
    }
    needed_space-= shrunk_size;
  }

  /* What is left of needed_space becomes the size of jt's own buffer */
  JOIN_CACHE *last= jt->cache;
  DBUG_ASSERT(last);
  if (needed_space < last->get_min_join_buffer_size())
    return TRUE;
  last->set_join_buffer_size((size_t) needed_space);

  return FALSE;
}
|
|
|
|
|
|
int
|
|
JOIN::reinit()
|
|
{
|
|
DBUG_ENTER("JOIN::reinit");
|
|
|
|
first_record= false;
|
|
group_sent= false;
|
|
cleaned= false;
|
|
accepted_rows= 0;
|
|
|
|
if (aggr_tables)
|
|
{
|
|
JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
|
|
JOIN_TAB *end_tab= curr_tab + aggr_tables;
|
|
for ( ; curr_tab < end_tab; curr_tab++)
|
|
{
|
|
TABLE *tmp_table= curr_tab->table;
|
|
if (!tmp_table->is_created())
|
|
continue;
|
|
tmp_table->file->extra(HA_EXTRA_RESET_STATE);
|
|
tmp_table->file->ha_delete_all_rows();
|
|
}
|
|
}
|
|
clear_sj_tmp_tables(this);
|
|
if (current_ref_ptrs != items0)
|
|
{
|
|
set_items_ref_array(items0);
|
|
}
|
|
|
|
/* need to reset ref access state (see join_read_key) */
|
|
if (join_tab)
|
|
{
|
|
JOIN_TAB *tab;
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
tab->ref.key_err= TRUE;
|
|
}
|
|
}
|
|
|
|
clear_sum_funcs();
|
|
|
|
if (no_rows_in_result_called)
|
|
{
|
|
/* Reset effect of possible no_rows_in_result() */
|
|
List_iterator_fast<Item> it(fields_list);
|
|
Item *item;
|
|
no_rows_in_result_called= 0;
|
|
while ((item= it++))
|
|
item->restore_to_before_no_rows_in_result();
|
|
}
|
|
|
|
if (!(select_options & SELECT_DESCRIBE))
|
|
if (init_ftfuncs(thd, select_lex, MY_TEST(order)))
|
|
DBUG_RETURN(1);
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
Prepare join result.
|
|
|
|
@details Prepare join result prior to join execution or describing.
|
|
Instantiate derived tables and get schema tables result if necessary.
|
|
|
|
@return
|
|
TRUE An error during derived or schema tables instantiation.
|
|
FALSE Ok
|
|
*/
|
|
|
|
bool JOIN::prepare_result(List<Item> **columns_list)
{
  DBUG_ENTER("JOIN::prepare_result");

  error= 0;

  /*
    The three steps run in this order and stop at the first failure:
      1. create result tables for materialized views
         (skipped when zero_result_cause is set),
      2. result->prepare2(),
      3. get_schema_tables_result(), only if OPTION_SCHEMA_TABLE is set.
  */
  const bool failed=
    (!zero_result_cause &&
     select_lex->handle_derived(thd->lex, DT_CREATE)) ||
    result->prepare2(this) ||
    ((select_lex->options & OPTION_SCHEMA_TABLE) &&
     get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC));

  if (failed)
  {
    error= 1;
    DBUG_RETURN(TRUE);
  }

  DBUG_RETURN(FALSE);
}
|
|
|
|
|
|
/**
|
|
@retval
|
|
0 ok
|
|
1 error
|
|
*/
|
|
|
|
|
|
bool JOIN::save_explain_data(Explain_query *output, bool can_overwrite,
                             bool need_tmp_table, bool need_order,
                             bool distinct)
{
  DBUG_ENTER("JOIN::save_explain_data");
  DBUG_PRINT("enter", ("Save explain Select_lex: %u (%p) parent lex: %p stmt_lex: %p present select: %u (%p)",
                       select_lex->select_number, select_lex,
                       select_lex->parent_lex, thd->lex->stmt_lex,
                       (output->get_select(select_lex->select_number) ?
                        select_lex->select_number : 0),
                       (output->get_select(select_lex->select_number) ?
                        output->get_select(select_lex->select_number)
                        ->select_lex : NULL)));

  /*
    A SELECT already recorded in this statement under the same number has
    to be this very SELECT.
  */
  DBUG_ASSERT(select_lex->select_number == FAKE_SELECT_LEX_ID || !output ||
              !output->get_select(select_lex->select_number) ||
              output->get_select(select_lex->select_number)->select_lex ==
              select_lex);

  /*
    have_query_plan == QEP_DELETED happens when there was no QEP ever, but
    then cleanup() is called multiple times.
    output may be NULL for the "SET" command in SPs.
  */
  if (select_lex->select_number != FAKE_SELECT_LEX_ID &&
      have_query_plan != JOIN::QEP_NOT_PRESENT_YET &&
      have_query_plan != JOIN::QEP_DELETED &&
      output &&
      (can_overwrite || !output->get_select(select_lex->select_number)))
  {
    /* A degenerate join is described by a message instead of a plan */
    const char *message;
    if (zero_result_cause)
      message= zero_result_cause;
    else if (!table_count || !tables_list)
      message= "No tables used";
    else
      message= NULL;

    bool rc= save_explain_data_intern(thd->lex->explain, need_tmp_table,
                                      need_order, distinct, message);
    DBUG_RETURN(rc);
  }

  /*
    join_tab can be NULL in degenerate cases
    (e.g. SELECT .. UNION ... SELECT LIMIT 0)
  */
  if (select_lex != select_lex->master_unit()->fake_select_lex || !join_tab)
    DBUG_RETURN(0);

  /*
    This is fake_select_lex. There is no query plan for it, but ANALYZE
    still needs trackers to be set up.
  */
  uint first_select_nr=
    select_lex->master_unit()->first_select()->select_number;
  Explain_union *union_explain= output->get_union(first_select_nr);
  explain= &union_explain->fake_select_lex_explain;
  join_tab[0].tracker= union_explain->get_fake_select_lex_tracker();

  /* Give every filesort among the exec and aggregation tabs a tracker */
  for (uint idx= 0; idx < exec_join_tab_cnt() + aggr_tables; idx++)
  {
    Filesort *sorter= join_tab[idx].filesort;
    if (!sorter)
      continue;
    sorter->tracker= new Filesort_tracker(thd->lex->analyze_stmt);
    if (!sorter->tracker)
      DBUG_RETURN(1);
  }

  DBUG_RETURN(0);
}
|
|
|
|
|
|
int JOIN::exec()
|
|
{
|
|
int res;
|
|
DBUG_EXECUTE_IF("show_explain_probe_join_exec_start",
|
|
if (dbug_user_var_equals_int(thd,
|
|
"show_explain_probe_select_id",
|
|
select_lex->select_number))
|
|
dbug_serve_apcs(thd, 1);
|
|
);
|
|
ANALYZE_START_TRACKING(thd, &explain->time_tracker);
|
|
res= exec_inner();
|
|
ANALYZE_STOP_TRACKING(thd, &explain->time_tracker);
|
|
|
|
DBUG_EXECUTE_IF("show_explain_probe_join_exec_end",
|
|
if (dbug_user_var_equals_int(thd,
|
|
"show_explain_probe_select_id",
|
|
select_lex->select_number))
|
|
dbug_serve_apcs(thd, 1);
|
|
);
|
|
return res;
|
|
}
|
|
|
|
|
|
int JOIN::exec_inner()
|
|
{
|
|
List<Item> *columns_list= &fields_list;
|
|
DBUG_ENTER("JOIN::exec_inner");
|
|
DBUG_ASSERT(optimization_state == JOIN::OPTIMIZATION_DONE);
|
|
|
|
THD_STAGE_INFO(thd, stage_executing);
|
|
|
|
/*
|
|
Enable LIMIT ROWS EXAMINED during query execution if:
|
|
(1) This JOIN is the outermost query (not a subquery or derived table)
|
|
This ensures that the limit is enabled when actual execution begins,
|
|
and not if a subquery is evaluated during optimization of the outer
|
|
query.
|
|
(2) This JOIN is not the result of a UNION. In this case do not apply the
|
|
limit in order to produce the partial query result stored in the
|
|
UNION temp table.
|
|
*/
|
|
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_exec(thd, "join_execution");
|
|
trace_exec.add_select_number(select_lex->select_number);
|
|
Json_writer_array trace_steps(thd, "steps");
|
|
|
|
if (!select_lex->outer_select() && // (1)
|
|
select_lex != select_lex->master_unit()->fake_select_lex) // (2)
|
|
thd->lex->set_limit_rows_examined();
|
|
|
|
if (procedure)
|
|
{
|
|
procedure_fields_list= fields_list;
|
|
if (procedure->change_columns(thd, procedure_fields_list) ||
|
|
result->prepare(procedure_fields_list, unit))
|
|
{
|
|
thd->limit_found_rows= 0;
|
|
DBUG_RETURN(0);
|
|
}
|
|
columns_list= &procedure_fields_list;
|
|
}
|
|
if (result->prepare2(this))
|
|
DBUG_RETURN(error);
|
|
|
|
if (!tables_list && (table_count || !select_lex->with_sum_func) &&
|
|
!select_lex->have_window_funcs())
|
|
{ // Only test of functions
|
|
if (select_options & SELECT_DESCRIBE)
|
|
select_describe(this, FALSE, FALSE, FALSE,
|
|
(zero_result_cause?zero_result_cause:"No tables used"));
|
|
else
|
|
{
|
|
if (result->send_result_set_metadata(*columns_list,
|
|
Protocol::SEND_NUM_ROWS |
|
|
Protocol::SEND_EOF))
|
|
{
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
/*
|
|
We have to test for 'conds' here as the WHERE may not be constant
|
|
even if we don't have any tables for prepared statements or if
|
|
conds uses something like 'rand()'.
|
|
If the HAVING clause is either impossible or always true, then
|
|
JOIN::having is set to NULL by optimize_cond.
|
|
In this case JOIN::exec must check for JOIN::having_value, in the
|
|
same way it checks for JOIN::cond_value.
|
|
*/
|
|
DBUG_ASSERT(error == 0);
|
|
if (cond_value != Item::COND_FALSE &&
|
|
having_value != Item::COND_FALSE &&
|
|
(!conds || conds->val_int()) &&
|
|
(!having || having->val_int()))
|
|
{
|
|
if (do_send_rows &&
|
|
(procedure ? (procedure->send_row(procedure_fields_list) ||
|
|
procedure->end_of_records()):
|
|
result->send_data_with_check(fields_list, unit, 0)> 0))
|
|
error= 1;
|
|
else
|
|
send_records= ((select_options & OPTION_FOUND_ROWS) ? 1 :
|
|
thd->get_sent_row_count());
|
|
}
|
|
else
|
|
send_records= 0;
|
|
if (likely(!error))
|
|
{
|
|
join_free(); // Unlock all cursors
|
|
error= (int) result->send_eof();
|
|
}
|
|
}
|
|
/* Single select (without union) always returns 0 or 1 row */
|
|
thd->limit_found_rows= send_records;
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
/*
|
|
Evaluate expensive constant conditions that were not evaluated during
|
|
optimization. Do not evaluate them for EXPLAIN statements as these
|
|
condtions may be arbitrarily costly, and because the optimize phase
|
|
might not have produced a complete executable plan for EXPLAINs.
|
|
*/
|
|
if (!zero_result_cause &&
|
|
exec_const_cond && !(select_options & SELECT_DESCRIBE) &&
|
|
!exec_const_cond->val_int())
|
|
zero_result_cause= "Impossible WHERE noticed after reading const tables";
|
|
|
|
/*
|
|
We've called exec_const_cond->val_int(). This may have caused an error.
|
|
*/
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= thd->is_error();
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
if (zero_result_cause)
|
|
{
|
|
if (select_lex->have_window_funcs() && send_row_on_empty_set())
|
|
{
|
|
/*
|
|
The query produces just one row but it has window functions.
|
|
|
|
The only way to compute the value of window function(s) is to
|
|
run the entire window function computation step (there is no shortcut).
|
|
*/
|
|
const_tables= table_count;
|
|
first_select= sub_select_postjoin_aggr;
|
|
}
|
|
else
|
|
{
|
|
(void) return_zero_rows(this, result, &select_lex->leaf_tables,
|
|
columns_list,
|
|
send_row_on_empty_set(),
|
|
select_options,
|
|
zero_result_cause,
|
|
having ? having : tmp_having, &all_fields);
|
|
DBUG_RETURN(0);
|
|
}
|
|
}
|
|
|
|
/*
|
|
Evaluate all constant expressions with subqueries in the
|
|
ORDER/GROUP clauses to make sure that all subqueries return a
|
|
single row. The evaluation itself will trigger an error if that is
|
|
not the case.
|
|
*/
|
|
if (exec_const_order_group_cond.elements &&
|
|
!(select_options & SELECT_DESCRIBE) &&
|
|
!select_lex->pushdown_select)
|
|
{
|
|
List_iterator_fast<Item> const_item_it(exec_const_order_group_cond);
|
|
Item *cur_const_item;
|
|
StringBuffer<MAX_FIELD_WIDTH> tmp;
|
|
while ((cur_const_item= const_item_it++))
|
|
{
|
|
tmp.set_buffer_if_not_allocated(&my_charset_bin);
|
|
cur_const_item->val_str(&tmp);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= thd->is_error();
|
|
DBUG_RETURN(error);
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((this->select_lex->options & OPTION_SCHEMA_TABLE) &&
|
|
get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
|
|
{
|
|
error= thd->is_error();
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
if (select_options & SELECT_DESCRIBE)
|
|
{
|
|
select_describe(this, need_tmp,
|
|
order != 0 && !skip_sort_order,
|
|
select_distinct,
|
|
!table_count ? "No tables used" : NullS);
|
|
DBUG_RETURN(0);
|
|
}
|
|
else if (select_lex->pushdown_select)
|
|
{
|
|
/* Execute the query pushed into a foreign engine */
|
|
error= select_lex->pushdown_select->execute();
|
|
DBUG_RETURN(error);
|
|
}
|
|
else
|
|
{
|
|
/* it's a const select, materialize it. */
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
}
|
|
|
|
/* XXX: When can we have here thd->is_error() not zero? */
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= thd->is_error();
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
THD_STAGE_INFO(thd, stage_sending_data);
|
|
DBUG_PRINT("info", ("%s", thd->proc_info));
|
|
result->send_result_set_metadata(
|
|
procedure ? procedure_fields_list : *fields,
|
|
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
|
|
|
|
error= result->view_structure_only() ? false : do_select(this, procedure);
|
|
/* Accumulate the counts from all join iterations of all join parts. */
|
|
thd->ps_report_examined_row_count();
|
|
|
|
DBUG_PRINT("counts", ("thd->examined_row_count: %lu",
|
|
(ulong) thd->get_examined_row_count()));
|
|
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
|
|
/**
|
|
Clean up join.
|
|
|
|
@return
|
|
Return error that hold JOIN.
|
|
*/
|
|
|
|
int
|
|
JOIN::destroy()
|
|
{
|
|
DBUG_ENTER("JOIN::destroy");
|
|
|
|
DBUG_PRINT("info", ("select %p (%u) <> JOIN %p",
|
|
select_lex, select_lex->select_number, this));
|
|
select_lex->join= 0;
|
|
|
|
cond_equal= 0;
|
|
having_equal= 0;
|
|
|
|
cleanup(1);
|
|
|
|
if (join_tab)
|
|
{
|
|
for (JOIN_TAB *tab= first_linear_tab(this, WITH_BUSH_ROOTS,
|
|
WITH_CONST_TABLES);
|
|
tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (tab->aggr)
|
|
{
|
|
free_tmp_table(thd, tab->table);
|
|
delete tab->tmp_table_param;
|
|
tab->tmp_table_param= NULL;
|
|
tab->aggr= NULL;
|
|
}
|
|
tab->table= NULL;
|
|
}
|
|
}
|
|
|
|
/* Cleanup items referencing temporary table columns */
|
|
cleanup_item_list(tmp_all_fields1);
|
|
cleanup_item_list(tmp_all_fields3);
|
|
destroy_sj_tmp_tables(this);
|
|
delete_dynamic(&keyuse);
|
|
if (save_qep)
|
|
delete(save_qep);
|
|
if (ext_keyuses_for_splitting)
|
|
delete(ext_keyuses_for_splitting);
|
|
delete procedure;
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
|
|
/**
  Call Item::cleanup() on every item of a list.

  @param items  list whose elements are cleaned up; an empty list is a no-op
*/
void JOIN::cleanup_item_list(List<Item> &items) const
{
  DBUG_ENTER("JOIN::cleanup_item_list");

  if (items.is_empty())
    DBUG_VOID_RETURN;

  List_iterator_fast<Item> item_it(items);
  for (Item *cur_item= item_it++; cur_item; cur_item= item_it++)
    cur_item->cleanup();

  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Look for provision of the select_handler interface by a foreign engine.
|
|
Must not be called directly, use find_single_select_handler() or
|
|
find_partial_select_handler() instead.
|
|
|
|
@param
|
|
thd The thread handler
|
|
select_lex SELECT_LEX object, must be passed in the cases of:
|
|
- single select pushdown
|
|
- partial pushdown (part of a UNION/EXCEPT/INTERSECT)
|
|
Must be NULL in case of entire unit pushdown
|
|
select_lex_unit SELECT_LEX_UNIT object, must be passed in the cases of:
|
|
- entire unit pushdown
|
|
- partial pushdown (part of a UNION/EXCEPT/INTERSECT)
|
|
Must be NULL in case of single select pushdown
|
|
|
|
@details
|
|
The function checks that this is an upper level select and if so looks
|
|
through its tables searching for one whose handlerton owns a
|
|
create_select call-back function. If the call of this function returns
|
|
a select_handler interface object then the server will push the select
|
|
query into this engine.
|
|
This function does not check if the select has tables from
|
|
different engines. Such a check must be done inside each engine's
|
|
create_select function.
|
|
Also the engine's create_select function must perform other checks
|
|
to make sure the engine can execute the query.
|
|
|
|
@retval the found select_handler if the search is successful
|
|
0 otherwise
|
|
*/
|
|
|
|
static
|
|
select_handler *find_select_handler_inner(THD *thd,
|
|
SELECT_LEX *select_lex,
|
|
SELECT_LEX_UNIT *select_lex_unit)
|
|
{
|
|
if (select_lex->master_unit()->outer_select() ||
|
|
(select_lex_unit && select_lex->master_unit()->with_clause))
|
|
{
|
|
/*
|
|
Pushdown is not supported neither for non-top-level SELECTs nor for parts
|
|
of SELECT_LEX_UNITs that have CTEs (SELECT_LEX_UNIT::with_clause)
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
TABLE_LIST *tbl= nullptr;
|
|
// For SQLCOM_INSERT_SELECT the server takes TABLE_LIST
|
|
// from thd->lex->query_tables and skips its first table
|
|
// b/c it is the target table for the INSERT..SELECT.
|
|
if (thd->lex->sql_command != SQLCOM_INSERT_SELECT)
|
|
{
|
|
tbl= select_lex->join->tables_list;
|
|
}
|
|
else if (thd->lex->query_tables &&
|
|
thd->lex->query_tables->next_global)
|
|
{
|
|
tbl= thd->lex->query_tables->next_global;
|
|
}
|
|
else
|
|
return 0;
|
|
|
|
for (;tbl; tbl= tbl->next_global)
|
|
{
|
|
if (!tbl->table)
|
|
continue;
|
|
handlerton *ht= tbl->table->file->partition_ht();
|
|
if (!ht->create_select)
|
|
continue;
|
|
select_handler *sh= ht->create_select(thd, select_lex, select_lex_unit);
|
|
if (sh)
|
|
return sh;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Wrapper for find_select_handler_inner() for the case of single select
|
|
pushdown. See more comments at the description of
|
|
find_select_handler_inner()
|
|
|
|
*/
|
|
select_handler *find_single_select_handler(THD *thd, SELECT_LEX *select_lex)
|
|
{
|
|
return find_select_handler_inner(thd, select_lex, nullptr);
|
|
}
|
|
|
|
|
|
/**
|
|
Wrapper for find_select_handler_inner() for the case of partial select
|
|
pushdown. Partial pushdown means that a unit (i.e. multiple selects combined
|
|
with UNION/EXCEPT/INTERSECT operators) cannot be pushed down to
|
|
the storage engine as a whole but some particular selects of this unit can.
|
|
For example,
|
|
SELECT a FROM federated.t1 -- can be pushed down to Federated
|
|
UNION
|
|
SELECT b FROM local.t2 -- cannot be pushed down, executed locally
|
|
|
|
See more comments at the description of find_select_handler_inner()
|
|
|
|
*/
|
|
select_handler *
|
|
find_partial_select_handler(THD *thd, SELECT_LEX *select_lex,
|
|
SELECT_LEX_UNIT *select_lex_unit)
|
|
{
|
|
return find_select_handler_inner(thd, select_lex, select_lex_unit);
|
|
}
|
|
|
|
|
|
/**
|
|
An entry point to single-unit select (a select without UNION).
|
|
|
|
@param thd thread handler
|
|
@param rref_pointer_array a reference to ref_pointer_array of
|
|
the top-level select_lex for this query
|
|
@param tables list of all tables used in this query.
|
|
The tables have been pre-opened.
|
|
@param fields list of items in SELECT list of the top-level
|
|
select
|
|
e.g. SELECT a, b, c FROM t1 will have Item_field
|
|
for a, b and c in this list.
|
|
@param conds top level item of an expression representing
|
|
WHERE clause of the top level select
|
|
@param og_num total number of ORDER BY and GROUP BY clauses
|
|
arguments
|
|
@param order linked list of ORDER BY agruments
|
|
@param group linked list of GROUP BY arguments
|
|
@param having top level item of HAVING expression
|
|
@param proc_param list of PROCEDUREs
|
|
@param select_options select options (BIG_RESULT, etc)
|
|
@param result an instance of result set handling class.
|
|
This object is responsible for send result
|
|
set rows to the client or inserting them
|
|
into a table.
|
|
@param select_lex the only SELECT_LEX of this query
|
|
@param unit top-level UNIT of this query
|
|
UNIT is an artificial object created by the
|
|
parser for every SELECT clause.
|
|
e.g.
|
|
SELECT * FROM t1 WHERE a1 IN (SELECT * FROM t2)
|
|
has 2 unions.
|
|
|
|
@retval
|
|
FALSE success
|
|
@retval
|
|
TRUE an error
|
|
*/
|
|
|
|
bool
|
|
mysql_select(THD *thd, TABLE_LIST *tables, List<Item> &fields, COND *conds,
|
|
uint og_num, ORDER *order, ORDER *group, Item *having,
|
|
ORDER *proc_param, ulonglong select_options, select_result *result,
|
|
SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex)
|
|
{
|
|
int err= 0;
|
|
bool free_join= 1, exec_error= 0;
|
|
DBUG_ENTER("mysql_select");
|
|
|
|
if (!fields.is_empty())
|
|
select_lex->context.resolve_in_select_list= true;
|
|
JOIN *join;
|
|
if (select_lex->join != 0)
|
|
{
|
|
join= select_lex->join;
|
|
/*
|
|
is it single SELECT in derived table, called in derived table
|
|
creation
|
|
*/
|
|
if (select_lex->get_linkage() != DERIVED_TABLE_TYPE ||
|
|
(select_options & SELECT_DESCRIBE))
|
|
{
|
|
if (select_lex->get_linkage() != GLOBAL_OPTIONS_TYPE)
|
|
{
|
|
/*
|
|
Original join tabs might be overwritten at first
|
|
subselect execution. So we need to restore them.
|
|
*/
|
|
Item_subselect *subselect= select_lex->master_unit()->item;
|
|
if (subselect && subselect->is_uncacheable() && join->reinit())
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
else
|
|
{
|
|
if (!join->prepared &&
|
|
(err= join->prepare(tables, conds, og_num, order, false, group,
|
|
having, proc_param, select_lex, unit)))
|
|
{
|
|
goto err;
|
|
}
|
|
}
|
|
}
|
|
free_join= 0;
|
|
join->select_options= select_options;
|
|
}
|
|
else
|
|
{
|
|
if (thd->lex->describe)
|
|
select_options|= SELECT_DESCRIBE;
|
|
|
|
/*
|
|
When in EXPLAIN, delay deleting the joins so that they are still
|
|
available when we're producing EXPLAIN EXTENDED warning text.
|
|
*/
|
|
if (select_options & SELECT_DESCRIBE)
|
|
free_join= 0;
|
|
|
|
if (!(join= new (thd->mem_root) JOIN(thd, fields, select_options, result)))
|
|
DBUG_RETURN(TRUE);
|
|
THD_STAGE_INFO(thd, stage_init);
|
|
thd->lex->used_tables=0;
|
|
if (!join->prepared &&
|
|
(err= join->prepare(tables, conds, og_num, order, false, group, having,
|
|
proc_param, select_lex, unit)))
|
|
{
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
thd->get_stmt_da()->reset_current_row_for_warning(1);
|
|
/* Look for a table owned by an engine with the select_handler interface */
|
|
select_lex->pushdown_select= find_single_select_handler(thd, select_lex);
|
|
|
|
if ((err= join->optimize()))
|
|
{
|
|
goto err; // 1
|
|
}
|
|
|
|
if (thd->lex->describe & DESCRIBE_EXTENDED)
|
|
{
|
|
join->conds_history= join->conds;
|
|
join->having_history= (join->having?join->having:join->tmp_having);
|
|
}
|
|
|
|
if (unlikely(thd->is_error()))
|
|
goto err;
|
|
|
|
exec_error= join->exec();
|
|
|
|
if (thd->lex->describe & DESCRIBE_EXTENDED)
|
|
{
|
|
select_lex->where= join->conds_history;
|
|
select_lex->having= join->having_history;
|
|
}
|
|
|
|
err:
|
|
thd->push_final_warnings();
|
|
if (select_lex->pushdown_select)
|
|
{
|
|
delete select_lex->pushdown_select;
|
|
select_lex->pushdown_select= NULL;
|
|
}
|
|
|
|
if (free_join)
|
|
{
|
|
THD_STAGE_INFO(thd, stage_end);
|
|
err|= (int)(select_lex->cleanup());
|
|
DBUG_RETURN(exec_error || err || thd->is_error());
|
|
}
|
|
DBUG_RETURN(exec_error || err);
|
|
}
|
|
|
|
|
|
/**
|
|
Approximate how many records are going to be returned by this table in this
|
|
select with this key.
|
|
|
|
@param thd Thread handle
|
|
@param select Select to be examined
|
|
@param table The table of interest
|
|
@param keys The keys of interest
|
|
@param limit Maximum number of rows of interest
|
|
@param quick_count Pointer to where we want the estimate written
|
|
|
|
@return Status
|
|
@retval false Success
|
|
@retval true Error
|
|
|
|
*/
|
|
static bool get_quick_record_count(THD *thd, SQL_SELECT *select,
|
|
TABLE *table,
|
|
const key_map *keys,ha_rows limit,
|
|
ha_rows *quick_count)
|
|
{
|
|
quick_select_return error;
|
|
DBUG_ENTER("get_quick_record_count");
|
|
uchar buff[STACK_BUFF_ALLOC];
|
|
if (unlikely(check_stack_overrun(thd, STACK_MIN_SIZE, buff)))
|
|
DBUG_RETURN(false); // Fatal error flag is set
|
|
if (select)
|
|
{
|
|
select->head=table;
|
|
table->reginfo.impossible_range=0;
|
|
/*
|
|
EQ_FUNC and EQUAL_FUNC already sent unusable key notes (if any)
|
|
during update_ref_and_keys(). Have only other functions raise notes
|
|
from can_optimize_scalar_range().
|
|
*/
|
|
error= select->test_quick_select(thd, *(key_map *)keys, (table_map) 0,
|
|
limit, 0, FALSE,
|
|
TRUE, /* remove_where_parts*/
|
|
FALSE,
|
|
Item_func::BITMAP_EXCEPT_ANY_EQUALITY);
|
|
|
|
if (error == SQL_SELECT::OK)
|
|
{
|
|
if (select->quick)
|
|
{
|
|
/*
|
|
opt_range_condition_rows was updated in test_quick_select to be
|
|
the smallest number of rows in any range.
|
|
select->quick->records is the number of rows in range with
|
|
smallest cost.
|
|
*/
|
|
DBUG_ASSERT(select->quick->records >=
|
|
table->opt_range_condition_rows);
|
|
*quick_count= select->quick->records;
|
|
}
|
|
DBUG_RETURN(false);
|
|
}
|
|
if (error == SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
{
|
|
table->reginfo.impossible_range=1;
|
|
*quick_count= 0;
|
|
DBUG_RETURN(false);
|
|
}
|
|
if (unlikely(error == SQL_SELECT::ERROR))
|
|
DBUG_RETURN(true);
|
|
|
|
DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
|
|
}
|
|
*quick_count= HA_POS_ERROR;
|
|
DBUG_RETURN(false); /* This shouldn't happen */
|
|
}
|
|
|
|
/*
|
|
This structure is used to collect info on potentially sargable
|
|
predicates in order to check whether they become sargable after
|
|
reading const tables.
|
|
We form a bitmap of indexes that can be used for sargable predicates.
|
|
Only such indexes are involved in range analysis.
|
|
*/
|
|
struct SARGABLE_PARAM
|
|
{
|
|
Field *field; /* field against which to check sargability */
|
|
Item **arg_value; /* values of potential keys for lookups */
|
|
uint num_values; /* number of values in the above array */
|
|
};
|
|
|
|
|
|
/*
|
|
Mark all tables inside a join nest as constant.
|
|
|
|
@detail This is called when there is a local "Impossible WHERE" inside
|
|
a multi-table LEFT JOIN.
|
|
*/
|
|
|
|
void mark_join_nest_as_const(JOIN *join,
|
|
TABLE_LIST *join_nest,
|
|
table_map *found_const_table_map,
|
|
uint *const_count)
|
|
{
|
|
List_iterator<TABLE_LIST> it(join_nest->nested_join->join_list);
|
|
TABLE_LIST *tbl;
|
|
Json_writer_object emb_obj(join->thd);
|
|
Json_writer_object trace_obj(join->thd, "mark_join_nest_as_const");
|
|
Json_writer_array trace_array(join->thd, "members");
|
|
|
|
while ((tbl= it++))
|
|
{
|
|
if (tbl->nested_join)
|
|
{
|
|
mark_join_nest_as_const(join, tbl, found_const_table_map, const_count);
|
|
continue;
|
|
}
|
|
JOIN_TAB *tab= tbl->table->reginfo.join_tab;
|
|
|
|
if (!(join->const_table_map & tab->table->map))
|
|
{
|
|
tab->type= JT_CONST;
|
|
tab->info= ET_IMPOSSIBLE_ON_CONDITION;
|
|
tab->table->const_table= 1;
|
|
|
|
join->const_table_map|= tab->table->map;
|
|
*found_const_table_map|= tab->table->map;
|
|
set_position(join,(*const_count)++,tab,(KEYUSE*) 0);
|
|
mark_as_null_row(tab->table); // All fields are NULL
|
|
|
|
trace_array.add_table_name(tab->table);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Get the condition that can be used to do range analysis/partition
|
|
pruning/etc
|
|
|
|
@detail
|
|
Figure out which condition we can use:
|
|
- For INNER JOIN, we use the WHERE,
|
|
- "t1 LEFT JOIN t2 ON ..." uses t2's ON expression
|
|
- "t1 LEFT JOIN (...) ON ..." uses the join nest's ON expression.
|
|
*/
|
|
|
|
static Item **get_sargable_cond(JOIN *join, TABLE *table)
|
|
{
|
|
Item **retval;
|
|
if (table->pos_in_table_list->on_expr)
|
|
{
|
|
/*
|
|
This is an inner table from a single-table LEFT JOIN, "t1 LEFT JOIN
|
|
t2 ON cond". Use the condition cond.
|
|
*/
|
|
retval= &table->pos_in_table_list->on_expr;
|
|
}
|
|
else if (table->pos_in_table_list->embedding &&
|
|
!table->pos_in_table_list->embedding->sj_on_expr)
|
|
{
|
|
/*
|
|
This is the inner side of a multi-table outer join. Use the
|
|
appropriate ON expression.
|
|
*/
|
|
retval= &(table->pos_in_table_list->embedding->on_expr);
|
|
}
|
|
else
|
|
{
|
|
/* The table is not inner wrt some LEFT JOIN. Use the WHERE clause */
|
|
retval= &join->conds;
|
|
}
|
|
return retval;
|
|
}
|
|
|
|
|
|
/**
|
|
Calculate the best possible join and initialize the join structure.
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 Fatal error
|
|
*/
|
|
|
|
static bool
|
|
make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
|
|
DYNAMIC_ARRAY *keyuse_array)
|
|
{
|
|
int error= 0;
|
|
uint i,table_count,const_count,key;
|
|
uint sort_space;
|
|
table_map found_const_table_map, all_table_map;
|
|
key_map const_ref, eq_part;
|
|
bool has_expensive_keyparts;
|
|
TABLE **table_vector;
|
|
JOIN_TAB *stat,*stat_end,*s,**stat_ref, **stat_vector;
|
|
KEYUSE *keyuse,*start_keyuse;
|
|
table_map outer_join=0;
|
|
table_map no_rows_const_tables= 0;
|
|
SARGABLE_PARAM *sargables= 0;
|
|
List_iterator<TABLE_LIST> ti(tables_list);
|
|
TABLE_LIST *tables;
|
|
THD *thd= join->thd;
|
|
DBUG_ENTER("make_join_statistics");
|
|
|
|
table_count=join->table_count;
|
|
|
|
/*
|
|
best_extension_by_limited_search needs sort space for 2 POSITION
|
|
objects per remaining table, which gives us
|
|
2*(T + T-1 + T-2 + T-3...1 POSITIONS) = 2*(T+1)/2*T = (T*T+T)
|
|
*/
|
|
join->sort_space= sort_space= (table_count*table_count + table_count);
|
|
|
|
/*
|
|
best_positions is ok to allocate with alloc() as we copy things to it with
|
|
memcpy()
|
|
*/
|
|
|
|
if (!multi_alloc_root(join->thd->mem_root,
|
|
&stat, sizeof(JOIN_TAB)*(table_count),
|
|
&stat_ref, sizeof(JOIN_TAB*)* MAX_TABLES,
|
|
&stat_vector, sizeof(JOIN_TAB*)* (table_count +1),
|
|
&table_vector, sizeof(TABLE*)*(table_count*2),
|
|
&join->positions, sizeof(POSITION)*(table_count + 1),
|
|
&join->sort_positions, sizeof(POSITION)*(sort_space),
|
|
&join->best_positions,
|
|
sizeof(POSITION)*(table_count + 1),
|
|
NullS))
|
|
DBUG_RETURN(1);
|
|
|
|
/* The following should be optimized to only clear critical things */
|
|
bzero((void*)stat, sizeof(JOIN_TAB)* table_count);
|
|
join->top_join_tab_count= table_count;
|
|
|
|
/* Initialize POSITION objects */
|
|
for (i=0 ; i <= table_count ; i++)
|
|
(void) new ((char*) (join->positions + i)) POSITION;
|
|
for (i=0 ; i < sort_space ; i++)
|
|
(void) new ((char*) (join->sort_positions + i)) POSITION;
|
|
|
|
join->best_ref= stat_vector;
|
|
|
|
stat_end=stat+table_count;
|
|
found_const_table_map= all_table_map=0;
|
|
const_count=0;
|
|
|
|
for (s= stat, i= 0; (tables= ti++); s++, i++)
|
|
{
|
|
TABLE_LIST *embedding= tables->embedding;
|
|
TABLE *table= tables->table;
|
|
stat_vector[i]=s;
|
|
table_vector[i]= s->table= table;
|
|
s->tab_list= tables;
|
|
table->pos_in_table_list= tables;
|
|
error= tables->fetch_number_of_rows();
|
|
/* Calculate table->use_stat_records */
|
|
set_statistics_for_table(join->thd, table);
|
|
bitmap_clear_all(&table->cond_set);
|
|
|
|
#ifdef WITH_PARTITION_STORAGE_ENGINE
|
|
const bool all_partitions_pruned_away= table->all_partitions_pruned_away;
|
|
#else
|
|
const bool all_partitions_pruned_away= FALSE;
|
|
#endif
|
|
|
|
DBUG_EXECUTE_IF("bug11747970_raise_error",
|
|
{ join->thd->set_killed(KILL_QUERY_HARD); });
|
|
if (unlikely(error))
|
|
{
|
|
table->file->print_error(error, MYF(0));
|
|
goto error;
|
|
}
|
|
table->opt_range_keys.clear_all();
|
|
table->intersect_keys.clear_all();
|
|
table->reginfo.join_tab=s;
|
|
table->reginfo.not_exists_optimize=0;
|
|
bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys);
|
|
all_table_map|= table->map;
|
|
s->preread_init_done= FALSE;
|
|
s->join=join;
|
|
|
|
s->dependent= tables->dep_tables;
|
|
if (tables->schema_table)
|
|
{
|
|
/*
|
|
Information schema is slow and we don't know how many rows we will
|
|
find. By setting a moderate amount of rows we are more likely
|
|
to have it materialized if needed.
|
|
*/
|
|
table->file->stats.records= table->used_stat_records= 100;
|
|
}
|
|
table->opt_range_condition_rows= table->stat_records();
|
|
|
|
s->on_expr_ref= &tables->on_expr;
|
|
if (*s->on_expr_ref)
|
|
{
|
|
/* s is the only inner table of an outer join */
|
|
if (!table->is_filled_at_execution() &&
|
|
((!table->file->stats.records &&
|
|
(table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) ||
|
|
all_partitions_pruned_away) && !embedding)
|
|
{ // Empty table
|
|
s->dependent= 0; // Ignore LEFT JOIN depend.
|
|
no_rows_const_tables |= table->map;
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
continue;
|
|
}
|
|
outer_join|= table->map;
|
|
s->embedding_map= 0;
|
|
for (;embedding; embedding= embedding->embedding)
|
|
s->embedding_map|= embedding->nested_join->nj_map;
|
|
continue;
|
|
}
|
|
if (embedding)
|
|
{
|
|
/* s belongs to a nested join, maybe to several embedded joins */
|
|
s->embedding_map= 0;
|
|
bool inside_an_outer_join= FALSE;
|
|
do
|
|
{
|
|
/*
|
|
If this is a semi-join nest, skip it, and proceed upwards. Maybe
|
|
we're in some outer join nest
|
|
*/
|
|
if (embedding->sj_on_expr)
|
|
{
|
|
embedding= embedding->embedding;
|
|
continue;
|
|
}
|
|
inside_an_outer_join= TRUE;
|
|
NESTED_JOIN *nested_join= embedding->nested_join;
|
|
s->embedding_map|=nested_join->nj_map;
|
|
s->dependent|= embedding->dep_tables;
|
|
embedding= embedding->embedding;
|
|
outer_join|= nested_join->used_tables;
|
|
}
|
|
while (embedding);
|
|
if (inside_an_outer_join)
|
|
continue;
|
|
}
|
|
if (!table->is_filled_at_execution() &&
|
|
(table->s->system ||
|
|
(table->file->stats.records <= 1 &&
|
|
(table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) ||
|
|
all_partitions_pruned_away) &&
|
|
!s->dependent &&
|
|
!table->fulltext_searched && !join->no_const_tables)
|
|
{
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
no_rows_const_tables |= table->map;
|
|
}
|
|
|
|
/* SJ-Materialization handling: */
|
|
if (table->pos_in_table_list->jtbm_subselect &&
|
|
table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
|
|
{
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
no_rows_const_tables |= table->map;
|
|
table->file->stats.records= 0;
|
|
}
|
|
}
|
|
|
|
stat_vector[i]=0;
|
|
join->outer_join=outer_join;
|
|
|
|
if (join->outer_join)
|
|
{
|
|
/*
|
|
Build transitive closure for relation 'to be dependent on'.
|
|
This will speed up the plan search for many cases with outer joins,
|
|
as well as allow us to catch illegal cross references.
|
|
Warshall's algorithm is used to build the transitive closure.
|
|
As we use bitmaps to represent the relation the complexity
|
|
of the algorithm is O((number of tables)^2).
|
|
|
|
The classic form of the Warshall's algorithm would look like:
|
|
for (i= 0; i < table_count; i++)
|
|
{
|
|
for (j= 0; j < table_count; j++)
|
|
{
|
|
for (k= 0; k < table_count; k++)
|
|
{
|
|
if (bitmap_is_set(stat[j].dependent, i) &&
|
|
bitmap_is_set(stat[i].dependent, k))
|
|
bitmap_set_bit(stat[j].dependent, k);
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
for (s= stat ; s < stat_end ; s++)
|
|
{
|
|
TABLE *table= s->table;
|
|
for (JOIN_TAB *t= stat ; t < stat_end ; t++)
|
|
{
|
|
if (t->dependent & table->map)
|
|
t->dependent |= table->reginfo.join_tab->dependent;
|
|
}
|
|
if (outer_join & s->table->map)
|
|
s->table->maybe_null= 1;
|
|
}
|
|
/* Catch illegal cross references for outer joins */
|
|
for (i= 0, s= stat ; i < table_count ; i++, s++)
|
|
{
|
|
if (s->dependent & s->table->map)
|
|
{
|
|
join->table_count=0; // Don't use join->table
|
|
my_message(ER_WRONG_OUTER_JOIN,
|
|
ER_THD(join->thd, ER_WRONG_OUTER_JOIN), MYF(0));
|
|
goto error;
|
|
}
|
|
s->key_dependent= s->dependent;
|
|
}
|
|
}
|
|
|
|
{
|
|
for (JOIN_TAB *s= stat ; s < stat_end ; s++)
|
|
{
|
|
TABLE_LIST *tl= s->table->pos_in_table_list;
|
|
if (tl->embedding && tl->embedding->sj_subq_pred)
|
|
{
|
|
s->embedded_dependent= tl->embedding->original_subq_pred_used_tables;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
trace_table_dependencies(thd, stat, join->table_count);
|
|
|
|
if (join->conds || outer_join)
|
|
{
|
|
if (update_ref_and_keys(thd, keyuse_array, stat, join->table_count,
|
|
join->conds, ~outer_join, join->select_lex, &sargables))
|
|
goto error;
|
|
/*
|
|
Keyparts without prefixes may be useful if this JOIN is a subquery, and
|
|
if the subquery may be executed via the IN-EXISTS strategy.
|
|
*/
|
|
bool skip_unprefixed_keyparts=
|
|
!(join->is_in_subquery() &&
|
|
join->unit->item->get_IN_subquery()->test_strategy(SUBS_IN_TO_EXISTS));
|
|
|
|
if (keyuse_array->elements &&
|
|
sort_and_filter_keyuse(join, keyuse_array,
|
|
skip_unprefixed_keyparts))
|
|
goto error;
|
|
DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
|
|
if (unlikely(thd->trace_started()))
|
|
print_keyuse_array_for_trace(thd, keyuse_array);
|
|
}
|
|
|
|
join->const_table_map= no_rows_const_tables;
|
|
join->const_tables= const_count;
|
|
eliminate_tables(join);
|
|
join->const_table_map &= ~no_rows_const_tables;
|
|
const_count= join->const_tables;
|
|
found_const_table_map= join->const_table_map;
|
|
|
|
/* Read tables with 0 or 1 rows (system tables) */
|
|
for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count;
|
|
p_pos < p_end ;
|
|
p_pos++)
|
|
{
|
|
s= p_pos->table;
|
|
if (! (s->table->map & join->eliminated_tables))
|
|
{
|
|
int tmp;
|
|
s->type=JT_SYSTEM;
|
|
join->const_table_map|=s->table->map;
|
|
if ((tmp=join_read_const_table(join->thd, s, p_pos)))
|
|
{
|
|
if (tmp > 0)
|
|
goto error; // Fatal error
|
|
}
|
|
else
|
|
{
|
|
found_const_table_map|= s->table->map;
|
|
s->table->pos_in_table_list->optimized_away= TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* loop until no more const tables are found */
|
|
int ref_changed;
|
|
do
|
|
{
|
|
ref_changed = 0;
|
|
more_const_tables_found:
|
|
|
|
/*
|
|
We only have to loop from stat_vector + const_count as
|
|
set_position() will move all const_tables first in stat_vector
|
|
*/
|
|
|
|
for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
|
|
{
|
|
TABLE *table= s->table;
|
|
|
|
if (table->is_filled_at_execution())
|
|
continue;
|
|
|
|
/*
|
|
If equi-join condition by a key is null rejecting and after a
|
|
substitution of a const table the key value happens to be null
|
|
then we can state that there are no matches for this equi-join.
|
|
*/
|
|
if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map &&
|
|
!(table->map & join->eliminated_tables))
|
|
{
|
|
/*
|
|
When performing an outer join operation if there are no matching rows
|
|
for the single row of the outer table all the inner tables are to be
|
|
null complemented and thus considered as constant tables.
|
|
Here we apply this consideration to the case of outer join operations
|
|
with a single inner table only because the case with nested tables
|
|
would require a more thorough analysis.
|
|
TODO. Apply single row substitution to null complemented inner tables
|
|
for nested outer join operations.
|
|
*/
|
|
while (keyuse->table == table)
|
|
{
|
|
if (!keyuse->is_for_hash_join() &&
|
|
!(keyuse->val->used_tables() & ~join->const_table_map) &&
|
|
keyuse->val->is_null() && keyuse->null_rejecting)
|
|
{
|
|
s->type= JT_CONST;
|
|
s->table->const_table= 1;
|
|
mark_as_null_row(table);
|
|
found_const_table_map|= table->map;
|
|
join->const_table_map|= table->map;
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
goto more_const_tables_found;
|
|
}
|
|
keyuse++;
|
|
}
|
|
}
|
|
|
|
if (s->dependent) // If dependent on some table
|
|
{
|
|
// All dep. must be constants
|
|
if (s->dependent & ~(found_const_table_map))
|
|
continue;
|
|
if (table->file->stats.records <= 1L &&
|
|
(table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
|
|
!table->pos_in_table_list->embedding &&
|
|
!((outer_join & table->map) &&
|
|
(*s->on_expr_ref)->is_expensive()))
|
|
{ // system table
|
|
int tmp= 0;
|
|
s->type= JT_SYSTEM;
|
|
join->const_table_map|=table->map;
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
if ((tmp= join_read_const_table(join->thd, s,
|
|
join->positions+const_count-1)))
|
|
{
|
|
if (tmp > 0)
|
|
goto error; // Fatal error
|
|
}
|
|
else
|
|
found_const_table_map|= table->map;
|
|
continue;
|
|
}
|
|
}
|
|
/* check if table can be read by key or table only uses const refs */
|
|
if ((keyuse=s->keyuse))
|
|
{
|
|
s->type= JT_REF;
|
|
while (keyuse->table == table)
|
|
{
|
|
if (keyuse->is_for_hash_join())
|
|
{
|
|
keyuse++;
|
|
continue;
|
|
}
|
|
start_keyuse=keyuse;
|
|
key=keyuse->key;
|
|
s->keys.set_bit(key); // TODO: remove this ?
|
|
|
|
const_ref.clear_all();
|
|
eq_part.clear_all();
|
|
has_expensive_keyparts= false;
|
|
do
|
|
{
|
|
if (keyuse->val->type() != Item::NULL_ITEM &&
|
|
!keyuse->optimize &&
|
|
keyuse->keypart != FT_KEYPART)
|
|
{
|
|
if (!((~found_const_table_map) & keyuse->used_tables))
|
|
{
|
|
const_ref.set_bit(keyuse->keypart);
|
|
if (keyuse->val->is_expensive())
|
|
has_expensive_keyparts= true;
|
|
}
|
|
eq_part.set_bit(keyuse->keypart);
|
|
}
|
|
keyuse++;
|
|
} while (keyuse->table == table && keyuse->key == key);
|
|
|
|
TABLE_LIST *embedding= table->pos_in_table_list->embedding;
|
|
/*
|
|
TODO (low priority): currently we ignore the const tables that
|
|
are within a semi-join nest which is within an outer join nest.
|
|
The effect of this is that we don't do const substitution for
|
|
such tables.
|
|
*/
|
|
KEY *keyinfo= table->key_info + key;
|
|
uint key_parts= table->actual_n_key_parts(keyinfo);
|
|
if (eq_part.is_prefix(key_parts) &&
|
|
!table->fulltext_searched &&
|
|
(!embedding || (embedding->sj_on_expr && !embedding->embedding)))
|
|
{
|
|
key_map base_part, base_const_ref, base_eq_part;
|
|
base_part.set_prefix(keyinfo->user_defined_key_parts);
|
|
base_const_ref= const_ref;
|
|
base_const_ref.intersect(base_part);
|
|
base_eq_part= eq_part;
|
|
base_eq_part.intersect(base_part);
|
|
|
|
/*
|
|
We can read the const record if we are using a full unique key and
|
|
if the table is not an unopened to be materialized table/view.
|
|
*/
|
|
if ((table->actual_key_flags(keyinfo) & HA_NOSAME) &&
|
|
(!s->table->pos_in_table_list->is_materialized_derived() ||
|
|
s->table->pos_in_table_list->fill_me))
|
|
{
|
|
|
|
if (base_const_ref == base_eq_part &&
|
|
!has_expensive_keyparts &&
|
|
!((outer_join & table->map) &&
|
|
(*s->on_expr_ref)->is_expensive()))
|
|
{ // Found everything for ref.
|
|
int tmp;
|
|
ref_changed = 1;
|
|
s->type= JT_CONST;
|
|
join->const_table_map|=table->map;
|
|
set_position(join,const_count++,s,start_keyuse);
|
|
/* create_ref_for_key will set s->table->const_table */
|
|
if (create_ref_for_key(join, s, start_keyuse, FALSE,
|
|
found_const_table_map))
|
|
goto error;
|
|
if ((tmp=join_read_const_table(join->thd, s,
|
|
join->positions+const_count-1)))
|
|
{
|
|
if (tmp > 0)
|
|
goto error; // Fatal error
|
|
}
|
|
else
|
|
found_const_table_map|= table->map;
|
|
break;
|
|
}
|
|
}
|
|
else if (base_const_ref == base_eq_part)
|
|
s->const_keys.set_bit(key);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} while (ref_changed);
|
|
|
|
join->sort_by_table= get_sort_by_table(join->order, join->group_list,
|
|
join->select_lex->leaf_tables,
|
|
join->const_table_map);
|
|
join->limit_shortcut_applicable= join_limit_shortcut_is_applicable(join);
|
|
/*
|
|
Update info on indexes that can be used for search lookups as
|
|
reading const tables may have added new sargable predicates.
|
|
*/
|
|
if (const_count && sargables)
|
|
{
|
|
for( ; sargables->field ; sargables++)
|
|
{
|
|
Field *field= sargables->field;
|
|
JOIN_TAB *join_tab= field->table->reginfo.join_tab;
|
|
key_map possible_keys= field->key_start;
|
|
possible_keys.intersect(field->table->keys_in_use_for_query);
|
|
bool is_const= 1;
|
|
for (uint j=0; j < sargables->num_values; j++)
|
|
is_const&= sargables->arg_value[j]->const_item();
|
|
if (is_const)
|
|
join_tab[0].const_keys.merge(possible_keys);
|
|
}
|
|
}
|
|
|
|
join->impossible_where= false;
|
|
if (join->conds && const_count)
|
|
{
|
|
Item* &conds= join->conds;
|
|
COND_EQUAL *orig_cond_equal = join->cond_equal;
|
|
|
|
conds->update_used_tables();
|
|
conds= conds->remove_eq_conds(join->thd, &join->cond_value, true);
|
|
if (conds && conds->type() == Item::COND_ITEM &&
|
|
((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
|
|
join->cond_equal= &((Item_cond_and*) conds)->m_cond_equal;
|
|
join->select_lex->where= conds;
|
|
if (join->cond_value == Item::COND_FALSE)
|
|
{
|
|
join->impossible_where= true;
|
|
conds= (Item*) Item_false;
|
|
}
|
|
|
|
join->cond_equal= NULL;
|
|
if (conds)
|
|
{
|
|
if (conds->type() == Item::COND_ITEM &&
|
|
((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
|
|
join->cond_equal= (&((Item_cond_and *) conds)->m_cond_equal);
|
|
else if (conds->type() == Item::FUNC_ITEM &&
|
|
((Item_func*) conds)->functype() == Item_func::MULT_EQUAL_FUNC)
|
|
{
|
|
if (!join->cond_equal)
|
|
join->cond_equal= new COND_EQUAL;
|
|
join->cond_equal->current_level.empty();
|
|
join->cond_equal->current_level.push_back((Item_equal*) conds,
|
|
join->thd->mem_root);
|
|
}
|
|
}
|
|
|
|
if (orig_cond_equal != join->cond_equal)
|
|
{
|
|
/*
|
|
If join->cond_equal has changed all references to it from COND_EQUAL
|
|
objects associated with ON expressions must be updated.
|
|
*/
|
|
for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
|
|
{
|
|
if (*s->on_expr_ref && s->cond_equal &&
|
|
s->cond_equal->upper_levels == orig_cond_equal)
|
|
s->cond_equal->upper_levels= join->cond_equal;
|
|
}
|
|
}
|
|
}
|
|
|
|
join->join_tab= stat;
|
|
join->make_notnull_conds_for_range_scans();
|
|
|
|
/* Calc how many (possible) matched records in each table */
|
|
|
|
/*
|
|
Todo: add a function so that we can add these Json_writer_objects
|
|
easily.
|
|
Another way would be to enclose them in a scope {};
|
|
*/
|
|
{
|
|
Json_writer_object rows_estimation_wrapper(thd);
|
|
Json_writer_array rows_estimation(thd, "rows_estimation");
|
|
|
|
for (s=stat ; s < stat_end ; s++)
|
|
{
|
|
s->startup_cost= 0;
|
|
if (s->type == JT_SYSTEM || s->type == JT_CONST)
|
|
{
|
|
Json_writer_object table_records(thd);
|
|
ha_rows records= 1;
|
|
if (s->type == JT_SYSTEM || s->table->file->stats.records == 0)
|
|
records= s->table->file->stats.records;
|
|
/* zero or one matching row */
|
|
s->records= s->found_records= records;
|
|
s->records_init= s->records_out= rows2double(records);
|
|
s->read_time=1.0;
|
|
table_records.add_table_name(s).
|
|
add("rows", s->found_records).
|
|
add("cost", s->read_time).
|
|
add("table_type", s->type == JT_CONST ?
|
|
"const" : "system");
|
|
continue;
|
|
}
|
|
/*
|
|
Approximate found rows and time to read them
|
|
Update found_records, records, read_time and other scan related
|
|
variables
|
|
*/
|
|
s->estimate_scan_time();
|
|
|
|
if (s->table->is_splittable())
|
|
s->add_keyuses_for_splitting();
|
|
|
|
/*
|
|
Add to stat->const_keys those indexes for which all group fields or
|
|
all select distinct fields participate in one index.
|
|
*/
|
|
add_group_and_distinct_keys(join, s);
|
|
|
|
/* This will be updated in calculate_cond_selectivity_for_table() */
|
|
s->table->set_cond_selectivity(1.0);
|
|
DBUG_ASSERT(s->table->used_stat_records == 0 ||
|
|
s->table->cond_selectivity <=
|
|
s->table->opt_range_condition_rows /
|
|
s->table->used_stat_records);
|
|
/*
|
|
Perform range analysis if there are keys it could use (1).
|
|
Don't do range analysis for materialized subqueries (2).
|
|
Don't do range analysis for materialized derived tables/views (3)
|
|
*/
|
|
if ((!s->const_keys.is_clear_all() ||
|
|
!bitmap_is_clear_all(&s->table->cond_set)) && // (1)
|
|
!s->table->is_filled_at_execution() && // (2)
|
|
!(s->table->pos_in_table_list->derived && // (3)
|
|
s->table->pos_in_table_list->is_materialized_derived())) // (3)
|
|
{
|
|
bool impossible_range= FALSE;
|
|
ha_rows records= HA_ROWS_MAX;
|
|
SQL_SELECT *select= 0;
|
|
Item **sargable_cond= NULL;
|
|
if (!s->const_keys.is_clear_all())
|
|
{
|
|
sargable_cond= get_sargable_cond(join, s->table);
|
|
bool is_sargable_cond_of_where= sargable_cond == &join->conds;
|
|
|
|
select= make_select(s->table, found_const_table_map,
|
|
found_const_table_map,
|
|
*sargable_cond,
|
|
(SORT_INFO*) 0, 1, &error);
|
|
if (!select)
|
|
goto error;
|
|
if (get_quick_record_count(join->thd, select, s->table,
|
|
&s->const_keys, join->row_limit, &records))
|
|
{
|
|
/* There was an error in test_quick_select */
|
|
delete select;
|
|
goto error;
|
|
}
|
|
/*
|
|
Range analyzer might have modified the condition. Put the new
|
|
condition to where we got it from.
|
|
*/
|
|
*sargable_cond= select->cond;
|
|
|
|
if (is_sargable_cond_of_where &&
|
|
join->conds && join->conds->type() == Item::COND_ITEM &&
|
|
((Item_cond*) (join->conds))->functype() ==
|
|
Item_func::COND_AND_FUNC)
|
|
join->cond_equal= &((Item_cond_and*) (join->conds))->m_cond_equal;
|
|
|
|
s->quick=select->quick;
|
|
select->quick=0;
|
|
s->needed_reg=select->needed_reg;
|
|
impossible_range= records == 0 && s->table->reginfo.impossible_range;
|
|
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_USE_ROWID_FILTER))
|
|
s->table->init_cost_info_for_usable_range_rowid_filters(join->thd);
|
|
}
|
|
if (!impossible_range)
|
|
{
|
|
if (!sargable_cond)
|
|
sargable_cond= get_sargable_cond(join, s->table);
|
|
if (join->thd->variables.optimizer_use_condition_selectivity > 1)
|
|
calculate_cond_selectivity_for_table(join->thd, s->table,
|
|
sargable_cond);
|
|
if (s->table->reginfo.impossible_range)
|
|
{
|
|
impossible_range= TRUE;
|
|
records= 0;
|
|
}
|
|
}
|
|
if (impossible_range)
|
|
{
|
|
/*
|
|
Impossible WHERE or ON expression
|
|
In case of ON, we mark that we match one empty NULL row.
|
|
In case of WHERE, don't set found_const_table_map to get the
|
|
caller to abort with a zero row result.
|
|
*/
|
|
TABLE_LIST *emb= s->table->pos_in_table_list->embedding;
|
|
if (emb && !emb->sj_on_expr && !*s->on_expr_ref)
|
|
{
|
|
/* Mark all tables in a multi-table join nest as const */
|
|
mark_join_nest_as_const(join, emb, &found_const_table_map,
|
|
&const_count);
|
|
}
|
|
else
|
|
{
|
|
double records= 1;
|
|
join->const_table_map|= s->table->map;
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
s->type= JT_CONST;
|
|
s->table->const_table= 1;
|
|
if (*s->on_expr_ref)
|
|
{
|
|
/* Generate empty row */
|
|
s->info= ET_IMPOSSIBLE_ON_CONDITION;
|
|
found_const_table_map|= s->table->map;
|
|
mark_as_null_row(s->table); // All fields are NULL
|
|
records= 0;
|
|
}
|
|
s->records_init= s->records_out= records;
|
|
s->found_records= s->records= (ha_rows)records;
|
|
}
|
|
}
|
|
if (records != HA_POS_ERROR)
|
|
{
|
|
s->found_records=records;
|
|
s->read_time= s->quick ? s->quick->read_time : 0.0;
|
|
}
|
|
if (select)
|
|
delete select;
|
|
else
|
|
{
|
|
if (unlikely(thd->trace_started()))
|
|
add_table_scan_values_to_trace(thd, s);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (unlikely(thd->trace_started()))
|
|
add_table_scan_values_to_trace(thd, s);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (pull_out_semijoin_tables(join))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
join->join_tab=stat;
|
|
join->top_join_tab_count= table_count;
|
|
join->map2table=stat_ref;
|
|
join->table= table_vector;
|
|
join->const_tables=const_count;
|
|
join->found_const_table_map=found_const_table_map;
|
|
|
|
if (join->const_tables != join->table_count)
|
|
optimize_keyuse(join, keyuse_array);
|
|
|
|
DBUG_ASSERT(!join->conds || !join->cond_equal ||
|
|
!join->cond_equal->current_level.elements ||
|
|
(join->conds->type() == Item::COND_ITEM &&
|
|
((Item_cond*) (join->conds))->functype() ==
|
|
Item_func::COND_AND_FUNC &&
|
|
join->cond_equal ==
|
|
&((Item_cond_and *) (join->conds))->m_cond_equal) ||
|
|
(join->conds->type() == Item::FUNC_ITEM &&
|
|
((Item_func*) (join->conds))->functype() ==
|
|
Item_func::MULT_EQUAL_FUNC &&
|
|
join->cond_equal->current_level.elements == 1 &&
|
|
join->cond_equal->current_level.head() == join->conds));
|
|
|
|
if (optimize_semijoin_nests(join, all_table_map))
|
|
DBUG_RETURN(TRUE); /* purecov: inspected */
|
|
|
|
{
|
|
SELECT_LEX_UNIT *unit= join->select_lex->master_unit();
|
|
|
|
/* Find an optimal join order of the non-constant tables. */
|
|
if (join->const_tables != join->table_count)
|
|
{
|
|
if (choose_plan(join, all_table_map & ~join->const_table_map, 0))
|
|
goto error;
|
|
|
|
#ifdef HAVE_valgrind
|
|
// JOIN::positions holds the current query plan. We've already
|
|
// made the plan choice, so we should only use JOIN::best_positions
|
|
for (uint k=join->const_tables; k < join->table_count; k++)
|
|
MEM_UNDEFINED(&join->positions[k], sizeof(join->positions[k]));
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
memcpy((uchar*) join->best_positions,(uchar*) join->positions,
|
|
sizeof(POSITION)*join->const_tables);
|
|
join->join_record_count= 1.0;
|
|
/* Const tables are part of optimizer setup and not counted in cost */
|
|
join->best_read=0.0;
|
|
}
|
|
|
|
if (!(join->select_options & SELECT_DESCRIBE) &&
|
|
unit->derived && unit->derived->is_materialized_derived())
|
|
{
|
|
/*
|
|
Calculate estimated number of rows for materialized derived
|
|
table/view.
|
|
*/
|
|
double records= 1.0;
|
|
ha_rows rows;
|
|
for (i= 0; i < join->table_count ; i++)
|
|
if (double rr= join->best_positions[i].records_read)
|
|
records= COST_MULT(records, rr);
|
|
rows= double_to_rows(records);
|
|
set_if_smaller(rows, unit->lim.get_select_limit());
|
|
join->select_lex->increase_derived_records(rows);
|
|
}
|
|
}
|
|
|
|
if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
|
|
goto error;
|
|
|
|
DEBUG_SYNC(join->thd, "inside_make_join_statistics");
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
error:
|
|
/*
|
|
Need to clean up join_tab from TABLEs in case of error.
|
|
They won't get cleaned up by JOIN::cleanup() because JOIN::join_tab
|
|
may not be assigned yet by this function (which is building join_tab).
|
|
Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke.
|
|
*/
|
|
{
|
|
TABLE_LIST *tmp_table;
|
|
List_iterator<TABLE_LIST> ti2(tables_list);
|
|
while ((tmp_table= ti2++))
|
|
tmp_table->table->reginfo.join_tab= NULL;
|
|
}
|
|
DBUG_RETURN (1);
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Check which keys are used and which tables reference which tables
|
|
Updates in stat:
|
|
keys Bitmap of all used keys
|
|
const_keys Bitmap of all keys which may be used with quick_select
|
|
keyuse Pointer to possible keys
|
|
*****************************************************************************/
|
|
|
|
|
|
/**
|
|
Merge new key definitions to old ones, remove those not used in both.
|
|
|
|
This is called for OR between different levels.
|
|
|
|
That is, the function operates on an array of KEY_FIELD elements which has
|
|
two parts:
|
|
|
|
$LEFT_PART $RIGHT_PART
|
|
+-----------------------+-----------------------+
|
|
start new_fields end
|
|
|
|
$LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two
|
|
parts of the OR condition. Our task is to produce an array of KEY_FIELD
|
|
elements that would correspond to "$LEFT_PART OR $RIGHT_PART".
|
|
|
|
The rules for combining elements are as follows:
|
|
|
|
(keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)=
|
|
|
|
= AND_ij (keyfieldA_i OR keyfieldB_j)
|
|
|
|
We discard all (keyfieldA_i OR keyfieldB_j) that refer to different
|
|
fields. For those referring to the same field, the logic is as follows:
|
|
|
|
t.keycol=expr1 OR t.keycol=expr2 -> (since expr1 and expr2 are different
|
|
we can't produce a single equality,
|
|
so produce nothing)
|
|
|
|
t.keycol=expr1 OR t.keycol=expr1 -> t.keycol=expr1
|
|
|
|
t.keycol=expr1 OR t.keycol IS NULL -> t.keycol=expr1, and also set
|
|
KEY_OPTIMIZE_REF_OR_NULL flag
|
|
|
|
The last one is for ref_or_null access. We have handling for this special
|
|
because it's needed for evaluating IN subqueries that are internally
|
|
transformed into
|
|
|
|
@code
|
|
EXISTS(SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL)
|
|
@endcode
|
|
|
|
See add_key_fields() for discussion of what is and_level.
|
|
|
|
KEY_FIELD::null_rejecting is processed as follows: @n
|
|
result has null_rejecting=true if it is set for both ORed references.
|
|
for example:
|
|
- (t2.key = t1.field OR t2.key = t1.field) -> null_rejecting=true
|
|
- (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false
|
|
|
|
@todo
|
|
The result of this is that we're missing some 'ref' accesses.
|
|
OptimizerTeam: Fix this
|
|
*/
|
|
|
|
static KEY_FIELD *
|
|
merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
|
|
uint and_level)
|
|
{
|
|
if (start == new_fields)
|
|
return start; // Impossible or
|
|
if (new_fields == end)
|
|
return start; // No new fields, skip all
|
|
|
|
KEY_FIELD *first_free=new_fields;
|
|
|
|
/* Mark all found fields in old array */
|
|
for (; new_fields != end ; new_fields++)
|
|
{
|
|
for (KEY_FIELD *old=start ; old != first_free ; old++)
|
|
{
|
|
if (old->field == new_fields->field)
|
|
{
|
|
/*
|
|
NOTE: below const_item() call really works as "!used_tables()", i.e.
|
|
it can return FALSE where it is feasible to make it return TRUE.
|
|
|
|
The cause is as follows: Some of the tables are already known to be
|
|
const tables (the detection code is in make_join_statistics(),
|
|
above the update_ref_and_keys() call), but we didn't propagate
|
|
information about this: TABLE::const_table is not set to TRUE, and
|
|
Item::update_used_tables() hasn't been called for each item.
|
|
The result of this is that we're missing some 'ref' accesses.
|
|
TODO: OptimizerTeam: Fix this
|
|
*/
|
|
if (!new_fields->val->const_item())
|
|
{
|
|
/*
|
|
If the value matches, we can use the key reference.
|
|
If not, we keep it until we have examined all new values
|
|
*/
|
|
if (old->val->eq(new_fields->val, old->field->binary()))
|
|
{
|
|
old->level= and_level;
|
|
old->optimize= ((old->optimize & new_fields->optimize &
|
|
KEY_OPTIMIZE_EXISTS) |
|
|
((old->optimize | new_fields->optimize) &
|
|
KEY_OPTIMIZE_REF_OR_NULL));
|
|
old->null_rejecting= (old->null_rejecting &&
|
|
new_fields->null_rejecting);
|
|
}
|
|
}
|
|
else if (old->eq_func && new_fields->eq_func &&
|
|
old->val->eq_by_collation(new_fields->val,
|
|
old->field->binary(),
|
|
old->field->charset()))
|
|
|
|
{
|
|
old->level= and_level;
|
|
old->optimize= ((old->optimize & new_fields->optimize &
|
|
KEY_OPTIMIZE_EXISTS) |
|
|
((old->optimize | new_fields->optimize) &
|
|
KEY_OPTIMIZE_REF_OR_NULL));
|
|
old->null_rejecting= (old->null_rejecting &&
|
|
new_fields->null_rejecting);
|
|
}
|
|
else if (old->eq_func && new_fields->eq_func &&
|
|
((old->val->can_eval_in_optimize() && old->val->is_null()) ||
|
|
(!new_fields->val->is_expensive() &&
|
|
new_fields->val->is_null())))
|
|
{
|
|
/* field = expression OR field IS NULL */
|
|
old->level= and_level;
|
|
if (old->field->maybe_null())
|
|
{
|
|
old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
|
|
/* The referred expression can be NULL: */
|
|
old->null_rejecting= 0;
|
|
}
|
|
/*
|
|
Remember the NOT NULL value unless the value does not depend
|
|
on other tables.
|
|
*/
|
|
if (!old->val->used_tables() && !old->val->is_expensive() &&
|
|
old->val->is_null())
|
|
old->val= new_fields->val;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We are comparing two different const. In this case we can't
|
|
use a key-lookup on this so it's better to remove the value
|
|
and let the range optimzier handle it
|
|
*/
|
|
if (old == --first_free) // If last item
|
|
break;
|
|
*old= *first_free; // Remove old value
|
|
old--; // Retry this value
|
|
}
|
|
}
|
|
}
|
|
}
|
|
/* Remove all not used items */
|
|
for (KEY_FIELD *old=start ; old != first_free ;)
|
|
{
|
|
if (old->level != and_level)
|
|
{ // Not used in all levels
|
|
if (old == --first_free)
|
|
break;
|
|
*old= *first_free; // Remove old value
|
|
continue;
|
|
}
|
|
old++;
|
|
}
|
|
return first_free;
|
|
}
|
|
|
|
|
|
/*
|
|
Given a field, return its index in semi-join's select list, or UINT_MAX
|
|
|
|
DESCRIPTION
|
|
Given a field, we find its table; then see if the table is within a
|
|
semi-join nest and if the field was in select list of the subselect.
|
|
If it was, we return field's index in the select list. The value is used
|
|
by LooseScan strategy.
|
|
*/
|
|
|
|
static uint get_semi_join_select_list_index(Field *field)
|
|
{
|
|
uint res= UINT_MAX;
|
|
TABLE_LIST *emb_sj_nest;
|
|
if ((emb_sj_nest= field->table->pos_in_table_list->embedding) &&
|
|
emb_sj_nest->sj_on_expr)
|
|
{
|
|
Item_in_subselect *subq_pred= emb_sj_nest->sj_subq_pred;
|
|
st_select_lex *subq_lex= subq_pred->unit->first_select();
|
|
uint ncols= subq_pred->left_exp()->cols();
|
|
if (ncols == 1)
|
|
{
|
|
Item *sel_item= subq_lex->ref_pointer_array[0];
|
|
if (sel_item->type() == Item::FIELD_ITEM &&
|
|
((Item_field*)sel_item)->field->eq(field))
|
|
{
|
|
res= 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (uint i= 0; i < ncols; i++)
|
|
{
|
|
Item *sel_item= subq_lex->ref_pointer_array[i];
|
|
if (sel_item->type() == Item::FIELD_ITEM &&
|
|
((Item_field*)sel_item)->field->eq(field))
|
|
{
|
|
res= i;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
|
|
/**
|
|
Add a possible key to array of possible keys if it's usable as a key
|
|
|
|
@param key_fields Pointer to add key, if usable
|
|
@param and_level And level, to be stored in KEY_FIELD
|
|
@param cond Condition predicate
|
|
@param field Field used in comparision
|
|
@param eq_func True if we used =, <=> or IS NULL
|
|
@param value Value used for comparison with field
|
|
@param num_values Number of values[] that we are comparing against
|
|
@param usable_tables Tables which can be used for key optimization
|
|
@param sargables IN/OUT Array of found sargable candidates
|
|
@param row_col_no if = n that > 0 then field is compared only
|
|
against the n-th component of row values
|
|
|
|
@note
|
|
If we are doing a NOT NULL comparison on a NOT NULL field in a outer join
|
|
table, we store this to be able to do not exists optimization later.
|
|
|
|
@returns
|
|
*key_fields is incremented if we stored a key in the array
|
|
*/
|
|
|
|
static void
|
|
add_key_field(JOIN *join,
|
|
KEY_FIELD **key_fields,uint and_level, Item_bool_func *cond,
|
|
Field *field, bool eq_func, Item **value, uint num_values,
|
|
table_map usable_tables, SARGABLE_PARAM **sargables,
|
|
uint row_col_no= 0)
|
|
{
|
|
uint optimize= 0;
|
|
if (eq_func &&
|
|
((join->is_allowed_hash_join_access() &&
|
|
field->hash_join_is_possible() &&
|
|
!(field->table->pos_in_table_list->is_materialized_derived() &&
|
|
field->table->is_created())) ||
|
|
(field->table->pos_in_table_list->is_materialized_derived() &&
|
|
!field->table->is_created() && !(field->flags & BLOB_FLAG))))
|
|
{
|
|
optimize= KEY_OPTIMIZE_EQ;
|
|
}
|
|
else if (!(field->flags & PART_KEY_FLAG))
|
|
{
|
|
// Don't remove column IS NULL on a LEFT JOIN table
|
|
if (eq_func && (*value)->type() == Item::NULL_ITEM &&
|
|
field->table->maybe_null && !field->null_ptr)
|
|
{
|
|
optimize= KEY_OPTIMIZE_EXISTS;
|
|
DBUG_ASSERT(num_values == 1);
|
|
}
|
|
}
|
|
if (optimize != KEY_OPTIMIZE_EXISTS)
|
|
{
|
|
table_map used_tables=0;
|
|
bool optimizable=0;
|
|
for (uint i=0; i<num_values; i++)
|
|
{
|
|
Item *curr_val;
|
|
if (row_col_no && value[i]->real_item()->type() == Item::ROW_ITEM)
|
|
{
|
|
Item_row *value_tuple= (Item_row *) (value[i]->real_item());
|
|
curr_val= value_tuple->element_index(row_col_no - 1);
|
|
}
|
|
else
|
|
curr_val= value[i];
|
|
table_map value_used_tables= curr_val->used_tables();
|
|
used_tables|= value_used_tables;
|
|
if (!(value_used_tables & (field->table->map | RAND_TABLE_BIT)))
|
|
optimizable=1;
|
|
}
|
|
if (!optimizable)
|
|
return;
|
|
if (!(usable_tables & field->table->map))
|
|
{
|
|
if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
|
|
!field->table->maybe_null || field->null_ptr)
|
|
return; // Can't use left join optimize
|
|
optimize= KEY_OPTIMIZE_EXISTS;
|
|
}
|
|
else
|
|
{
|
|
JOIN_TAB *stat=field->table->reginfo.join_tab;
|
|
key_map possible_keys=field->get_possible_keys();
|
|
possible_keys.intersect(field->table->keys_in_use_for_query);
|
|
stat[0].keys.merge(possible_keys); // Add possible keys
|
|
|
|
/*
|
|
Save the following cases:
|
|
Field op constant
|
|
Field LIKE constant where constant doesn't start with a wildcard
|
|
Field = field2 where field2 is in a different table
|
|
Field op formula
|
|
Field IS NULL
|
|
Field IS NOT NULL
|
|
Field BETWEEN ...
|
|
Field IN ...
|
|
*/
|
|
if (field->flags & PART_KEY_FLAG)
|
|
{
|
|
stat[0].key_dependent|= used_tables;
|
|
if (field->key_start.bits_set())
|
|
stat[0].key_start_dependent= 1;
|
|
}
|
|
|
|
bool is_const=1;
|
|
for (uint i=0; i<num_values; i++)
|
|
{
|
|
Item *curr_val;
|
|
if (row_col_no && value[i]->real_item()->type() == Item::ROW_ITEM)
|
|
{
|
|
Item_row *value_tuple= (Item_row *) (value[i]->real_item());
|
|
curr_val= value_tuple->element_index(row_col_no - 1);
|
|
}
|
|
else
|
|
curr_val= value[i];
|
|
if (!(is_const&= curr_val->const_item()))
|
|
break;
|
|
}
|
|
if (is_const)
|
|
{
|
|
stat[0].const_keys.merge(possible_keys);
|
|
bitmap_set_bit(&field->table->cond_set, field->field_index);
|
|
}
|
|
else if (!eq_func)
|
|
{
|
|
/*
|
|
Save info to be able check whether this predicate can be
|
|
considered as sargable for range analysis after reading const tables.
|
|
We do not save info about equalities as update_const_equal_items
|
|
will take care of updating info on keys from sargable equalities.
|
|
*/
|
|
(*sargables)--;
|
|
(*sargables)->field= field;
|
|
(*sargables)->arg_value= value;
|
|
(*sargables)->num_values= num_values;
|
|
}
|
|
if (!eq_func) // eq_func is NEVER true when num_values > 1
|
|
return;
|
|
}
|
|
}
|
|
/*
|
|
For the moment eq_func is always true. This slot is reserved for future
|
|
extensions where we want to remembers other things than just eq comparisons
|
|
*/
|
|
DBUG_ASSERT(eq_func);
|
|
/* Store possible eq field */
|
|
(*key_fields)->field= field;
|
|
(*key_fields)->eq_func= eq_func;
|
|
(*key_fields)->val= *value;
|
|
(*key_fields)->cond= cond;
|
|
(*key_fields)->level= and_level;
|
|
(*key_fields)->optimize= optimize;
|
|
/*
|
|
If the condition we are analyzing is NULL-rejecting and at least
|
|
one side of the equalities is NULLable, mark the KEY_FIELD object as
|
|
null-rejecting. This property is used by:
|
|
- add_not_null_conds() to add "column IS NOT NULL" conditions
|
|
- best_access_path() to produce better estimates for NULL-able unique keys.
|
|
*/
|
|
{
|
|
if ((cond->functype() == Item_func::EQ_FUNC ||
|
|
cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
|
|
((*value)->maybe_null() || field->real_maybe_null()))
|
|
(*key_fields)->null_rejecting= true;
|
|
else
|
|
(*key_fields)->null_rejecting= false;
|
|
}
|
|
(*key_fields)->cond_guard= NULL;
|
|
|
|
(*key_fields)->sj_pred_no= get_semi_join_select_list_index(field);
|
|
(*key_fields)++;
|
|
}
|
|
|
|
/**
|
|
Add possible keys to array of possible keys originated from a simple
|
|
predicate.
|
|
|
|
@param key_fields Pointer to add key, if usable
|
|
@param and_level And level, to be stored in KEY_FIELD
|
|
@param cond Condition predicate
|
|
@param field_item Field item used for comparison
|
|
@param eq_func True if we used =, <=> or IS NULL
|
|
@param value Value used for comparison with field_item
|
|
@param num_values Number of values[] that we are comparing against
|
|
@param usable_tables Tables which can be used for key optimization
|
|
@param sargables IN/OUT Array of found sargable candidates
|
|
@param row_col_no if = n that > 0 then field is compared only
|
|
against the n-th component of row values
|
|
|
|
@note
|
|
If field items f1 and f2 belong to the same multiple equality and
|
|
a key is added for f1, the the same key is added for f2.
|
|
|
|
@returns
|
|
*key_fields is incremented if we stored a key in the array
|
|
*/
|
|
|
|
static void
|
|
add_key_equal_fields(JOIN *join, KEY_FIELD **key_fields, uint and_level,
|
|
Item_bool_func *cond, Item *field_item,
|
|
bool eq_func, Item **val,
|
|
uint num_values, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables, uint row_col_no= 0)
|
|
{
|
|
Field *field= ((Item_field *) (field_item->real_item()))->field;
|
|
add_key_field(join, key_fields, and_level, cond, field,
|
|
eq_func, val, num_values, usable_tables, sargables,
|
|
row_col_no);
|
|
Item_equal *item_equal= field_item->get_item_equal();
|
|
if (item_equal)
|
|
{
|
|
/*
|
|
Add to the set of possible key values every substitution of
|
|
the field for an equal field included into item_equal
|
|
*/
|
|
Item_equal_fields_iterator it(*item_equal);
|
|
while (it++)
|
|
{
|
|
Field *equal_field= it.get_curr_field();
|
|
if (!field->eq(equal_field))
|
|
{
|
|
add_key_field(join, key_fields, and_level, cond, equal_field,
|
|
eq_func, val, num_values, usable_tables,
|
|
sargables, row_col_no);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Check if an expression is a non-outer field.
|
|
|
|
Checks if an expression is a field and belongs to the current select.
|
|
|
|
@param field Item expression to check
|
|
|
|
@return boolean
|
|
@retval TRUE the expression is a local field
|
|
@retval FALSE it's something else
|
|
*/
|
|
|
|
static bool
|
|
is_local_field (Item *field)
|
|
{
|
|
return field->real_item()->type() == Item::FIELD_ITEM
|
|
&& !(field->used_tables() & OUTER_REF_TABLE_BIT)
|
|
&& !((Item_field *)field->real_item())->get_depended_from();
|
|
}
|
|
|
|
|
|
/*
|
|
In this and other functions, and_level is a number that is ever-growing
|
|
and is different for the contents of every AND or OR clause. For example,
|
|
when processing clause
|
|
|
|
(a AND b AND c) OR (x AND y)
|
|
|
|
we'll have
|
|
* KEY_FIELD elements for (a AND b AND c) are assigned and_level=1
|
|
* KEY_FIELD elements for (x AND y) are assigned and_level=2
|
|
* OR operation is performed, and whatever elements are left after it are
|
|
assigned and_level=3.
|
|
|
|
The primary reason for having and_level attribute is the OR operation which
|
|
uses and_level to mark KEY_FIELDs that should get into the result of the OR
|
|
operation
|
|
*/
|
|
|
|
|
|
void
|
|
Item_cond_and::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
List_iterator_fast<Item> li(*argument_list());
|
|
KEY_FIELD *org_key_fields= *key_fields;
|
|
|
|
Item *item;
|
|
while ((item=li++))
|
|
item->add_key_fields(join, key_fields, and_level, usable_tables,
|
|
sargables);
|
|
for (; org_key_fields != *key_fields ; org_key_fields++)
|
|
org_key_fields->level= *and_level;
|
|
}
|
|
|
|
|
|
void
|
|
Item_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
List_iterator_fast<Item> li(*argument_list());
|
|
KEY_FIELD *org_key_fields= *key_fields;
|
|
|
|
(*and_level)++;
|
|
(li++)->add_key_fields(join, key_fields, and_level, usable_tables,
|
|
sargables);
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
KEY_FIELD *start_key_fields= *key_fields;
|
|
(*and_level)++;
|
|
item->add_key_fields(join, key_fields, and_level, usable_tables,
|
|
sargables);
|
|
*key_fields= merge_key_fields(org_key_fields,start_key_fields,
|
|
*key_fields, ++(*and_level));
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_trig_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
/*
|
|
Subquery optimization: Conditions that are pushed down into subqueries
|
|
are wrapped into Item_func_trig_cond. We process the wrapped condition
|
|
but need to set cond_guard for KEYUSE elements generated from it.
|
|
*/
|
|
if (!join->group_list && !join->order &&
|
|
join->unit->item &&
|
|
join->unit->item->substype() == Item_subselect::IN_SUBS &&
|
|
!join->unit->is_unit_op())
|
|
{
|
|
KEY_FIELD *save= *key_fields;
|
|
args[0]->add_key_fields(join, key_fields, and_level, usable_tables,
|
|
sargables);
|
|
// Indicate that this ref access candidate is for subquery lookup:
|
|
for (; save != *key_fields; save++)
|
|
save->cond_guard= get_trig_var();
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_between::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
/*
|
|
Build list of possible keys for 'a BETWEEN low AND high'.
|
|
It is handled similar to the equivalent condition
|
|
'a >= low AND a <= high':
|
|
*/
|
|
Item_field *field_item;
|
|
bool equal_func= false;
|
|
uint num_values= 2;
|
|
|
|
bool binary_cmp= (args[0]->real_item()->type() == Item::FIELD_ITEM)
|
|
? ((Item_field*) args[0]->real_item())->field->binary()
|
|
: true;
|
|
/*
|
|
Additional optimization: If 'low = high':
|
|
Handle as if the condition was "t.key = low".
|
|
*/
|
|
if (!negated && args[1]->eq(args[2], binary_cmp))
|
|
{
|
|
equal_func= true;
|
|
num_values= 1;
|
|
}
|
|
|
|
/*
|
|
Append keys for 'field <cmp> value[]' if the
|
|
condition is of the form::
|
|
'<field> BETWEEN value[1] AND value[2]'
|
|
*/
|
|
if (is_local_field(args[0]))
|
|
{
|
|
field_item= (Item_field *) (args[0]->real_item());
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
field_item, equal_func, &args[1],
|
|
num_values, usable_tables, sargables);
|
|
}
|
|
/*
|
|
Append keys for 'value[0] <cmp> field' if the
|
|
condition is of the form:
|
|
'value[0] BETWEEN field1 AND field2'
|
|
*/
|
|
for (uint i= 1; i <= num_values; i++)
|
|
{
|
|
if (is_local_field(args[i]))
|
|
{
|
|
field_item= (Item_field *) (args[i]->real_item());
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
field_item, equal_func, args,
|
|
1, usable_tables, sargables);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_in::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
DBUG_ASSERT(arg_count != 2);
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) (args[0]->real_item()), false,
|
|
args + 1, arg_count - 1, usable_tables, sargables);
|
|
}
|
|
else if (key_item()->type() == Item::ROW_ITEM &&
|
|
!(used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
Item_row *key_row= (Item_row *) key_item();
|
|
Item **key_col= key_row->addr(0);
|
|
uint row_cols= key_row->cols();
|
|
for (uint i= 0; i < row_cols; i++, key_col++)
|
|
{
|
|
if (is_local_field(*key_col))
|
|
{
|
|
Item_field *field_item= (Item_field *)((*key_col)->real_item());
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
field_item, false, args + 1, arg_count - 1,
|
|
usable_tables, sargables, i + 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_ne::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
if (!(used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
/*
|
|
QQ: perhaps test for !is_local_field(args[1]) is not really needed here.
|
|
Other comparison functions, e.g. Item_func_le, Item_func_gt, etc,
|
|
do not have this test. See Item_bool_func2::add_key_fieldoptimize_op().
|
|
Check with the optimizer team.
|
|
*/
|
|
if (is_local_field(args[0]) && !is_local_field(args[1]))
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) (args[0]->real_item()), false,
|
|
&args[1], 1, usable_tables, sargables);
|
|
/*
|
|
QQ: perhaps test for !is_local_field(args[0]) is not really needed here.
|
|
*/
|
|
if (is_local_field(args[1]) && !is_local_field(args[0]))
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) (args[1]->real_item()), false,
|
|
&args[0], 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_like::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
if (is_local_field(args[0]) && with_sargable_pattern())
|
|
{
|
|
/*
|
|
SELECT * FROM t1 WHERE field LIKE const_pattern
|
|
const_pattern starts with a non-wildcard character
|
|
*/
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) args[0]->real_item(), false,
|
|
args + 1, 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_bool_func2::add_key_fields_optimize_op(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level,
|
|
table_map usable_tables,
|
|
SARGABLE_PARAM **sargables,
|
|
bool equal_func)
|
|
{
|
|
/* If item is of type 'field op field/constant' add it to key_fields */
|
|
if (is_local_field(args[0]))
|
|
{
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) args[0]->real_item(), equal_func,
|
|
args + 1, 1, usable_tables, sargables);
|
|
}
|
|
if (is_local_field(args[1]))
|
|
{
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) args[1]->real_item(), equal_func,
|
|
args, 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_null_predicate::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level,
|
|
table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
/* column_name IS [NOT] NULL */
|
|
if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
Item *tmp= new (join->thd->mem_root) Item_null(join->thd);
|
|
if (unlikely(!tmp)) // Should never be true
|
|
return;
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) args[0]->real_item(),
|
|
functype() == Item_func::ISNULL_FUNC,
|
|
&tmp, 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_equal::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
Item *const_item2= get_const();
|
|
Item_equal_fields_iterator it(*this);
|
|
if (const_item2)
|
|
{
|
|
|
|
/*
|
|
For each field field1 from item_equal consider the equality
|
|
field1=const_item as a condition allowing an index access of the table
|
|
with field1 by the keys value of field1.
|
|
*/
|
|
while (it++)
|
|
{
|
|
Field *equal_field= it.get_curr_field();
|
|
add_key_field(join, key_fields, *and_level, this, equal_field,
|
|
TRUE, &const_item2, 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Consider all pairs of different fields included into item_equal.
|
|
For each of them (field1, field1) consider the equality
|
|
field1=field2 as a condition allowing an index access of the table
|
|
with field1 by the keys value of field2.
|
|
*/
|
|
Item_equal_fields_iterator fi(*this);
|
|
while (fi++)
|
|
{
|
|
Field *field= fi.get_curr_field();
|
|
Item *item;
|
|
while ((item= it++))
|
|
{
|
|
Field *equal_field= it.get_curr_field();
|
|
if (!field->eq(equal_field))
|
|
{
|
|
add_key_field(join, key_fields, *and_level, this, field,
|
|
TRUE, &item, 1, usable_tables,
|
|
sargables);
|
|
}
|
|
}
|
|
it.rewind();
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
  Return 0 for an empty key part map; otherwise return the result of
  my_find_first_bit() applied to the inverted map, i.e. the first zero bit
  of 'bits' is located by inverting all bits and finding the first set one.
*/
static inline uint
max_part_bit(key_part_map bits)
{
  return bits == 0 ? 0 : my_find_first_bit(~(ulonglong) bits);
}
|
|
|
|
|
|
/**
|
|
Add a new keuse to the specified array of KEYUSE objects
|
|
|
|
@param[in,out] keyuse_array array of keyuses to be extended
|
|
@param[in] key_field info on the key use occurrence
|
|
@param[in] key key number for the keyse to be added
|
|
@param[in] part key part for the keyuse to be added
|
|
|
|
@note
|
|
The function builds a new KEYUSE object for a key use utilizing the info
|
|
on the left and right parts of the given key use extracted from the
|
|
structure key_field, the key number and key part for this key use.
|
|
The built object is added to the dynamic array keyuse_array.
|
|
|
|
@retval 0 the built object is successfully added
|
|
@retval 1 otherwise
|
|
*/
|
|
|
|
static bool
|
|
add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field,
|
|
uint key, uint part)
|
|
{
|
|
KEYUSE keyuse;
|
|
Field *field= key_field->field;
|
|
|
|
keyuse.table= field->table;
|
|
keyuse.val= key_field->val;
|
|
keyuse.key= key;
|
|
if (!is_hash_join_key_no(key))
|
|
{
|
|
keyuse.keypart=part;
|
|
keyuse.keypart_map= (key_part_map) 1 << part;
|
|
}
|
|
else
|
|
{
|
|
keyuse.keypart= field->field_index;
|
|
keyuse.keypart_map= (key_part_map) 0;
|
|
}
|
|
keyuse.used_tables= key_field->val->used_tables();
|
|
keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
|
|
keyuse.ref_table_rows= 0;
|
|
keyuse.null_rejecting= key_field->null_rejecting;
|
|
keyuse.cond_guard= key_field->cond_guard;
|
|
keyuse.sj_pred_no= key_field->sj_pred_no;
|
|
keyuse.validity_ref= 0;
|
|
return (insert_dynamic(keyuse_array,(uchar*) &keyuse));
|
|
}
|
|
|
|
|
|
/*
|
|
Add all keys with uses 'field' for some keypart
|
|
If field->and_level != and_level then only mark key_part as const_part
|
|
|
|
RETURN
|
|
0 - OK
|
|
1 - Out of memory.
|
|
*/
|
|
|
|
static LEX_CSTRING equal_str= { STRING_WITH_LEN("=") };
|
|
|
|
static bool
|
|
add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
|
|
{
|
|
Field *field=key_field->field;
|
|
TABLE *form= field->table;
|
|
THD *thd= form->in_use;
|
|
|
|
if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
|
|
{
|
|
for (uint key=0 ; key < form->s->keys ; key++)
|
|
{
|
|
if (!(form->keys_in_use_for_query.is_set(key)))
|
|
continue;
|
|
if (form->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
|
|
continue; // ToDo: ft-keys in non-ft queries. SerG
|
|
|
|
KEY *keyinfo= form->key_info+key;
|
|
uint key_parts= form->actual_n_key_parts(keyinfo);
|
|
for (uint part=0 ; part < key_parts ; part++)
|
|
{
|
|
if (field->eq(form->key_info[key].key_part[part].field))
|
|
{
|
|
Data_type_compatibility compat=
|
|
field->can_optimize_keypart_ref(key_field->cond, key_field->val);
|
|
if (compat == Data_type_compatibility::OK)
|
|
{
|
|
if (add_keyuse(keyuse_array, key_field, key, part))
|
|
return TRUE;
|
|
}
|
|
else if (thd->give_notes_for_unusable_keys())
|
|
{
|
|
field->raise_note_cannot_use_key_part(thd, key, part,
|
|
equal_str,
|
|
key_field->cond->compare_collation(),
|
|
key_field->val,
|
|
compat);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (field->hash_join_is_possible() &&
|
|
(key_field->optimize & KEY_OPTIMIZE_EQ) &&
|
|
key_field->val->used_tables())
|
|
{
|
|
if (field->can_optimize_hash_join(key_field->cond, key_field->val) !=
|
|
Data_type_compatibility::OK)
|
|
return false;
|
|
if (form->is_splittable())
|
|
form->add_splitting_info_for_key_field(key_field);
|
|
/*
|
|
If a key use is extracted from an equi-join predicate then it is
|
|
added not only as a key use for every index whose component can
|
|
be evalusted utilizing this key use, but also as a key use for
|
|
hash join. Such key uses are marked with a special key number.
|
|
*/
|
|
if (add_keyuse(keyuse_array, key_field, get_hash_join_key_no(), 0))
|
|
return TRUE;
|
|
}
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
static bool
|
|
add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
|
|
JOIN_TAB *stat,COND *cond,table_map usable_tables)
|
|
{
|
|
Item_func_match *cond_func=NULL;
|
|
|
|
if (!cond)
|
|
return FALSE;
|
|
|
|
if (cond->type() == Item::FUNC_ITEM)
|
|
{
|
|
Item_func *func=(Item_func *)cond;
|
|
Item_func::Functype functype= func->functype();
|
|
if (functype == Item_func::FT_FUNC)
|
|
cond_func=(Item_func_match *)cond;
|
|
else if (func->argument_count() == 2)
|
|
{
|
|
Item *arg0=(Item *)(func->arguments()[0]),
|
|
*arg1=(Item *)(func->arguments()[1]);
|
|
if (arg1->const_item() && arg1->cols() == 1 &&
|
|
arg0->type() == Item::FUNC_ITEM &&
|
|
((Item_func *) arg0)->functype() == Item_func::FT_FUNC &&
|
|
((functype == Item_func::GE_FUNC && arg1->val_real() > 0) ||
|
|
(functype == Item_func::GT_FUNC && arg1->val_real() >=0)))
|
|
cond_func= (Item_func_match *) arg0;
|
|
else if (arg0->const_item() && arg0->cols() == 1 &&
|
|
arg1->type() == Item::FUNC_ITEM &&
|
|
((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
|
|
((functype == Item_func::LE_FUNC && arg0->val_real() > 0) ||
|
|
(functype == Item_func::LT_FUNC && arg0->val_real() >=0)))
|
|
cond_func= (Item_func_match *) arg1;
|
|
}
|
|
}
|
|
else if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
|
|
|
|
if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
|
|
{
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
if (add_ft_keys(keyuse_array,stat,item,usable_tables))
|
|
return TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!cond_func || cond_func->key == NO_SUCH_KEY ||
|
|
!(usable_tables & cond_func->table->map))
|
|
return FALSE;
|
|
|
|
KEYUSE keyuse;
|
|
keyuse.table= cond_func->table;
|
|
keyuse.val = cond_func;
|
|
keyuse.key = cond_func->key;
|
|
keyuse.keypart= FT_KEYPART;
|
|
keyuse.used_tables=cond_func->key_item()->used_tables();
|
|
keyuse.optimize= 0;
|
|
keyuse.ref_table_rows= 0;
|
|
keyuse.keypart_map= 0;
|
|
keyuse.sj_pred_no= UINT_MAX;
|
|
keyuse.validity_ref= 0;
|
|
keyuse.null_rejecting= FALSE;
|
|
return insert_dynamic(keyuse_array,(uchar*) &keyuse);
|
|
}
|
|
|
|
|
|
static int
|
|
sort_keyuse(KEYUSE *a,KEYUSE *b)
|
|
{
|
|
int res;
|
|
if (a->table->tablenr != b->table->tablenr)
|
|
return (int) (a->table->tablenr - b->table->tablenr);
|
|
if (a->key != b->key)
|
|
return (int) (a->key - b->key);
|
|
if (a->key == MAX_KEY && b->key == MAX_KEY &&
|
|
a->used_tables != b->used_tables)
|
|
return (int) ((ulong) a->used_tables - (ulong) b->used_tables);
|
|
if (a->keypart != b->keypart)
|
|
return (int) (a->keypart - b->keypart);
|
|
// Place const values before other ones
|
|
if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) -
|
|
MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT))))
|
|
return res;
|
|
/* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */
|
|
return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) -
|
|
(b->optimize & KEY_OPTIMIZE_REF_OR_NULL));
|
|
}
|
|
|
|
|
|
/*
|
|
Add to KEY_FIELD array all 'ref' access candidates within nested join.
|
|
|
|
This function populates KEY_FIELD array with entries generated from the
|
|
ON condition of the given nested join, and does the same for nested joins
|
|
contained within this nested join.
|
|
|
|
@param[in] nested_join_table Nested join pseudo-table to process
|
|
@param[in,out] end End of the key field array
|
|
@param[in,out] and_level And-level
|
|
@param[in,out] sargables Array of found sargable candidates
|
|
|
|
|
|
@note
|
|
We can add accesses to the tables that are direct children of this nested
|
|
join (1), and are not inner tables w.r.t their neighbours (2).
|
|
|
|
Example for #1 (outer brackets pair denotes nested join this function is
|
|
invoked for):
|
|
@code
|
|
... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond
|
|
@endcode
|
|
Example for #2:
|
|
@code
|
|
... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond
|
|
@endcode
|
|
In examples 1-2 for condition cond, we can add 'ref' access candidates to
|
|
t1 only.
|
|
Example #3:
|
|
@code
|
|
... LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond
|
|
@endcode
|
|
Here we can add 'ref' access candidates for t1 and t2, but not for t3.
|
|
*/
|
|
|
|
static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
|
|
KEY_FIELD **end, uint *and_level,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
|
|
List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
|
|
bool have_another = FALSE;
|
|
table_map tables= 0;
|
|
TABLE_LIST *table;
|
|
DBUG_ASSERT(nested_join_table->nested_join);
|
|
|
|
while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
|
|
(table= li++))))
|
|
{
|
|
if (table->nested_join)
|
|
{
|
|
if (!table->on_expr)
|
|
{
|
|
/* It's a semi-join nest. Walk into it as if it wasn't a nest */
|
|
have_another= TRUE;
|
|
li2= li;
|
|
li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
|
|
}
|
|
else
|
|
add_key_fields_for_nj(join, table, end, and_level, sargables);
|
|
}
|
|
else
|
|
if (!table->on_expr)
|
|
tables |= table->table->map;
|
|
}
|
|
if (nested_join_table->on_expr)
|
|
nested_join_table->on_expr->add_key_fields(join, end, and_level, tables,
|
|
sargables);
|
|
}
|
|
|
|
|
|
/*
  Run Item::count_sargable_conds over the ON expression of the given
  nested join and, recursively, over the ON expressions of the nested
  joins it contains. 'sel' is passed to the processor as its argument.

  A nested join without an ON expression is treated as a semi-join nest:
  its members are walked as if they belonged to the enclosing list, after
  which the walk over the enclosing list is resumed.
*/
void count_cond_for_nj(SELECT_LEX *sel, TABLE_LIST *nested_join_table)
{
  List_iterator<TABLE_LIST> cur_it(nested_join_table->nested_join->join_list);
  List_iterator<TABLE_LIST> saved_it(nested_join_table->nested_join->join_list);
  bool have_saved= FALSE;
  TABLE_LIST *tbl;

  while ((tbl= cur_it++) ||
         (have_saved && (cur_it= saved_it, have_saved= FALSE,
                         (tbl= cur_it++))))
  {
    if (!tbl->nested_join)
      continue;

    if (tbl->on_expr)
    {
      count_cond_for_nj(sel, tbl);
      continue;
    }

    /* It's a semi-join nest. Walk into it as if it wasn't a nest */
    have_saved= TRUE;
    saved_it= cur_it;
    cur_it= List_iterator<TABLE_LIST>(tbl->nested_join->join_list);
  }

  if (nested_join_table->on_expr)
    nested_join_table->on_expr->walk(&Item::count_sargable_conds, 0, sel);
}
|
|
|
|
/**
|
|
Update keyuse array with all possible keys we can use to fetch rows.
|
|
|
|
@param thd
|
|
@param[out] keyuse Put here ordered array of KEYUSE structures
|
|
@param join_tab Array in tablenr_order
|
|
@param tables Number of tables in join
|
|
@param cond WHERE condition (note that the function analyzes
|
|
join_tab[i]->on_expr too)
|
|
@param normal_tables Tables not inner w.r.t some outer join (ones
|
|
for which we can make ref access based the WHERE
|
|
clause)
|
|
@param select_lex current SELECT
|
|
@param[out] sargables Array of found sargable candidates
|
|
|
|
@retval
|
|
0 OK
|
|
@retval
|
|
1 Out of memory.
|
|
*/
|
|
|
|
static bool
|
|
update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
|
|
uint tables, COND *cond, table_map normal_tables,
|
|
SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
|
|
{
|
|
uint and_level,i;
|
|
KEY_FIELD *key_fields, *end, *field;
|
|
uint sz;
|
|
uint m= MY_MAX(select_lex->max_equal_elems,1);
|
|
DBUG_ENTER("update_ref_and_keys");
|
|
DBUG_PRINT("enter", ("normal_tables: %llx", normal_tables));
|
|
|
|
SELECT_LEX *sel=thd->lex->current_select;
|
|
sel->cond_count= 0;
|
|
sel->between_count= 0;
|
|
if (cond)
|
|
cond->walk(&Item::count_sargable_conds, 0, sel);
|
|
for (i=0 ; i < tables ; i++)
|
|
{
|
|
if (*join_tab[i].on_expr_ref)
|
|
(*join_tab[i].on_expr_ref)->walk(&Item::count_sargable_conds, 0, sel);
|
|
}
|
|
{
|
|
List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
|
|
TABLE_LIST *table;
|
|
while ((table= li++))
|
|
{
|
|
if (table->nested_join)
|
|
count_cond_for_nj(sel, table);
|
|
}
|
|
}
|
|
|
|
/*
|
|
We use the same piece of memory to store both KEY_FIELD
|
|
and SARGABLE_PARAM structure.
|
|
KEY_FIELD values are placed at the beginning this memory
|
|
while SARGABLE_PARAM values are put at the end.
|
|
All predicates that are used to fill arrays of KEY_FIELD
|
|
and SARGABLE_PARAM structures have at most 2 arguments
|
|
except BETWEEN predicates that have 3 arguments and
|
|
IN predicates.
|
|
This any predicate if it's not BETWEEN/IN can be used
|
|
directly to fill at most 2 array elements, either of KEY_FIELD
|
|
or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
|
|
can be filled as this predicate is considered as
|
|
saragable with respect to each of its argument.
|
|
An IN predicate can require at most 1 element as currently
|
|
it is considered as sargable only for its first argument.
|
|
Multiple equality can add elements that are filled after
|
|
substitution of field arguments by equal fields. There
|
|
can be not more than select_lex->max_equal_elems such
|
|
substitutions.
|
|
*/
|
|
sz= MY_MAX(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))*
|
|
((sel->cond_count*2 + sel->between_count)*m+1);
|
|
if (!(key_fields=(KEY_FIELD*) thd->alloc(sz)))
|
|
DBUG_RETURN(TRUE); /* purecov: inspected */
|
|
and_level= 0;
|
|
field= end= key_fields;
|
|
*sargables= (SARGABLE_PARAM *) key_fields +
|
|
(sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
|
|
/* set a barrier for the array of SARGABLE_PARAM */
|
|
(*sargables)[0].field= 0;
|
|
|
|
if (my_init_dynamic_array2(thd->mem_root->psi_key, keyuse, sizeof(KEYUSE),
|
|
thd->alloc(sizeof(KEYUSE) * 20), 20, 64,
|
|
MYF(MY_THREAD_SPECIFIC)))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
if (cond)
|
|
{
|
|
KEY_FIELD *saved_field= field;
|
|
cond->add_key_fields(join_tab->join, &end, &and_level, normal_tables,
|
|
sargables);
|
|
for (; field != end ; field++)
|
|
{
|
|
|
|
/* Mark that we can optimize LEFT JOIN */
|
|
if (field->val->type() == Item::NULL_ITEM &&
|
|
!field->field->real_maybe_null())
|
|
field->field->table->reginfo.not_exists_optimize=1;
|
|
}
|
|
field= saved_field;
|
|
}
|
|
for (i=0 ; i < tables ; i++)
|
|
{
|
|
/*
|
|
Block the creation of keys for inner tables of outer joins.
|
|
Here only the outer joins that can not be converted to
|
|
inner joins are left and all nests that can be eliminated
|
|
are flattened.
|
|
In the future when we introduce conditional accesses
|
|
for inner tables in outer joins these keys will be taken
|
|
into account as well.
|
|
*/
|
|
if (*join_tab[i].on_expr_ref)
|
|
(*join_tab[i].on_expr_ref)->add_key_fields(join_tab->join, &end,
|
|
&and_level,
|
|
join_tab[i].table->map,
|
|
sargables);
|
|
}
|
|
|
|
/* Process ON conditions for the nested joins */
|
|
{
|
|
List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
|
|
TABLE_LIST *table;
|
|
while ((table= li++))
|
|
{
|
|
if (table->nested_join)
|
|
add_key_fields_for_nj(join_tab->join, table, &end, &and_level,
|
|
sargables);
|
|
}
|
|
}
|
|
|
|
/* fill keyuse with found key parts */
|
|
for ( ; field != end ; field++)
|
|
{
|
|
if (add_key_part(keyuse,field))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
if (select_lex->ftfunc_list->elements)
|
|
{
|
|
if (add_ft_keys(keyuse,join_tab,cond,normal_tables))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
/*
|
|
check if key could be used with eq_ref
|
|
|
|
The assumption is that all previous key parts where used
|
|
*/
|
|
|
|
static void remember_if_eq_ref_key(JOIN *join, KEYUSE *use)
|
|
{
|
|
DBUG_ASSERT(use->keypart != FT_KEYPART && use->key != MAX_KEY);
|
|
TABLE *table= use->table;
|
|
KEY *key= table->key_info+use->key;
|
|
ulong key_flags= table->actual_key_flags(key);
|
|
|
|
/*
|
|
Check if possible eq_ref key
|
|
This may include keys that does not have HA_NULL_PART_KEY
|
|
set, but this is ok as best_access_path will resolve this.
|
|
*/
|
|
if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME)))
|
|
{
|
|
uint key_parts= table->actual_n_key_parts(key);
|
|
if (use->keypart+1 == key_parts)
|
|
join->eq_ref_tables|= table->map;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Sort the array of possible keys and remove the following key parts:
|
|
- ref if there is a keypart which is a ref and a const.
|
|
(e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
|
|
then we skip the key part corresponding to b=t2.d)
|
|
- keyparts without previous keyparts
|
|
(e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
|
|
used in the query, we drop the partial key parts from consideration).
|
|
Special treatment for ft-keys.
|
|
Update join->eq_ref_tables with a bitmap of all tables that can possible
|
|
have a EQ_REF key.
|
|
|
|
Note that the keys are generated to be used by best_access_path() during
|
|
the optimization stage. Unused keys will later be deleted by
|
|
JOIN::drop_unused_derived_keys().
|
|
*/
|
|
|
|
bool sort_and_filter_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse,
|
|
bool skip_unprefixed_keyparts)
|
|
{
|
|
THD *thd= join->thd;
|
|
KEYUSE key_end, *prev, *save_pos, *use;
|
|
uint found_eq_constant, i;
|
|
bool found_unprefixed_key_part= 0;
|
|
|
|
join->eq_ref_tables= 0;
|
|
DBUG_ASSERT(keyuse->elements);
|
|
|
|
my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
|
|
(qsort_cmp) sort_keyuse);
|
|
|
|
bzero((char*) &key_end, sizeof(key_end)); /* Add for easy testing */
|
|
if (insert_dynamic(keyuse, (uchar*) &key_end))
|
|
return TRUE;
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
|
|
generate_derived_keys(keyuse);
|
|
|
|
use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
|
|
prev= &key_end;
|
|
found_eq_constant= 0;
|
|
/* Loop over all elements except the last 'key_end' */
|
|
for (i=0 ; i < keyuse->elements-1 ; i++,use++)
|
|
{
|
|
if (!use->is_for_hash_join())
|
|
{
|
|
if (!(use->used_tables & ~OUTER_REF_TABLE_BIT) &&
|
|
use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
|
|
use->table->const_key_parts[use->key]|= use->keypart_map;
|
|
if (use->keypart != FT_KEYPART)
|
|
{
|
|
if (use->key == prev->key && use->table == prev->table)
|
|
{
|
|
if (prev->keypart == use->keypart && found_eq_constant)
|
|
continue;
|
|
if (prev->keypart+1 < use->keypart)
|
|
{
|
|
found_unprefixed_key_part= 1;
|
|
if (skip_unprefixed_keyparts)
|
|
continue; /* remove */
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Key changed, check if previous key was a primary/unique key lookup
|
|
*/
|
|
if (prev != &key_end && !found_unprefixed_key_part)
|
|
remember_if_eq_ref_key(join, prev);
|
|
found_unprefixed_key_part= 0;
|
|
if (use->keypart != 0)
|
|
{
|
|
found_unprefixed_key_part= 1;
|
|
if (skip_unprefixed_keyparts)
|
|
continue; /* remove - first found key part must be 0 */
|
|
}
|
|
}
|
|
}
|
|
else /* FT_KEY_PART */
|
|
{
|
|
if (prev != &key_end && !found_unprefixed_key_part)
|
|
remember_if_eq_ref_key(join, prev);
|
|
found_unprefixed_key_part= 1; // This key cannot be EQ_REF
|
|
}
|
|
prev= use;
|
|
found_eq_constant= !use->used_tables;
|
|
use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
|
|
}
|
|
else
|
|
{
|
|
if (prev != &key_end && !found_unprefixed_key_part)
|
|
remember_if_eq_ref_key(join, prev);
|
|
prev= &key_end;
|
|
}
|
|
/*
|
|
Old gcc used a memcpy(), which is undefined if save_pos==use:
|
|
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
|
|
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
|
|
This also disables a valgrind warning, so better to have the test.
|
|
*/
|
|
if (save_pos != use)
|
|
*save_pos= *use;
|
|
/* Save ptr to first use */
|
|
if (!use->table->reginfo.join_tab->keyuse)
|
|
use->table->reginfo.join_tab->keyuse= save_pos;
|
|
save_pos++;
|
|
}
|
|
if (prev != &key_end && !found_unprefixed_key_part)
|
|
remember_if_eq_ref_key(join, prev);
|
|
i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
|
|
(void) set_dynamic(keyuse,(uchar*) &key_end,i);
|
|
keyuse->elements= i;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Update some values in keyuse for faster choose_plan() loop.
|
|
*/
|
|
|
|
void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
|
|
{
|
|
KEYUSE *end,*keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);
|
|
|
|
for (end= keyuse+ keyuse_array->elements ; keyuse < end ; keyuse++)
|
|
{
|
|
table_map map;
|
|
/*
|
|
If we find a ref, assume this table matches a proportional
|
|
part of this table.
|
|
For example 100 records matching a table with 5000 records
|
|
gives 5000/100 = 50 records per key
|
|
Constant tables are ignored.
|
|
To avoid bad matches, we don't make ref_table_rows less than 100.
|
|
*/
|
|
keyuse->ref_table_rows= ~(ha_rows) 0; // If no ref
|
|
if (keyuse->used_tables &
|
|
(map= (keyuse->used_tables & ~join->const_table_map &
|
|
~OUTER_REF_TABLE_BIT)))
|
|
{
|
|
uint n_tables= my_count_bits(map);
|
|
if (n_tables == 1) // Only one table
|
|
{
|
|
DBUG_ASSERT(!(map & PSEUDO_TABLE_BITS)); // Must be a real table
|
|
Table_map_iterator it(map);
|
|
int tablenr= it.next_bit();
|
|
DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END);
|
|
TABLE *tmp_table=join->table[tablenr];
|
|
if (tmp_table) // already created
|
|
keyuse->ref_table_rows= MY_MAX(tmp_table->file->stats.records, 100);
|
|
}
|
|
}
|
|
/*
|
|
Outer reference (external field) is constant for single executing
|
|
of subquery
|
|
*/
|
|
if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
|
|
keyuse->ref_table_rows= 1;
|
|
}
|
|
}
|
|
|
|
/**
|
|
Check for the presence of AGGFN(DISTINCT a) queries that may be subject
|
|
to loose index scan.
|
|
|
|
Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
|
|
(QUICK_GROUP_MIN_MAX_SELECT).
|
|
Optionally (if out_args is supplied) will push the arguments of
|
|
AGGFN(DISTINCT) to the list
|
|
|
|
Check for every COUNT(DISTINCT), AVG(DISTINCT) or
|
|
SUM(DISTINCT). These can be resolved by Loose Index Scan as long
|
|
as all the aggregate distinct functions refer to the same
|
|
fields. Thus:
|
|
|
|
SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
|
|
SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT a) ... => can use LIS
|
|
SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a) ... => cannot use LIS
|
|
SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT b) ... => cannot use LIS
|
|
etc.
|
|
|
|
@param join the join to check
|
|
@param[out] out_args Collect the arguments of the aggregate functions
|
|
to a list. We don't worry about duplicates as
|
|
these will be sorted out later in
|
|
get_best_group_min_max.
|
|
|
|
@return does the query qualify for indexed AGGFN(DISTINCT)
|
|
@retval true it does
|
|
@retval false AGGFN(DISTINCT) must apply distinct in it.
|
|
*/
|
|
|
|
bool
|
|
is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
|
|
{
|
|
Item_sum **sum_item_ptr;
|
|
bool result= false;
|
|
|
|
if (join->table_count != 1 || /* reference more than 1 table */
|
|
join->select_distinct || /* or a DISTINCT */
|
|
join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
|
|
return false;
|
|
|
|
Bitmap<MAX_FIELDS> first_aggdistinct_fields;
|
|
bool first_aggdistinct_fields_initialized= false;
|
|
for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
|
|
{
|
|
Item_sum *sum_item= *sum_item_ptr;
|
|
Item *expr;
|
|
/* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
|
|
switch (sum_item->sum_func())
|
|
{
|
|
case Item_sum::MIN_FUNC:
|
|
case Item_sum::MAX_FUNC:
|
|
continue;
|
|
case Item_sum::COUNT_DISTINCT_FUNC:
|
|
break;
|
|
case Item_sum::AVG_DISTINCT_FUNC:
|
|
case Item_sum::SUM_DISTINCT_FUNC:
|
|
if (sum_item->get_arg_count() == 1)
|
|
break;
|
|
/* fall through */
|
|
default: return false;
|
|
}
|
|
/*
|
|
We arrive here for every COUNT(DISTINCT),AVG(DISTINCT) or SUM(DISTINCT).
|
|
Collect the arguments of the aggregate functions to a list.
|
|
We don't worry about duplicates as these will be sorted out later in
|
|
get_best_group_min_max
|
|
*/
|
|
Bitmap<MAX_FIELDS> cur_aggdistinct_fields;
|
|
cur_aggdistinct_fields.clear_all();
|
|
for (uint i= 0; i < sum_item->get_arg_count(); i++)
|
|
{
|
|
expr= sum_item->get_arg(i);
|
|
/* The AGGFN(DISTINCT) arg is not an attribute? */
|
|
if (expr->real_item()->type() != Item::FIELD_ITEM)
|
|
return false;
|
|
|
|
Item_field* item= static_cast<Item_field*>(expr->real_item());
|
|
if (out_args)
|
|
out_args->push_back(item, join->thd->mem_root);
|
|
|
|
cur_aggdistinct_fields.set_bit(item->field->field_index);
|
|
result= true;
|
|
}
|
|
/*
|
|
If there are multiple aggregate functions, make sure that they all
|
|
refer to exactly the same set of columns.
|
|
*/
|
|
if (!first_aggdistinct_fields_initialized)
|
|
{
|
|
first_aggdistinct_fields= cur_aggdistinct_fields;
|
|
first_aggdistinct_fields_initialized=true;
|
|
}
|
|
else if (first_aggdistinct_fields != cur_aggdistinct_fields)
|
|
return false;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
Discover the indexes that can be used for GROUP BY or DISTINCT queries.
|
|
|
|
If the query has a GROUP BY clause, find all indexes that contain all
|
|
GROUP BY fields, and add those indexes to join->const_keys.
|
|
|
|
If the query has a DISTINCT clause, find all indexes that contain all
|
|
SELECT fields, and add those indexes to join->const_keys.
|
|
This allows later on such queries to be processed by a
|
|
QUICK_GROUP_MIN_MAX_SELECT.
|
|
|
|
@param join
|
|
@param join_tab
|
|
|
|
@return
|
|
None
|
|
*/
|
|
|
|
static void
|
|
add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
|
|
{
|
|
List<Item_field> indexed_fields;
|
|
List_iterator<Item_field> indexed_fields_it(indexed_fields);
|
|
ORDER *cur_group;
|
|
Item_field *cur_item;
|
|
key_map possible_keys(0);
|
|
|
|
if (join->group_list)
|
|
{ /* Collect all query fields referenced in the GROUP clause. */
|
|
for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
|
|
(*cur_group->item)->walk(&Item::collect_item_field_processor, 0,
|
|
&indexed_fields);
|
|
}
|
|
else if (join->select_distinct)
|
|
{ /* Collect all query fields referenced in the SELECT clause. */
|
|
List<Item> &select_items= join->fields_list;
|
|
List_iterator<Item> select_items_it(select_items);
|
|
Item *item;
|
|
while ((item= select_items_it++))
|
|
item->walk(&Item::collect_item_field_processor, 0, &indexed_fields);
|
|
}
|
|
else if (!join->tmp_table_param.sum_func_count ||
|
|
!is_indexed_agg_distinct(join, &indexed_fields))
|
|
{
|
|
/*
|
|
There where no GROUP BY fields and also either no aggregate
|
|
functions or not all aggregate functions where used with the
|
|
same DISTINCT (or MIN() / MAX() that works similarly).
|
|
Nothing to do there.
|
|
*/
|
|
return;
|
|
}
|
|
|
|
if (indexed_fields.elements == 0)
|
|
{
|
|
/* There where no index we could use to satisfy the GROUP BY */
|
|
return;
|
|
}
|
|
|
|
/* Intersect the keys of all group fields. */
|
|
cur_item= indexed_fields_it++;
|
|
possible_keys.merge(cur_item->field->part_of_key);
|
|
while ((cur_item= indexed_fields_it++))
|
|
{
|
|
possible_keys.intersect(cur_item->field->part_of_key);
|
|
}
|
|
|
|
if (!possible_keys.is_clear_all())
|
|
join_tab->const_keys.merge(possible_keys);
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Go through all combinations of not marked tables and find the one
|
|
which uses least records
|
|
*****************************************************************************/
|
|
|
|
/** Save const tables first as used tables. */
|
|
|
|
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
|
|
{
|
|
join->positions[idx].table= table;
|
|
join->positions[idx].key=key;
|
|
join->positions[idx].records_read=1.0; /* This is a const table */
|
|
join->positions[idx].records_out=1.0; /* This is a const table */
|
|
join->positions[idx].records_init=1.0; /* This is a const table */
|
|
join->positions[idx].cond_selectivity= 1.0;
|
|
join->positions[idx].ref_depend_map= 0;
|
|
join->positions[idx].partial_join_cardinality= 1;
|
|
|
|
// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
|
|
join->positions[idx].sj_strategy= SJ_OPT_NONE;
|
|
join->positions[idx].use_join_buffer= FALSE;
|
|
join->positions[idx].range_rowid_filter_info= 0;
|
|
|
|
/* Move the const table as down as possible in best_ref */
|
|
JOIN_TAB **pos=join->best_ref+idx+1;
|
|
JOIN_TAB *next=join->best_ref[idx];
|
|
for (;next != table ; pos++)
|
|
{
|
|
JOIN_TAB *tmp=pos[0];
|
|
pos[0]=next;
|
|
next=tmp;
|
|
}
|
|
join->best_ref[idx]=table;
|
|
join->positions[idx].spl_plan= 0;
|
|
join->positions[idx].spl_pd_boundary= 0;
|
|
}
|
|
|
|
|
|
/*
|
|
Estimate how many records we will get if we read just this table and apply
|
|
a part of WHERE that can be checked using only the current table and
|
|
const tables.
|
|
|
|
@param s Current JOIN_TAB
|
|
@param use_cond_selectivity Value of optimizer_use_condition_selectivity.
|
|
If > 1 then use table->cond_selecitivity.
|
|
@return 0.0 No matching rows
|
|
@return >= 1.0 Number of expected matching rows
|
|
|
|
Estimate how many records we will get if we
|
|
- read the given table with its "independent" access method (either quick
|
|
select or full table/index scan),
|
|
- apply the part of WHERE that refers only to this table and const tables.
|
|
- The result cannot be bigger than table records
|
|
|
|
@see also
|
|
table_after_join_selectivity() produces selectivity of condition that is
|
|
checked after joining rows from this table to rows from preceding tables.
|
|
*/
|
|
|
|
static double apply_selectivity_for_table(JOIN_TAB *s,
|
|
uint use_cond_selectivity)
|
|
{
|
|
double dbl_records;
|
|
|
|
if (use_cond_selectivity > 1)
|
|
{
|
|
TABLE *table= s->table;
|
|
double sel= table->cond_selectivity;
|
|
double table_records= rows2double(s->records);
|
|
DBUG_ASSERT(sel >= 0 && sel <= 1.0);
|
|
/*
|
|
table->cond_selectivity will include data from opt_range.
|
|
Here we check that this is indeeded the case.
|
|
Note that if table_records == 0, then 'sel' is probably 1
|
|
*/
|
|
DBUG_ASSERT(table_records == 0 ||
|
|
sel <= s->table->opt_range_condition_rows /
|
|
table_records);
|
|
dbl_records= table_records * sel;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
This is only taking into considering constant key parts used with
|
|
this table!
|
|
If no such conditions existed the following should normally hold:
|
|
s->table->opt_range_condition_rows == s->found_rows ==
|
|
s->records.
|
|
The case when this does not hold is when using 'best splitting'
|
|
in which case s->records may be less than s->found_rows;
|
|
*/
|
|
DBUG_ASSERT(s->table->opt_range_condition_rows <= s->found_records);
|
|
dbl_records= rows2double(MY_MIN(s->table->opt_range_condition_rows,
|
|
s->records));
|
|
}
|
|
|
|
DBUG_ASSERT(dbl_records <= s->records);
|
|
/*
|
|
Ensure we return at least one row if there is any possibility to have
|
|
a matching row. Having rows >= 1.0 helps ensure that when we calculate
|
|
total rows of joins, the number of resulting rows will not be less
|
|
after the join. In other words, we assume there is at least one matching
|
|
row when joining a row with the next table.
|
|
0.0 is returned only if it is guaranteed there are no matching rows
|
|
(for example if the table is empty).
|
|
*/
|
|
return dbl_records ? MY_MAX(dbl_records, MIN_ROWS_AFTER_FILTERING) : 0.0;
|
|
}
|
|
|
|
|
|
/*
|
|
Take into account that the table's WHERE clause has conditions on earlier
|
|
tables that can reduce the number of accepted rows.
|
|
|
|
@param records Number of original rows (after selectivity)
|
|
|
|
If there is a filtering condition on the table (i.e. ref analyzer found
|
|
at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
|
|
preceding this table in the join order we're now considering), then
|
|
assume that 25% of the rows will be filtered out by this condition.
|
|
|
|
This heuristic is supposed to force tables used in exprZ to be before
|
|
this table in join order.
|
|
*/
|
|
inline double use_found_constraint(double records)
|
|
{
|
|
records-= records/4;
|
|
return records ? MY_MAX(records, MIN_ROWS_AFTER_FILTERING) : 0.0;
|
|
}
|
|
|
|
|
|
/*
|
|
Calculate the cost of reading a set of rows trough an index
|
|
|
|
@param eq_ref True if there is only one matching key (EQ_REF)
|
|
|
|
Logically this is identical to the code in multi_range_read_info_const()
|
|
excepts the function also takes into account io_blocks and multiple
|
|
ranges.
|
|
|
|
One main difference between the functions is that
|
|
multi_range_read_info_const() adds a very small cost per range
|
|
MULTI_RANGE_READ_SETUP_COST, to ensure that 'ref' is preferred
|
|
over ranges.
|
|
|
|
Note that this function assumes that index_only_cost is only to be
|
|
used with filtering (as cost.read_cost takes into account both
|
|
clustering and covered keys). index_only_cost does not include
|
|
KEY_COPY_COST as for filtering there is no copying of not accepted
|
|
keys.
|
|
|
|
If eq_ref is not set, it means that we have to do one extra 'read_next'
|
|
on the index to verify that there is not more keys with the same value.
|
|
|
|
WHERE_COST cost is not added to any result.
|
|
*/
|
|
|
|
static ALL_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
|
|
uint key, ha_rows records,
|
|
bool eq_ref)
|
|
{
|
|
ALL_READ_COST cost;
|
|
handler *file= table->file;
|
|
ha_rows max_seeks;
|
|
ha_rows extra_reads= eq_ref ? 0 : 1;
|
|
DBUG_ENTER("cost_for_index_read");
|
|
|
|
max_seeks= (ha_rows) thd->variables.max_seeks_for_key;
|
|
set_if_bigger(records, 1);
|
|
|
|
if (file->is_clustering_key(key))
|
|
{
|
|
cost.index_cost=
|
|
file->ha_keyread_clustered_time(key, 1, records+extra_reads, 0);
|
|
cost.copy_cost= rows2double(records) * file->ROW_COPY_COST;
|
|
/* There is no 'index_only_read' with a clustered index */
|
|
cost.row_cost= {0,0};
|
|
/* Caping of index_blocks will happen in handler::cost() */
|
|
cost.max_index_blocks= MY_MIN(file->row_blocks(), max_seeks);
|
|
cost.max_row_blocks= 0;
|
|
}
|
|
else if (table->covering_keys.is_set(key) && !table->no_keyread)
|
|
{
|
|
cost.index_cost= file->ha_keyread_time(key, 1, records + extra_reads, 0);
|
|
cost.row_cost= {0,0};
|
|
cost.copy_cost= rows2double(records) * file->KEY_COPY_COST;
|
|
cost.max_index_blocks= MY_MIN(file->index_blocks(key), max_seeks);
|
|
cost.max_row_blocks= 0;
|
|
}
|
|
else
|
|
{
|
|
cost.index_cost= file->ha_keyread_time(key, 1, records + extra_reads, 0);
|
|
/* ha_rnd_pos_time() includes time for copying the row */
|
|
cost.row_cost= file->ha_rnd_pos_time(records);
|
|
cost.max_index_blocks= MY_MIN(file->index_blocks(key), max_seeks);
|
|
cost.max_row_blocks= MY_MIN(file->row_blocks(), max_seeks);
|
|
cost.copy_cost= 0;
|
|
}
|
|
DBUG_PRINT("statistics", ("index_cost: %.3f row_cost: %.3f",
|
|
file->cost(cost.index_cost),
|
|
file->cost(cost.row_cost)));
|
|
DBUG_RETURN(cost);
|
|
}
|
|
|
|
|
|
/**
|
|
Apply filter if the filter is better than the current cost
|
|
|
|
@param thd Thread handler
|
|
@param table Table
|
|
@param cost Pointer to cost for current cost, which does not
|
|
include WHERE_COST cost. Will be updated to
|
|
new cost if filter is chosen.
|
|
Will be updated to new cost if filter is used.
|
|
@param records_arg Pointer to number of records for the current key.
|
|
Will be updated to records after filter, if filter is
|
|
used.
|
|
@param startup_cost Startup cost. Will be updated if filter is used.
|
|
@param fetch_cost Cost of finding the row, without where compare cost
|
|
@param index_only_cost Cost if fetching '*records_arg' key values
|
|
@param prev_records Number of record combinations in previous tables
|
|
|
|
@return 'this' Filter is used (and variables are updated)
|
|
@return 0 Filter is worse than old plan
|
|
*/
|
|
|
|
Range_rowid_filter_cost_info* Range_rowid_filter_cost_info::
|
|
apply_filter(THD *thd, TABLE *table, ALL_READ_COST *cost,
|
|
double *records_arg,
|
|
double *startup_cost,
|
|
uint ranges, double prev_records)
|
|
{
|
|
handler *file= table->file;
|
|
bool use_filter;
|
|
double new_cost, org_cost, records= *records_arg, new_records;
|
|
double filter_startup_cost= get_setup_cost();
|
|
double filter_lookup_cost= records * lookup_cost();
|
|
double tmp;
|
|
ALL_READ_COST adjusted_cost;
|
|
|
|
/*
|
|
Calculate number of resulting rows after filtering
|
|
Here we trust selectivity and do not adjust rows up even if
|
|
the end result is low. This means that new_records is allowed to be
|
|
be < 1.0
|
|
*/
|
|
new_records= records * selectivity;
|
|
|
|
/*
|
|
Calculate the cost of the filter based on that we had originally
|
|
'records' rows and after the filter only 'new_records' accepted
|
|
rows.
|
|
Note that the rejected rows, we have only done a key read. We only
|
|
fetch the row and compare the where if the filter accepts the
|
|
row id.
|
|
In case of index only read, fetch_cost == index_only_cost. Even in this
|
|
the filter can give a better plan as we have to do less comparisons
|
|
with the WHERE clause.
|
|
|
|
The io_cost is used to take into account that we have to do 1 key
|
|
lookup to find the first matching key in each range.
|
|
*/
|
|
|
|
adjusted_cost= *cost;
|
|
/* We are going to read 'selectivity' fewer rows */
|
|
adjusted_cost.row_cost.io*= selectivity;
|
|
adjusted_cost.row_cost.cpu*= selectivity;
|
|
adjusted_cost.copy_cost*= selectivity; // Cost of copying row or key
|
|
adjusted_cost.index_cost.cpu+= filter_lookup_cost;
|
|
|
|
tmp= prev_records * WHERE_COST_THD(thd);
|
|
org_cost= (file->cost_for_reading_multiple_times(prev_records,
|
|
cost) +
|
|
records * tmp);
|
|
|
|
new_cost= (file->cost_for_reading_multiple_times(prev_records,
|
|
&adjusted_cost) +
|
|
new_records * tmp + filter_startup_cost);
|
|
|
|
DBUG_ASSERT(new_cost >= 0 && new_records >= 0);
|
|
use_filter= new_cost < org_cost;
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
Json_writer_object trace_filter(thd, "filter");
|
|
trace_filter.add("rowid_filter_index",
|
|
table->key_info[get_key_no()].name).
|
|
add("index_only_cost", file->cost(cost->index_cost)).
|
|
add("filter_startup_cost", filter_startup_cost).
|
|
add("find_key_and_filter_lookup_cost", filter_lookup_cost).
|
|
add("filter_selectivity", selectivity).
|
|
add("original_rows", records).
|
|
add("new_rows", new_records).
|
|
add("original_access_cost", file->cost(cost)).
|
|
add("with_filter_access_cost", file->cost(&adjusted_cost)).
|
|
add("original_found_rows_cost", file->cost(cost->row_cost)).
|
|
add("with_filter_found_rows_cost", file->cost(adjusted_cost.row_cost)).
|
|
add("org_cost", org_cost).
|
|
add("filter_cost", new_cost).
|
|
add("filter_used", use_filter);
|
|
}
|
|
if (use_filter)
|
|
{
|
|
cost->row_cost= adjusted_cost.row_cost;
|
|
cost->index_cost= adjusted_cost.index_cost;
|
|
cost->copy_cost= adjusted_cost.copy_cost;
|
|
*records_arg= new_records;
|
|
(*startup_cost)+= filter_startup_cost;
|
|
return this;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Compute the fanout of hash join operation using EITS data
|
|
|
|
@param join JOIN structure
|
|
@param tab JOIN_TAB for the current table
|
|
@param remaining_tables Map of tables not yet accessable
|
|
@param rnd_records Number of accepted rows in the table, after taking
|
|
selectivity into account.
|
|
@param hj_start_key Pointer to hash key
|
|
@param stats_found Is set to 1 if we found any usable hash key part
|
|
with statistics from analyze.
|
|
*/
|
|
|
|
double hash_join_fanout(JOIN *join, JOIN_TAB *tab, table_map remaining_tables,
|
|
double rnd_records, KEYUSE *hj_start_key,
|
|
bool *stats_found)
|
|
{
|
|
THD *thd= join->thd;
|
|
/*
|
|
Before doing the hash join, we will scan the table and apply the local part
|
|
of the WHERE condition. This will produce rnd_records.
|
|
|
|
The EITS statistics describes the entire table. Calling
|
|
|
|
table->field[N]->get_avg_frequency()
|
|
|
|
produces average #rows in the table with some value.
|
|
|
|
What happens if we filter out rows so that rnd_records rows are left?
|
|
Something between the two outcomes:
|
|
A. filtering removes a fraction of rows for each value:
|
|
avg_frequency=avg_frequency * condition_selectivity
|
|
|
|
B. filtering removes entire groups of rows with the same value, but
|
|
the remaining groups remain of the same size.
|
|
|
|
We make pessimistic assumption and assume B.
|
|
We also handle an edge case: if rnd_records is less than avg_frequency,
|
|
assume we'll get rnd_records rows with the same value, and return
|
|
rnd_records as the fanout estimate.
|
|
*/
|
|
double min_freq= (double) tab->table->stat_records();
|
|
bool found_not_usable_field= 0;
|
|
bool found_usable_field __attribute__((unused))= 0;
|
|
DBUG_ENTER("hash_join_cardinality");
|
|
DBUG_ASSERT(rnd_records > 0 && min_freq > 0);
|
|
|
|
Json_writer_object trace_obj(thd, "hash_join_cardinality");
|
|
|
|
/*
|
|
There can be multiple KEYUSE referring to same or different columns
|
|
|
|
KEYUSE(tbl.col1 = ...)
|
|
KEYUSE(tbl.col1 = ...)
|
|
KEYUSE(tbl.col2 = ...)
|
|
|
|
Hash join code can use multiple columns: (col1, col2) for joining.
|
|
We need n_distinct({col1, col2}).
|
|
|
|
EITS only has statistics on individual columns: n_distinct(col1),
|
|
n_distinct(col2).
|
|
|
|
Our current solution is to be very conservative and use selectivity
|
|
of one column with the lowest avg_frequency.
|
|
|
|
In the future, we should an approach that cautiosly takes into account
|
|
multiple KEYUSEs either multiply by number of equalities or by sqrt
|
|
of the second most selective equality.
|
|
*/
|
|
Json_writer_array trace_arr(thd, "hash_join_columns");
|
|
for (KEYUSE *keyuse= hj_start_key;
|
|
keyuse->table == tab->table && is_hash_join_key_no(keyuse->key);
|
|
keyuse++)
|
|
{
|
|
if (!(remaining_tables & keyuse->used_tables) &&
|
|
(!keyuse->validity_ref || *keyuse->validity_ref) &&
|
|
tab->access_from_tables_is_allowed(keyuse->used_tables,
|
|
join->sjm_lookup_tables))
|
|
{
|
|
Field *field= tab->table->field[keyuse->keypart];
|
|
found_usable_field= 1;
|
|
if (is_eits_usable(field))
|
|
{
|
|
double freq= field->read_stats->get_avg_frequency();
|
|
|
|
Json_writer_object trace_field(thd);
|
|
trace_field.add("field",field->field_name.str).
|
|
add("avg_frequency", freq);
|
|
if (freq < min_freq)
|
|
min_freq= freq;
|
|
*stats_found= 1;
|
|
continue;
|
|
}
|
|
}
|
|
if (!keyuse->validity_ref || *keyuse->validity_ref)
|
|
found_not_usable_field= 1;
|
|
}
|
|
/* Ensure that some part of hash_key is usable */
|
|
DBUG_ASSERT(found_usable_field);
|
|
|
|
trace_arr.end();
|
|
if (found_not_usable_field)
|
|
{
|
|
/*
|
|
We did not't have data for all key fields. Assume that the hash
|
|
will at least limit the number of matched rows to HASH_FANOUT.
|
|
This makes the cost same as when 'hash_join_cardinality=off'
|
|
in the case when no analyze of the tables have been made.
|
|
|
|
However, it may cause problems when min_freq is higher than
|
|
HASH_FANOUT as the optimizer will then assume it is better to
|
|
put the table earlier in the plan when all key parts are not
|
|
usable.
|
|
Note that min_freq can become less than 1.0. This is intentional
|
|
as it matches what happens if OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY
|
|
is not used.
|
|
*/
|
|
double max_expected_records= rnd_records * HASH_FANOUT;
|
|
set_if_smaller(min_freq, max_expected_records);
|
|
trace_obj.add("using_default_hash_fanout", HASH_FANOUT);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Before joining the table with the contents of join buffer, we will
|
|
use the quick select and/or apply the table condition.
|
|
|
|
This will reduce the number of rows joined to rnd_records.
|
|
How will this affect n_distinct?
|
|
Depending on which rows are removed, this can either leave n_distinct as
|
|
is (for some value X, some rows are removed but some are left, leaving the
|
|
number of distinct values the same), or reduce n_distinct in proportion
|
|
with the fraction of rows removed (for some values of X, either all or
|
|
none of the rows with that value are removed).
|
|
|
|
We assume the latter: n_distinct is reduced in proportion the condition
|
|
and quick select's selectivity.
|
|
This is in effect same as applying apply_selectivity_for_table() on
|
|
min_freq as we have already done on rnd_records
|
|
*/
|
|
min_freq*= rnd_records / tab->table->stat_records();
|
|
set_if_bigger(min_freq, HASH_FANOUT);
|
|
}
|
|
|
|
trace_obj.add("rows", min_freq);
|
|
DBUG_RETURN(min_freq);
|
|
}
|
|
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
static char dbug_join_prefix_buf[256];
|
|
|
|
const char* dbug_print_join_prefix(const POSITION *join_positions,
|
|
uint idx,
|
|
JOIN_TAB *s)
|
|
{
|
|
char *buf= dbug_join_prefix_buf;
|
|
String str(buf, sizeof(dbug_join_prefix_buf), &my_charset_bin);
|
|
str.length(0);
|
|
for (uint i=0; i!=idx; i++)
|
|
{
|
|
str.append(join_positions[i].table->table->alias);
|
|
str.append(',');
|
|
}
|
|
str.append(s->table->alias);
|
|
if (str.c_ptr_safe() == buf)
|
|
return buf;
|
|
else
|
|
return "Couldn't fit into buffer";
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
Find the best access path for an extension of a partial execution
|
|
plan and add this path to the plan.
|
|
|
|
The function finds the best access path to table 's' from the passed
|
|
partial plan where an access path is the general term for any means to
|
|
access the data in 's'. An access path may use either an index or a scan,
|
|
whichever is cheaper. The input partial plan is passed via the array
|
|
'join->positions' of length 'idx'. The chosen access method for 's' and its
|
|
cost are stored in 'join->positions[idx]'.
|
|
|
|
@param join pointer to the structure providing all context info
|
|
for the query
|
|
@param s the table to be joined by the function
|
|
@param thd thread for the connection that submitted the query
|
|
@param remaining_tables set of tables not included into the partial plan yet
|
|
@param idx the length of the partial plan
|
|
@param disable_jbuf TRUE<=> Don't use join buffering
|
|
@param record_count estimate for the number of records returned by the
|
|
partial plan
|
|
@param pos OUT Table access plan
|
|
@param loose_scan_pos OUT Table plan that uses loosescan, or set cost to
|
|
DBL_MAX if not possible.
|
|
@detail
|
|
Use this to print the current join prefix:
|
|
|
|
dbug_print_join_prefix(join_positions, idx, s)
|
|
|
|
Use this as breakpoint condition to stop at join prefix "t1,t2,t3":
|
|
|
|
$_streq(dbug_print_join_prefix(join_positions, idx, s), "t1,t2,t3")
|
|
|
|
@return
|
|
None
|
|
*/
|
|
|
|
struct best_plan
|
|
{
|
|
double cost; // Smallest cost found
|
|
double records; // Old 'Records'
|
|
double records_read; // Records accessed
|
|
double records_after_filter; // Records_read + filter
|
|
double records_out; // Smallest record count seen
|
|
double prev_record_reads; // Save value from prev_record_reads
|
|
double identical_keys; // Save value from prev_record_reads
|
|
Range_rowid_filter_cost_info *filter; // Best filter
|
|
KEYUSE *key; // Best key
|
|
SplM_plan_info *spl_plan;
|
|
table_map ref_depends_map;
|
|
ulonglong refills; // Join cache refills
|
|
enum join_type type;
|
|
uint forced_index;
|
|
uint max_key_part;
|
|
table_map found_ref;
|
|
bool use_join_buffer;
|
|
};
|
|
|
|
|
|
void
|
|
best_access_path(JOIN *join,
|
|
JOIN_TAB *s,
|
|
table_map remaining_tables,
|
|
const POSITION *join_positions,
|
|
uint idx,
|
|
bool disable_jbuf,
|
|
double record_count,
|
|
POSITION *pos,
|
|
POSITION *loose_scan_pos)
|
|
{
|
|
THD *thd= join->thd;
|
|
uint use_cond_selectivity=
|
|
thd->variables.optimizer_use_condition_selectivity;
|
|
TABLE *table= s->table;
|
|
handler *file= table->file;
|
|
my_bool found_constraint= 0;
|
|
/*
|
|
key_dependent is 0 if all key parts could be used or if there was an
|
|
EQ_REF table found (which uses all key parts). In other words, we cannot
|
|
find a better key for the table even if remaining_tables is reduced.
|
|
Otherwise it's a bitmap of tables that could improve key usage.
|
|
*/
|
|
table_map key_dependent= 0;
|
|
ALL_READ_COST tmp;
|
|
ha_rows rec;
|
|
MY_BITMAP *eq_join_set= &s->table->eq_join_set;
|
|
KEYUSE *hj_start_key= 0;
|
|
table_map spl_pd_boundary= 0;
|
|
Loose_scan_opt loose_scan_opt;
|
|
struct best_plan best;
|
|
Json_writer_object trace_wrapper(thd, "best_access_path");
|
|
DBUG_ENTER("best_access_path");
|
|
|
|
/*
|
|
Assume that there is at least one accepted row from previous table
|
|
combinations.
|
|
This fixes a problem when the selectivity for the preceding table
|
|
combinations becomes so high that record_count becomes << 1.0,
|
|
which makes the cost for the current table so low that it does not
|
|
matter when calculating the best plans.
|
|
*/
|
|
set_if_bigger(record_count, 1.0);
|
|
|
|
best.cost= DBL_MAX;
|
|
best.records= DBL_MAX;
|
|
best.records_read= DBL_MAX;
|
|
best.records_after_filter= DBL_MAX;
|
|
best.records_out= MY_MIN(table->stat_records() * table->cond_selectivity,
|
|
table->opt_range_condition_rows);
|
|
best.prev_record_reads= best.identical_keys= 0;
|
|
best.filter= 0;
|
|
best.key= 0;
|
|
best.max_key_part= 0;
|
|
best.type= JT_UNKNOWN;
|
|
best.forced_index= MAX_KEY;
|
|
best.found_ref= 0;
|
|
best.ref_depends_map= 0;
|
|
best.refills= 0;
|
|
best.use_join_buffer= FALSE;
|
|
best.spl_plan= 0;
|
|
|
|
disable_jbuf= disable_jbuf || idx == join->const_tables;
|
|
|
|
trace_wrapper.add_table_name(s);
|
|
|
|
bitmap_clear_all(eq_join_set);
|
|
|
|
loose_scan_opt.init(join, s, remaining_tables);
|
|
|
|
if (table->is_splittable())
|
|
best.spl_plan= s->choose_best_splitting(idx,
|
|
remaining_tables,
|
|
join_positions,
|
|
&spl_pd_boundary);
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
Json_writer_object info(thd, "plan_details");
|
|
info.add("record_count", record_count);
|
|
}
|
|
|
|
Json_writer_array trace_paths(thd, "considered_access_paths");
|
|
if (s->keyuse)
|
|
{ /* Use key if possible */
|
|
KEYUSE *keyuse, *start_key= 0;
|
|
const char *cause= NULL;
|
|
uint max_key_part=0;
|
|
enum join_type type= JT_UNKNOWN;
|
|
double cur_cost, copy_cost, cached_prev_record_reads= 0.0;
|
|
table_map cached_prev_ref= ~(table_map) 0;
|
|
|
|
/* Test how we can use keys */
|
|
rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
|
|
for (keyuse=s->keyuse ; keyuse->table == table ;)
|
|
{
|
|
KEY *keyinfo;
|
|
ulong key_flags;
|
|
uint key_parts;
|
|
key_part_map found_part= 0;
|
|
/* key parts which won't have NULL in lookup tuple */
|
|
key_part_map notnull_part=0;
|
|
table_map found_ref= 0;
|
|
uint key= keyuse->key;
|
|
uint max_const_parts;
|
|
bool ft_key= (keyuse->keypart == FT_KEYPART);
|
|
/* Bitmap of keyparts where the ref access is over 'keypart=const': */
|
|
key_part_map const_part= 0;
|
|
/* The or-null keypart in ref-or-null access: */
|
|
key_part_map ref_or_null_part= 0;
|
|
key_part_map all_parts= 0;
|
|
double startup_cost= s->startup_cost;
|
|
double records_after_filter, records_best_filter, records;
|
|
Range_rowid_filter_cost_info *filter= 0;
|
|
double prev_record_count= record_count;
|
|
double identical_keys= 0;
|
|
|
|
if (is_hash_join_key_no(key))
|
|
{
|
|
/*
|
|
Hash join as any join employing join buffer can be used to join
|
|
only those tables that are joined after the first non const table
|
|
*/
|
|
if (!(remaining_tables & keyuse->used_tables) &&
|
|
idx > join->const_tables)
|
|
{
|
|
if (!hj_start_key)
|
|
hj_start_key= keyuse;
|
|
bitmap_set_bit(eq_join_set, keyuse->keypart);
|
|
}
|
|
keyuse++;
|
|
continue;
|
|
}
|
|
|
|
keyinfo= table->key_info+key;
|
|
key_parts= table->actual_n_key_parts(keyinfo);
|
|
key_flags= table->actual_key_flags(keyinfo);
|
|
|
|
/* Calculate how many key segments of the current key we can use */
|
|
start_key= keyuse;
|
|
|
|
loose_scan_opt.next_ref_key();
|
|
DBUG_PRINT("info", ("Considering ref access on key %s",
|
|
keyuse->table->key_info[keyuse->key].name.str));
|
|
|
|
do /* For each keypart */
|
|
{
|
|
uint keypart= keyuse->keypart;
|
|
table_map best_part_found_ref= 0, key_parts_dependent= 0;
|
|
double best_prev_record_reads= DBL_MAX;
|
|
|
|
do /* For each way to access the keypart */
|
|
{
|
|
/*
|
|
If 1. expression does not refer to forward tables
|
|
2. we won't get two ref-or-null's
|
|
*/
|
|
double ignore;
|
|
all_parts|= keyuse->keypart_map;
|
|
if (!(remaining_tables & keyuse->used_tables) &&
|
|
(!keyuse->validity_ref || *keyuse->validity_ref) &&
|
|
s->access_from_tables_is_allowed(keyuse->used_tables,
|
|
join->sjm_lookup_tables) &&
|
|
!(ref_or_null_part && (keyuse->optimize &
|
|
KEY_OPTIMIZE_REF_OR_NULL)))
|
|
{
|
|
found_part|= keyuse->keypart_map;
|
|
key_parts_dependent= 0;
|
|
if (!(keyuse->used_tables & ~join->const_table_map))
|
|
const_part|= keyuse->keypart_map;
|
|
|
|
if (!keyuse->val->maybe_null() || keyuse->null_rejecting)
|
|
notnull_part|=keyuse->keypart_map;
|
|
|
|
if ((found_ref | keyuse->used_tables) != cached_prev_ref)
|
|
{
|
|
cached_prev_ref= (found_ref | keyuse->used_tables);
|
|
cached_prev_record_reads=
|
|
prev_record_reads(join_positions, idx,
|
|
cached_prev_ref, record_count,
|
|
&ignore);
|
|
}
|
|
if (cached_prev_record_reads < best_prev_record_reads)
|
|
{
|
|
best_prev_record_reads= cached_prev_record_reads;
|
|
best_part_found_ref= (keyuse->used_tables &
|
|
~join->const_table_map);
|
|
}
|
|
if (rec > keyuse->ref_table_rows)
|
|
rec= keyuse->ref_table_rows;
|
|
/*
|
|
If there is one 'key_column IS NULL' expression, we can
|
|
use this ref_or_null optimisation of this field
|
|
*/
|
|
if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
|
|
ref_or_null_part |= keyuse->keypart_map;
|
|
|
|
/*
|
|
Remember if there is a WHERE condition that contains
|
|
'key_part=expression_with_only_accessible_tables'
|
|
We ignore const tables as these are handled by selectivity
|
|
code (const table fields are treated as constants).
|
|
*/
|
|
found_constraint|= (keyuse->used_tables &
|
|
~(remaining_tables |
|
|
join->const_table_map));
|
|
}
|
|
else if (!(found_part & keyuse->keypart_map))
|
|
key_parts_dependent|= keyuse->used_tables;
|
|
|
|
loose_scan_opt.add_keyuse(remaining_tables, keyuse);
|
|
keyuse++;
|
|
} while (keyuse->table == table && keyuse->key == key &&
|
|
keyuse->keypart == keypart);
|
|
/* If we found a usable key, remember the dependent tables */
|
|
if (all_parts & 1)
|
|
key_dependent|= key_parts_dependent;
|
|
found_ref|= best_part_found_ref;
|
|
/* Remember if the key expression used previous non const tables */
|
|
} while (keyuse->table == table && keyuse->key == key);
|
|
|
|
/*
|
|
Assume that each key matches a proportional part of table.
|
|
*/
|
|
if (!found_part && !ft_key && !loose_scan_opt.have_a_case())
|
|
continue; // Nothing usable found
|
|
|
|
if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
|
|
rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables
|
|
|
|
Json_writer_object trace_access_idx(thd);
|
|
max_const_parts= max_part_bit(const_part);
|
|
|
|
/*
|
|
full text keys require special treatment
|
|
*/
|
|
if (ft_key)
|
|
{
|
|
/*
|
|
Fulltext index searches are performed the following way:
|
|
- In the prepare step it performs the search, collects all positions
|
|
in an array, sorts it.
|
|
- If optimizer decides to use the ft index access method it simply'
|
|
returns positions from the array one by one
|
|
- If optimizer decides to use something else (another index, table
|
|
scan), then it'll use binary search in the array to find the
|
|
position.
|
|
|
|
The following code puts the cost down to very small as the prep
|
|
step will always be done and the cost to fetch the row from memory
|
|
is very small.
|
|
Alternatively we could use the cost of an EQ_REF here.
|
|
*/
|
|
tmp.reset();
|
|
tmp.row_cost.cpu= file->ROW_COPY_COST;
|
|
/*
|
|
We don't know how many records will match. However, we want to have
|
|
the fulltext search done early, so we put the number of records
|
|
to be very low.
|
|
*/
|
|
records= 1.0;
|
|
type= JT_FT;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("access_type", join_type_str[type]).
|
|
add("full-text index", keyinfo->name);
|
|
}
|
|
else
|
|
{
|
|
loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
|
|
|
|
/* Check if we found full key */
|
|
const key_part_map all_key_parts= PREV_BITS(uint, key_parts);
|
|
if (found_part == all_key_parts && !ref_or_null_part)
|
|
{ /* use eq key */
|
|
max_key_part= (uint) ~0;
|
|
/*
|
|
If the index is a unique index (1), and
|
|
- all its columns are not null (2), or
|
|
- equalities we are using reject NULLs (3)
|
|
then the estimate is rows=1.
|
|
*/
|
|
if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME)) && // (1)
|
|
(!(key_flags & HA_NULL_PART_KEY) || // (2)
|
|
all_key_parts == notnull_part)) // (3)
|
|
{
|
|
/* Check that eq_ref_tables are correctly updated */
|
|
DBUG_ASSERT(join->eq_ref_tables & table->map);
|
|
type= JT_EQ_REF;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("access_type", join_type_str[type]).
|
|
add("index", keyinfo->name);
|
|
if (!found_ref && table->opt_range_keys.is_set(key))
|
|
{
|
|
/* Ensure that the cost is identical to the range cost */
|
|
table->opt_range[key].get_costs(&tmp);
|
|
}
|
|
else
|
|
{
|
|
tmp= cost_for_index_read(thd, table, key, 1, 1);
|
|
}
|
|
/*
|
|
Calculate how many record read calls will be made taking
|
|
into account that we will cache the last read row.
|
|
*/
|
|
prev_record_count= prev_record_reads(join_positions, idx,
|
|
found_ref, record_count,
|
|
&identical_keys);
|
|
records= 1.0;
|
|
}
|
|
else
|
|
{
|
|
type= JT_REF;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("access_type", join_type_str[type]).
|
|
add("index", keyinfo->name);
|
|
if (!found_ref)
|
|
{ /* We found a const key */
|
|
/*
|
|
ReuseRangeEstimateForRef-1:
|
|
We get here if we've found a ref(const) (c_i are constants):
|
|
"(keypart1=c1) AND ... AND (keypartN=cN)" [ref_const_cond]
|
|
|
|
If range optimizer was able to construct a "range"
|
|
access on this index, then its condition "quick_cond" was
|
|
equivalent to ref_const_cond (*), and we can re-use E(#rows)
|
|
from the range optimizer.
|
|
|
|
Proof of (*): By properties of range and ref optimizers
|
|
quick_cond will be equal or tighter than ref_const_cond.
|
|
ref_const_cond already covers "smallest" possible interval -
|
|
a singlepoint interval over all keyparts. Therefore,
|
|
quick_cond is equivalent to ref_const_cond (if it was an
|
|
empty interval we wouldn't have got here).
|
|
*/
|
|
if (table->opt_range_keys.is_set(key))
|
|
{
|
|
/* Ensure that the cost is identical to the range cost */
|
|
records= (double) table->opt_range[key].rows;
|
|
trace_access_idx.add("used_range_estimates", true);
|
|
|
|
table->opt_range[key].get_costs(&tmp);
|
|
goto got_cost2;
|
|
}
|
|
/* quick_range couldn't use key! */
|
|
records= (double) s->records/rec;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("used_range_estimates", false).
|
|
add("reason", "not available");
|
|
}
|
|
else
|
|
{
|
|
if (!(records= keyinfo->actual_rec_per_key(key_parts-1)))
|
|
{ /* Prefer longer keys */
|
|
trace_access_idx.add("rec_per_key_stats_missing", true);
|
|
records=
|
|
((double) s->records / (double) rec *
|
|
(1.0 +
|
|
((double) (table->s->max_key_length-keyinfo->key_length) /
|
|
(double) table->s->max_key_length)));
|
|
set_if_smaller(records, (double)s->records);
|
|
if (records < 1.0)
|
|
records= 1.0; /* Can't be as good as a unique */
|
|
}
|
|
|
|
/*
|
|
ReuseRangeEstimateForRef-2: We get here if we could not reuse
|
|
E(#rows) from range optimizer. Make another try:
|
|
|
|
If range optimizer produced E(#rows) for a prefix of the ref
|
|
access we're considering, and that E(#rows) is lower than our
|
|
current estimate, make an adjustment. The criteria of when we
|
|
can make an adjustment is a special case of the criteria used
|
|
in ReuseRangeEstimateForRef-3.
|
|
*/
|
|
if (table->opt_range_keys.is_set(key) &&
|
|
table->opt_range[key].key_parts <= max_const_parts &&
|
|
table->opt_range[key].ranges == 1 &&
|
|
records > (double) table->opt_range[key].rows)
|
|
{
|
|
records= (double) table->opt_range[key].rows;
|
|
trace_access_idx.add("used_range_estimates", "clipped down");
|
|
}
|
|
else if (unlikely(trace_access_idx.trace_started()))
|
|
{
|
|
if (table->opt_range_keys.is_set(key))
|
|
{
|
|
trace_access_idx.
|
|
add("used_range_estimates",false).
|
|
add("reason", "not better than ref estimates");
|
|
}
|
|
else
|
|
{
|
|
trace_access_idx.
|
|
add("used_range_estimates", false).
|
|
add("reason", "not available");
|
|
}
|
|
}
|
|
}
|
|
/* Calculate the cost of the index access */
|
|
tmp= cost_for_index_read(thd, table, key,
|
|
(ha_rows) records, 0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
type = ref_or_null_part ? JT_REF_OR_NULL : JT_REF;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("access_type", join_type_str[type]).
|
|
add("index", keyinfo->name);
|
|
/*
|
|
Use as much key-parts as possible and a uniq key is better
|
|
than a not unique key
|
|
Set tmp to the cost of the accessing the expected number of
|
|
records.
|
|
*/
|
|
if ((found_part & 1) &&
|
|
(!(table->key_info[key].index_flags & HA_ONLY_WHOLE_INDEX) ||
|
|
found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts)))
|
|
{
|
|
double extra_cost= 0;
|
|
|
|
max_key_part= max_part_bit(found_part);
|
|
bool all_used_equalities_are_const= (max_key_part ==
|
|
max_const_parts);
|
|
/*
|
|
ReuseRangeEstimateForRef-3:
|
|
We're now considering a ref[or_null] access via
|
|
(t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR
|
|
(same-as-above but with one cond replaced
|
|
with "t.keypart_i IS NULL")] (**)
|
|
|
|
Try re-using E(#rows) from "range" optimizer:
|
|
We can do so if "range" optimizer used the same intervals as
|
|
in (**). The intervals used by range optimizer may be not
|
|
available at this point (as "range" access might have chosen to
|
|
create quick select over another index), so we can't compare
|
|
them to (**). We'll make indirect judgements instead.
|
|
The sufficient conditions for re-use are:
|
|
(C1) All e_i in (**) are constants (if
|
|
this is not satisfied we have no way to know which ranges
|
|
will be actually scanned by 'ref' until we execute the
|
|
join)
|
|
(C2) max #key parts in 'range' access == K == max_key_part (this
|
|
is apparently a necessary requirement)
|
|
|
|
We also have a property that "range optimizer produces equal or
|
|
tighter set of scan intervals than ref(const) optimizer". Each
|
|
of the intervals in (**) are "tightest possible" intervals when
|
|
one limits itself to using keyparts 1..K (which we do in #2).
|
|
From here it follows that range access used either one, or
|
|
both of the (I1) and (I2) intervals:
|
|
|
|
(t.keypart1=c1 AND ... AND t.keypartK=eK) (I1)
|
|
(same-as-above but with one cond replaced
|
|
with "t.keypart_i IS NULL") (I2)
|
|
|
|
The remaining part is to exclude the situation where range
|
|
optimizer used one interval while we're considering
|
|
ref-or-null and looking for estimate for two intervals. This
|
|
is done by last limitation:
|
|
|
|
(C3) "range optimizer used (have ref_or_null?2:1) intervals"
|
|
*/
|
|
if (table->opt_range_keys.is_set(key) &&
|
|
all_used_equalities_are_const && // (C1)
|
|
table->opt_range[key].key_parts == max_key_part && //(C2)
|
|
(table->opt_range[key].ranges ==
|
|
1 + MY_TEST(ref_or_null_part))) //(C3)
|
|
{
|
|
records= (double) table->opt_range[key].rows;
|
|
table->opt_range[key].get_costs(&tmp);
|
|
/*
|
|
TODO: Disable opt_range testing below for this range as we can
|
|
always use this ref instead.
|
|
*/
|
|
trace_access_idx.add("used_range_estimates", true);
|
|
goto got_cost2;
|
|
}
|
|
else
|
|
{
|
|
/* Check if we have statistic about the distribution */
|
|
if ((records= keyinfo->actual_rec_per_key(max_key_part-1)))
|
|
{
|
|
/*
|
|
Fix for the case where the index statistics is too
|
|
optimistic: If
|
|
(1) We're considering ref(const) and there is quick select
|
|
on the same index,
|
|
(2) and that quick select uses more keyparts (i.e. it will
|
|
scan equal/smaller interval than this ref(const))
|
|
(3) and E(#rows) for quick select is higher than our
|
|
estimate,
|
|
Then
|
|
We'll use E(#rows) from quick select.
|
|
|
|
Q: Why do we choose to use 'ref'? Won't quick select be
|
|
cheaper in some cases ?
|
|
TODO: figure this out and adjust the plan choice if needed.
|
|
*/
|
|
if (table->opt_range_keys.is_set(key))
|
|
{
|
|
double rows;
|
|
if (table->opt_range[key].key_parts >= max_key_part) // (2)
|
|
{
|
|
/*
|
|
Choose range over REF in the case range will always be
|
|
as good or better than REF.
|
|
This is the case when we have only one const range
|
|
and it consist of more parts than what we used for REF.
|
|
*/
|
|
if (all_used_equalities_are_const &&
|
|
table->opt_range[key].key_parts > max_key_part &&
|
|
table->opt_range[key].ranges <=
|
|
(uint) (1 + MY_TEST(ref_or_null_part)))
|
|
{
|
|
trace_access_idx.
|
|
add("chosen", false).
|
|
add("cause", "range is simple and more selective");
|
|
continue; // continue with next key
|
|
}
|
|
}
|
|
rows= (double) table->opt_range[key].rows;
|
|
if (all_used_equalities_are_const && // (1)
|
|
records < rows) // (3)
|
|
{
|
|
trace_access_idx.add("used_range_estimates",
|
|
"clipped up");
|
|
records= rows;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
trace_access_idx.add("rec_per_key_stats_missing", true);
|
|
/*
|
|
Assume that the first key part matches 1% of the file
|
|
and that the whole key matches 10 (duplicates) or 1
|
|
(unique) records.
|
|
Assume also that more key matches proportionally more
|
|
records
|
|
This gives the formula:
|
|
records = (x * (b-a) + a*c-b)/(c-1)
|
|
|
|
b = records matched by whole key
|
|
a = records matched by first key part (1% of all records?)
|
|
c = number of key parts in key
|
|
x = used key parts (1 <= x <= c)
|
|
*/
|
|
double rec_per_key;
|
|
if (!(rec_per_key=(double)
|
|
keyinfo->rec_per_key[keyinfo->user_defined_key_parts-1]))
|
|
rec_per_key=(double) s->records/rec+1;
|
|
|
|
if (!s->records)
|
|
records= 0;
|
|
else if (rec_per_key/(double) s->records >= 0.01)
|
|
records= rec_per_key;
|
|
else
|
|
{
|
|
double a=s->records*0.01;
|
|
if (keyinfo->user_defined_key_parts > 1)
|
|
records= (max_key_part * (rec_per_key - a) +
|
|
a*keyinfo->user_defined_key_parts - rec_per_key)/
|
|
(keyinfo->user_defined_key_parts-1);
|
|
else
|
|
records= rows2double(s->records);
|
|
set_if_bigger(records, MIN_ROWS_AFTER_FILTERING);
|
|
}
|
|
}
|
|
|
|
if (ref_or_null_part)
|
|
{
|
|
/* We need to do two key searches to find row */
|
|
records *= 2.0;
|
|
extra_cost= s->table->file->KEY_LOOKUP_COST;
|
|
}
|
|
|
|
/*
|
|
ReuseRangeEstimateForRef-4: We get here if we could not reuse
|
|
E(#rows) from range optimizer. Make another try:
|
|
|
|
If range optimizer produced E(#rows) for a prefix of the ref
|
|
access we're considering, and that E(#rows) is lower than our
|
|
current estimate, make the adjustment.
|
|
|
|
The decision whether we can re-use the estimate from the range
|
|
optimizer is the same as in ReuseRangeEstimateForRef-3,
|
|
applied to first table->quick_key_parts[key] key parts.
|
|
*/
|
|
if (table->opt_range_keys.is_set(key) &&
|
|
table->opt_range[key].key_parts <= max_const_parts &&
|
|
table->opt_range[key].ranges == (1 +
|
|
MY_TEST(ref_or_null_part &
|
|
const_part)) &&
|
|
records > (double) table->opt_range[key].rows)
|
|
{
|
|
// psergey-merge-sept: remove: if (table->opt_range[key].key_parts <= max_const_parts)
|
|
{
|
|
trace_access_idx.add("used_range_estimates", true);
|
|
records= (double) table->opt_range[key].rows;
|
|
}
|
|
}
|
|
}
|
|
|
|
set_if_smaller(records, (double) s->records);
|
|
tmp= cost_for_index_read(thd, table, key, (ha_rows)records, 0);
|
|
tmp.copy_cost+= extra_cost;
|
|
}
|
|
else
|
|
{
|
|
if (!(found_part & 1))
|
|
cause= "no predicate for first keypart";
|
|
else
|
|
cause= "No full key found";
|
|
trace_access_idx.add("chosen", false).add("cause", cause);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
got_cost2:
|
|
loose_scan_opt.check_ref_access_part2(key, start_key, records,
|
|
file->cost(&tmp) + startup_cost,
|
|
found_ref);
|
|
} /* not ft_key */
|
|
|
|
if (records == DBL_MAX) // Key not usable
|
|
continue;
|
|
|
|
records_best_filter= records_after_filter= records;
|
|
|
|
/*
|
|
Check if we can use a filter.
|
|
Records can be 0 in case of empty tables.
|
|
*/
|
|
if ((found_part & 1) && records &&
|
|
table->can_use_rowid_filter(start_key->key))
|
|
{
|
|
/*
|
|
If we use filter F with selectivity s then the cost of fetching data
|
|
by key using this filter will be
|
|
cost_of_fetching_1_row * rows * s +
|
|
cost_of_fetching_1_key_tuple * rows * (1 - s) +
|
|
cost_of_1_lookup_into_filter * rows
|
|
Without using any filter the cost would be just
|
|
cost_of_fetching_1_row * rows
|
|
|
|
So the gain in access cost per row will be
|
|
cost_of_fetching_1_row * (1 - s) -
|
|
cost_of_fetching_1_key_tuple * (1 - s) -
|
|
cost_of_1_lookup_into_filter
|
|
=
|
|
(cost_of_fetching_1_row - cost_of_fetching_1_key_tuple) * (1 - s)
|
|
- cost_of_1_lookup_into_filter
|
|
|
|
Here we have:
|
|
cost_of_fetching_1_row = tmp/rows
|
|
cost_of_fetching_1_key_tuple = keyread_tmp/rows
|
|
Here's a more detailed explanation that uses the formulas behind
|
|
the function the call filter->get_adjusted_gain(). The function
|
|
takes as a parameter the number of probes/look-ups into the filter
|
|
that is equal to the number of fetched key entries that is equal to
|
|
the number of row fetches when no filter is used (assuming no
|
|
index condition pushdown is employed for the used key access).
|
|
Let this number be N. Then the total gain from using the filter is
|
|
N*a_adj - b where b is the cost of building the filter and
|
|
a_adj is calculated as follows:
|
|
a - (1-access_cost_factor)*(1-s) =
|
|
(1+1_cond_eval_cost)*(1-s)-1_probe_cost - (1-access_cost_factor)*(1-s)
|
|
= (1-s)*(1_cond_eval_cost+access_cost_factor) - 1_probe_cost.
|
|
Here ((1-s)*(1_cond_eval_cost) * N is the gain from checking less
|
|
conditions pushed into the table, 1_probe_cost*N is the cost of the
|
|
probes and (1*s) * access_cost_factor * N must be the gain from
|
|
accessing less rows.
|
|
It does not matter how we calculate the cost of N full row fetches
|
|
cost_of_fetching_N_rows or
|
|
how we calculate the cost of fetching N key entries
|
|
cost_of_fetching_N_key_entries
|
|
the gain from less row fetches will be
|
|
(cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) * (1-s)
|
|
and this should be equal to (1*s) * access_cost_factor * N.
|
|
Thus access_cost_factor must be calculated as
|
|
(cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) / N.
|
|
|
|
For safety we clip cost_of_fetching_N_key_entries by the value
|
|
of cost_of_fetching_N_row though formally it's not necessary.
|
|
|
|
We cannot use filter with JT_EQ_REF as in this case 'tmp' is
|
|
number of rows from prev_record_read() and keyread_tmp is 0. These
|
|
numbers are not usable with rowid filter code.
|
|
*/
|
|
filter= table->best_range_rowid_filter(start_key->key,
|
|
records,
|
|
file->cost(&tmp),
|
|
file->cost(tmp.index_cost),
|
|
prev_record_count,
|
|
&records_best_filter);
|
|
set_if_smaller(best.records_out, records_best_filter);
|
|
|
|
if (filter)
|
|
filter= filter->apply_filter(thd, table, &tmp,
|
|
&records_after_filter,
|
|
&startup_cost,
|
|
1, prev_record_count);
|
|
}
|
|
|
|
/*
|
|
Take into account WHERE and setup cost.
|
|
We have to check the WHERE for all previous row combinations
|
|
(record_count).
|
|
'prev_record_count' is either 'record_count', or in case of
|
|
EQ_REF the estimated number of index_read() calls to the
|
|
engine when taking the one row read cache into account.
|
|
*/
|
|
copy_cost= (record_count * records_after_filter * WHERE_COST_THD(thd) +
|
|
startup_cost);
|
|
|
|
cur_cost= (file->cost_for_reading_multiple_times(prev_record_count,
|
|
&tmp) +
|
|
copy_cost);
|
|
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
{
|
|
if (prev_record_count != record_count)
|
|
trace_access_idx.add("prev_record_count", prev_record_count);
|
|
trace_access_idx.
|
|
add("rows", records_after_filter).
|
|
add("cost", cur_cost);
|
|
}
|
|
|
|
|
|
/*
|
|
The COST_EPS is here to ensure we use the first key if there are
|
|
two 'identical keys' that could be used.
|
|
*/
|
|
if (cur_cost + COST_EPS < best.cost)
|
|
{
|
|
trace_access_idx.add("chosen", true);
|
|
best.cost= cur_cost;
|
|
/*
|
|
We use 'records' instead of 'records_after_filter' here as we want
|
|
to have EXPLAIN print the number of rows found by the key access.
|
|
*/
|
|
best.records= records; // Records before filter!
|
|
best.records_read= records;
|
|
/*
|
|
If we are using 'use_cond_selectivity > 1' then
|
|
table_after_join_selectivity() may take into account other
|
|
filters than what is currently used so we have to use
|
|
records_after_filter. If 'use_cond_selectivity <= 1 then we
|
|
can use information from the best filter.
|
|
*/
|
|
best.records_after_filter= ((use_cond_selectivity > 1) ?
|
|
records_after_filter :
|
|
records_best_filter);
|
|
best.prev_record_reads= prev_record_count;
|
|
best.identical_keys= identical_keys;
|
|
best.key= start_key;
|
|
best.found_ref= found_ref;
|
|
best.max_key_part= max_key_part;
|
|
best.ref_depends_map= found_ref;
|
|
best.filter= filter;
|
|
best.type= type;
|
|
}
|
|
else if (unlikely(thd->trace_started()))
|
|
{
|
|
trace_access_idx.
|
|
add("chosen", false).
|
|
add("cause", cause ? cause : "cost");
|
|
}
|
|
set_if_smaller(best.records_out, records);
|
|
} /* for each key */
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
No usable keys found. However, there may still be an option to use
|
|
"Range checked for each record" when all depending tables has
|
|
been read. s->key_dependent tells us which tables these could be and
|
|
s->key_start_dependent tells us if a first key part was used.
|
|
s->key_dependent may include more tables than could be used,
|
|
but this is ok as not having any usable keys is a rare thing and
|
|
the performance penalty for extra table bits is that
|
|
best_extension_by_limited_search() would not be able to prune tables
|
|
earlier.
|
|
Example query:
|
|
SELECT * FROM t1,t2 where t1.key1=t2.key1 OR t2.key2<1
|
|
*/
|
|
if (s->key_start_dependent)
|
|
key_dependent= s->key_dependent;
|
|
|
|
/* Add dependency for sub queries */
|
|
key_dependent|= s->embedded_dependent;
|
|
|
|
} /* if (s->keyuse) */
|
|
|
|
|
|
/* Check that s->key_dependent contains all used_tables found in s->keyuse */
|
|
key_dependent&= ~PSEUDO_TABLE_BITS;
|
|
DBUG_ASSERT((key_dependent & (s->key_dependent | s->embedded_dependent)) ==
|
|
key_dependent);
|
|
|
|
/*
|
|
If there is no key to access the table, but there is an equi-join
|
|
predicate connecting the table with the previous tables then we
|
|
consider the possibility of using hash join.
|
|
We need also to check that:
|
|
(1) s is inner table of semi-join -> join cache is allowed for semijoins
|
|
(2) s is inner table of outer join -> join cache is allowed for outer joins
|
|
*/
|
|
if (idx > join->const_tables && best.key == 0 &&
|
|
(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
|
|
join->max_allowed_join_cache_level > 2 &&
|
|
!bitmap_is_clear_all(eq_join_set) && !disable_jbuf &&
|
|
(!s->emb_sj_nest ||
|
|
join->allowed_semijoin_with_cache) && // (1)
|
|
(!(table->map & join->outer_join) ||
|
|
join->allowed_outer_join_with_cache)) // (2)
|
|
{
|
|
Json_writer_object trace_access_hash(thd);
|
|
double refills, row_copy_cost, copy_cost, cur_cost, where_cost;
|
|
double matching_combinations, fanout= 0.0, join_sel;
|
|
trace_access_hash.add("type", "hash");
|
|
trace_access_hash.add("index", "hj-key");
|
|
/* Estimate the cost of the hash join access to the table */
|
|
double rnd_records;
|
|
bool stats_found= 0;
|
|
|
|
rnd_records= apply_selectivity_for_table(s, use_cond_selectivity);
|
|
DBUG_ASSERT(rnd_records <= rows2double(s->found_records) + 0.5);
|
|
DBUG_ASSERT(hj_start_key);
|
|
|
|
fanout= rnd_records;
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY) &&
|
|
rnd_records > 0)
|
|
{
|
|
/*
|
|
Starting from this point, rnd_records should not be used anymore.
|
|
Use "fanout" for an estimate of # matching records.
|
|
*/
|
|
fanout= hash_join_fanout(join, s, remaining_tables, rnd_records,
|
|
hj_start_key, &stats_found);
|
|
set_if_smaller(best.records_out, fanout);
|
|
join_sel= 1.0;
|
|
}
|
|
if (!stats_found)
|
|
{
|
|
/*
|
|
No OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY or no field statistics
|
|
found.
|
|
|
|
Take into account if there is non constant constraints used with
|
|
earlier tables in the where expression.
|
|
If yes, this will set fanout to rnd_records/4.
|
|
We estimate that there will be HASH_FANOUT (10%)
|
|
hash matches / row.
|
|
*/
|
|
fanout= ((found_constraint) ?
|
|
use_found_constraint(rnd_records) :
|
|
rnd_records);
|
|
set_if_smaller(best.records_out, fanout * HASH_FANOUT);
|
|
join_sel= HASH_FANOUT;
|
|
}
|
|
|
|
/*
|
|
The following cost calculation is identical to the cost calculation for
|
|
the join cache later on, except for the HASH_FANOUT
|
|
*/
|
|
if (s->quick)
|
|
{
|
|
/*
|
|
Cost of reading rows through opt_range including comparing the rows
|
|
with the attached WHERE clause.
|
|
*/
|
|
cur_cost= s->quick->read_time;
|
|
}
|
|
else
|
|
cur_cost= s->cached_scan_and_compare_time;
|
|
|
|
/* We read the table as many times as join buffer becomes full. */
|
|
refills= (1.0 + floor((double) cache_record_length(join,idx) *
|
|
record_count /
|
|
(double) thd->variables.join_buff_size));
|
|
cur_cost= COST_MULT(cur_cost, refills);
|
|
|
|
|
|
/*
|
|
Cost of doing the hash lookup and check all matching rows with the
|
|
WHERE clause.
|
|
We assume here that, thanks to the hash, we don't have to compare all
|
|
row combinations, only a fanout or HASH_FANOUT (10%) rows in the cache.
|
|
*/
|
|
row_copy_cost= (ROW_COPY_COST_THD(thd) *
|
|
JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
|
|
matching_combinations= fanout * join_sel * record_count;
|
|
copy_cost= (record_count * row_copy_cost +
|
|
matching_combinations *
|
|
((idx - join->const_tables) * row_copy_cost));
|
|
where_cost= matching_combinations * WHERE_COST_THD(thd);
|
|
cur_cost= COST_ADD(cur_cost, copy_cost + where_cost);
|
|
|
|
best.cost= cur_cost;
|
|
best.records_read= best.records_after_filter= rows2double(s->records);
|
|
best.records= rnd_records; // Records after where (Legacy value)
|
|
best.key= hj_start_key;
|
|
best.ref_depends_map= 0;
|
|
best.use_join_buffer= TRUE;
|
|
best.filter= 0;
|
|
best.type= JT_HASH;
|
|
best.refills= double_to_ulonglong(ceil(refills));
|
|
if (unlikely(trace_access_hash.trace_started()))
|
|
trace_access_hash.
|
|
add("rows", rnd_records).
|
|
add("rows_after_hash", fanout * join_sel).
|
|
add("refills", refills).
|
|
add("jbuf_use_cost", copy_cost).
|
|
add("extra_cond_check_cost", where_cost).
|
|
add("total_cost", best.cost).
|
|
add("chosen", true);
|
|
}
|
|
|
|
/*
|
|
Don't test table scan if it can't be better.
|
|
Prefer key lookup if we would use the same key for scanning.
|
|
|
|
Don't do a table scan on InnoDB tables, if we can read the used
|
|
parts of the row from any of the used index.
|
|
This is because table scans uses index and we would not win
|
|
anything by using a table scan.
|
|
|
|
A word for word translation of the below if-statement in sergefp's
|
|
understanding: we check if we should use table scan if:
|
|
(1) The found 'ref' access produces more records than a table scan
|
|
(or index scan, or quick select), or 'ref' is more expensive than
|
|
any of them.
|
|
(2) This doesn't hold: the best way to perform table scan is to perform
|
|
'range' access using index IDX, and the best way to perform 'ref'
|
|
access is to use the same index IDX, with the same or more key parts.
|
|
(note: it is not clear how this rule is/should be extended to
|
|
index_merge quick selects). Also if we have a hash join we prefer that
|
|
over a table scan. This heuristic doesn't apply if the quick select
|
|
uses the group-by min-max optimization.
|
|
(3) See above note about InnoDB.
|
|
(4) NOT ("FORCE INDEX(...)" is used for table and there is 'ref' access
|
|
path, but there is no quick select)
|
|
If the condition in the above brackets holds, then the only possible
|
|
"table scan" access method is ALL/index (there is no quick select).
|
|
Since we have a 'ref' access path, and FORCE INDEX instructs us to
|
|
choose it over ALL/index, there is no need to consider a full table
|
|
scan.
|
|
(5) Non-flattenable semi-joins: don't consider doing a scan of temporary
|
|
table if we had an option to make lookups into it. In real-world cases,
|
|
lookups are cheaper than full scans, but when the table is small, they
|
|
can be [considered to be] more expensive, which causes lookups not to
|
|
be used for cases with small datasets, which is annoying.
|
|
*/
|
|
Json_writer_object trace_access_scan(thd);
|
|
if ((best.records_read >= s->found_records ||
|
|
best.cost > s->read_time) && // (1)
|
|
!(best.key && best.key->key == MAX_KEY) && // (2)
|
|
!(s->quick &&
|
|
s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2)
|
|
best.key && s->quick->index == best.key->key && // (2)
|
|
table->opt_range_keys.is_set(best.key->key) && // (2)
|
|
best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2)
|
|
!((file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
|
|
!table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3)
|
|
!(table->force_index_join && best.key && !s->quick) && // (4)
|
|
!(best.key && table->pos_in_table_list->jtbm_subselect)) // (5)
|
|
{ // Check full join
|
|
double records_after_filter, org_records;
|
|
double records_best_filter, cur_cost;
|
|
Range_rowid_filter_cost_info *filter= 0;
|
|
double startup_cost= s->startup_cost;
|
|
const char *scan_type= "";
|
|
enum join_type type;
|
|
uint forced_index= MAX_KEY;
|
|
bool force_plan= 0, use_join_buffer= 0;
|
|
ulonglong refills= 1;
|
|
ALL_READ_COST cost;
|
|
|
|
/*
|
|
Range optimizer never proposes a RANGE if it isn't better
|
|
than FULL: so if RANGE is present, it's always preferred to FULL.
|
|
Here we estimate its cost.
|
|
*/
|
|
|
|
if (s->quick)
|
|
{
|
|
/*
|
|
For each record we:
|
|
- read record range through 'quick'
|
|
- skip rows which does not satisfy WHERE constraints
|
|
*/
|
|
|
|
/*
|
|
Use record count from range optimizer.
|
|
This is done to make records found comparable to what we get with
|
|
'ref' access.
|
|
*/
|
|
org_records= records_after_filter= rows2double(s->found_records);
|
|
records_best_filter= org_records;
|
|
set_if_smaller(best.records_out, records_best_filter);
|
|
|
|
if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
|
|
{
|
|
uint key_no= s->quick->index;
|
|
TABLE::OPT_RANGE *range= &table->opt_range[key_no];
|
|
|
|
/*
|
|
Ensure that 'range' and 's' are coming from the same source
|
|
The complex 'double' comparison is there because floating point
|
|
registers complications when costs are calculated.
|
|
*/
|
|
DBUG_ASSERT(range->rows >= s->found_records);
|
|
DBUG_ASSERT((range->cost.total_cost() == 0.0 &&
|
|
s->quick->read_time == 0.0) ||
|
|
compare_cost(range->cost.total_cost(),
|
|
s->quick->read_time));
|
|
DBUG_ASSERT(compare_cost(range->cost.comp_cost,
|
|
range->rows * file->WHERE_COST));
|
|
|
|
/* Get range cost. This does not include cost of the WHERE */
|
|
range->get_costs(&cost);
|
|
/* Ensure that cost from opt_range are correct */
|
|
DBUG_ASSERT(compare_cost(file->cost_no_capping(&cost) +
|
|
range->cost.comp_cost +
|
|
range->cost.setup_cost,
|
|
s->quick->read_time));
|
|
|
|
if (table->can_use_rowid_filter(key_no))
|
|
{
|
|
filter= table->best_range_rowid_filter(key_no,
|
|
rows2double(range->rows),
|
|
file->cost(&cost),
|
|
file->cost(cost.index_cost),
|
|
record_count,
|
|
&records_best_filter);
|
|
set_if_smaller(best.records_out, records_best_filter);
|
|
if (filter)
|
|
{
|
|
filter= filter->apply_filter(thd, table, &cost,
|
|
&records_after_filter,
|
|
&startup_cost,
|
|
range->ranges,
|
|
record_count);
|
|
if (filter)
|
|
{
|
|
set_if_smaller(best.records_out, records_after_filter);
|
|
table->opt_range[key_no].selectivity= filter->selectivity;
|
|
}
|
|
}
|
|
}
|
|
if (best.key && key_no == best.key->key &&
|
|
!best.found_ref &&
|
|
best.max_key_part < table->opt_range[best.key->key].key_parts &&
|
|
table->opt_range[best.key->key].ranges == 1)
|
|
{
|
|
/*
|
|
Force to use range as it is using the 'best key' and using more
|
|
key parts (and thus will read less rows)
|
|
*/
|
|
force_plan= 1;
|
|
}
|
|
type= JT_RANGE;
|
|
/*
|
|
We cannot use range->cost.cmp_cost here as records_after_filter
|
|
will be different if filter is used.
|
|
*/
|
|
cost.copy_cost+= (records_after_filter * file->WHERE_COST +
|
|
range->cost.setup_cost);
|
|
}
|
|
else
|
|
{
|
|
type= JT_INDEX_MERGE;
|
|
/*
|
|
We don't know exactly from where the costs comes from.
|
|
Let's store it in copy_cost.
|
|
Note that s->quick->read_time includes the cost of comparing
|
|
the row with the where clause (WHERE_COST)
|
|
*/
|
|
cost.reset();
|
|
cost.copy_cost= s->quick->read_time;
|
|
}
|
|
loose_scan_opt.check_range_access(join, idx, s->quick);
|
|
}
|
|
else
|
|
{
|
|
double records_table_filter;
|
|
|
|
/* We will now calculate cost of scan, with or without join buffer */
|
|
records_best_filter= records_after_filter=
|
|
apply_selectivity_for_table(s, use_cond_selectivity);
|
|
records_table_filter= ((found_constraint) ?
|
|
use_found_constraint(records_after_filter) :
|
|
records_after_filter);
|
|
|
|
DBUG_ASSERT(records_after_filter <= s->records);
|
|
DBUG_ASSERT(records_after_filter <= s->found_records);
|
|
|
|
set_if_smaller(best.records_out, records_table_filter);
|
|
|
|
org_records= rows2double(s->records);
|
|
|
|
/* Estimate cost of reading table. */
|
|
if (s->cached_forced_index_type)
|
|
{
|
|
type= s->cached_forced_index_type;
|
|
cost= s->cached_forced_index_cost;
|
|
forced_index= s->cached_forced_index;
|
|
}
|
|
else
|
|
{
|
|
if (table->force_index_join && !best.key)
|
|
{
|
|
/*
|
|
The query is using 'forced_index' and we did not find a usable key.
|
|
Calculate cost of a table scan with the forced index.
|
|
*/
|
|
type= JT_NEXT;
|
|
if (s->cached_covering_key != MAX_KEY)
|
|
{
|
|
/* Use value from estimate_scan_time */
|
|
forced_index= s->cached_covering_key;
|
|
cost= s->cached_scan_and_compare_cost;
|
|
}
|
|
else
|
|
{
|
|
#ifdef FORCE_INDEX_SHOULD_FORCE_INDEX_SCAN
|
|
/* No cached key, use shortest allowed key */
|
|
key_map keys= *file->keys_to_use_for_scanning();
|
|
keys.intersect(table->keys_in_use_for_query);
|
|
if ((forced_index= find_shortest_key(table, &keys)) < MAX_KEY)
|
|
{
|
|
cost= cost_for_index_read(thd, table,
|
|
forced_index,
|
|
s->records, 0);
|
|
/* Calculate cost of checking the attached WHERE */
|
|
cost.copy_cost+= s->records * file->WHERE_COST;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
/* No usable key, use table scan */
|
|
cost= s->cached_scan_and_compare_cost;
|
|
type= JT_ALL;
|
|
}
|
|
}
|
|
}
|
|
else // table scan
|
|
{
|
|
cost= s->cached_scan_and_compare_cost;
|
|
type= JT_ALL;
|
|
}
|
|
/* Cache result for other calls */
|
|
s->cached_forced_index_type= type;
|
|
s->cached_forced_index_cost= cost;
|
|
s->cached_forced_index= forced_index;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Note: the condition checked here is very out of date and incorrect.
|
|
Below, we use a more accurate check when assigning the value of
|
|
best.use_join_buffer.
|
|
*/
|
|
if ((s->table->map & join->outer_join) || disable_jbuf)
|
|
{
|
|
/*
|
|
Simple scan
|
|
We estimate we have to read org_records rows.
|
|
records_after_filter rows will survive the where check of constants.
|
|
'best.records_out' rows will survive after the check against columns
|
|
from previous tables.
|
|
*/
|
|
scan_type= "scan";
|
|
|
|
/*
|
|
We have to compare each row set against all previous row combinations
|
|
*/
|
|
cur_cost= file->cost_for_reading_multiple_times(record_count,
|
|
&cost);
|
|
}
|
|
else
|
|
{
|
|
/* Scan through join cache */
|
|
double cmp_time, row_copy_cost, tmp_refills;
|
|
|
|
/*
|
|
Note that the cost of checking all rows against the table specific
|
|
WHERE is already included in cur_cost.
|
|
*/
|
|
scan_type= "scan_with_join_cache";
|
|
|
|
/* Calculate cost of refills */
|
|
tmp_refills= (1.0 + floor((double) cache_record_length(join,idx) *
|
|
(record_count /
|
|
(double) thd->variables.join_buff_size)));
|
|
cur_cost= file->cost_for_reading_multiple_times(tmp_refills,
|
|
&cost);
|
|
refills= double_to_ulonglong(ceil(tmp_refills));
|
|
|
|
/* We come here only if there are already rows in the join cache */
|
|
DBUG_ASSERT(idx != join->const_tables);
|
|
/*
|
|
records_after_filter is the number of rows that have survived
|
|
the table specific WHERE check that only involves constants.
|
|
|
|
Calculate cost of:
|
|
- Copying all previous record combinations to the join cache
|
|
- Copying the tables from the join cache to table records
|
|
- Checking the WHERE against the final row combination
|
|
*/
|
|
row_copy_cost= (ROW_COPY_COST_THD(thd) *
|
|
JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
|
|
cmp_time= (record_count * row_copy_cost +
|
|
records_after_filter * record_count *
|
|
((idx - join->const_tables) * row_copy_cost +
|
|
WHERE_COST_THD(thd)));
|
|
cur_cost= COST_ADD(cur_cost, cmp_time);
|
|
use_join_buffer= 1;
|
|
}
|
|
|
|
/* Splitting technique cannot be used with join cache */
|
|
if (table->is_splittable())
|
|
startup_cost+= table->get_materialization_cost();
|
|
cur_cost+= startup_cost;
|
|
|
|
if (unlikely(trace_access_scan.trace_started()))
|
|
{
|
|
trace_access_scan.
|
|
add("access_type",
|
|
type == JT_ALL ? scan_type : join_type_str[type]);
|
|
if (type == JT_RANGE)
|
|
trace_access_scan.
|
|
add("range_index", table->key_info[s->quick->index].name);
|
|
trace_access_scan.
|
|
add("rows", org_records).
|
|
add("rows_after_filter", records_after_filter).
|
|
add("rows_out", best.records_out).
|
|
add("cost", cur_cost);
|
|
if (use_join_buffer)
|
|
trace_access_scan.
|
|
add("cost_without_join_buffer",
|
|
file->cost_for_reading_multiple_times(record_count, &cost));
|
|
if (type == JT_ALL)
|
|
{
|
|
trace_access_scan.add("index_only",
|
|
(s->cached_covering_key != MAX_KEY));
|
|
}
|
|
}
|
|
|
|
if (cur_cost + COST_EPS < best.cost || force_plan)
|
|
{
|
|
/*
|
|
If the table has a range (s->quick is set) make_join_select()
|
|
will ensure that this will be used
|
|
*/
|
|
best.cost= cur_cost;
|
|
best.records_read= org_records; // Records accessed
|
|
best.records= records_after_filter; // Records to be checked against
|
|
// previous row combinations
|
|
|
|
/*
|
|
If we are using 'use_cond_selectivity > 1' then
|
|
table_after_join_selectivity may take into account other
|
|
filters than what is currently used so we have to use
|
|
records_after_filter. If 'use_cond_selectivity <= 1 then we
|
|
can use information from the best filter.
|
|
*/
|
|
best.records_after_filter= ((use_cond_selectivity > 1) ?
|
|
records_after_filter :
|
|
records_best_filter);
|
|
best.key= 0;
|
|
best.forced_index= forced_index;
|
|
/*
|
|
filter is only set if
|
|
s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE
|
|
*/
|
|
best.filter= filter;
|
|
/* range/index_merge/ALL/index access method are "independent", so: */
|
|
best.ref_depends_map= 0;
|
|
best.use_join_buffer= use_join_buffer ||
|
|
MY_TEST(!disable_jbuf &&
|
|
(join->allowed_outer_join_with_cache ||
|
|
!(s->table->map & join->outer_join)));
|
|
best.refills= refills;
|
|
best.spl_plan= 0;
|
|
best.type= type;
|
|
trace_access_scan.add("chosen", true);
|
|
}
|
|
else
|
|
trace_access_scan.add("chosen", false);
|
|
}
|
|
else
|
|
{
|
|
if (unlikely(trace_access_scan.trace_started()))
|
|
trace_access_scan.
|
|
add("type", "scan").
|
|
add("chosen", false).
|
|
add("cause", "cost");
|
|
}
|
|
|
|
crash_if_first_double_is_bigger(best.records_out, best.records);
|
|
crash_if_first_double_is_bigger(best.records_out, best.records_read);
|
|
|
|
/* Update the cost information for the current partial plan */
|
|
pos->loops= record_count;
|
|
pos->records_init= best.records_read;
|
|
pos->records_after_filter= best.records_after_filter;
|
|
pos->records_read= best.records;
|
|
pos->records_out= best.records_out;
|
|
pos->prev_record_reads= best.prev_record_reads;
|
|
pos->identical_keys= best.identical_keys;
|
|
pos->read_time= best.cost;
|
|
pos->key= best.key;
|
|
pos->forced_index= best.forced_index;
|
|
pos->type= best.type;
|
|
pos->table= s;
|
|
pos->ref_depend_map= best.ref_depends_map;
|
|
pos->loosescan_picker.loosescan_key= MAX_KEY;
|
|
pos->use_join_buffer= best.use_join_buffer;
|
|
pos->firstmatch_with_join_buf= 0;
|
|
pos->spl_plan= best.spl_plan;
|
|
pos->spl_pd_boundary= best.spl_plan ? spl_pd_boundary: 0;
|
|
pos->range_rowid_filter_info= best.filter;
|
|
pos->key_dependent= (best.type == JT_EQ_REF ? (table_map) 0 :
|
|
key_dependent & remaining_tables);
|
|
pos->refills= best.refills;
|
|
|
|
loose_scan_opt.save_to_position(s, record_count, pos->records_out,
|
|
loose_scan_pos);
|
|
|
|
if (!best.key &&
|
|
idx == join->const_tables && // First table
|
|
table == join->sort_by_table &&
|
|
join->unit->lim.get_select_limit() >= best.records) // QQQ Why?
|
|
{
|
|
trace_access_scan.add("use_tmp_table", true);
|
|
join->sort_by_table= (TABLE*) 1; // Must use temporary table
|
|
}
|
|
trace_access_scan.end();
|
|
trace_paths.end();
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
print_best_access_for_table(thd, pos);
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Find JOIN_TAB's embedding (i.e, parent) subquery.
|
|
- For merged semi-joins, tables inside the semi-join nest have their
|
|
semi-join nest as parent. We intentionally ignore results of table
|
|
pullout action here.
|
|
- For non-merged semi-joins (JTBM tabs), the embedding subquery is the
|
|
JTBM join tab itself.
|
|
*/
|
|
|
|
static TABLE_LIST* get_emb_subq(JOIN_TAB *tab)
{
  TABLE_LIST *const own_ref= tab->table->pos_in_table_list;

  /* A non-merged semi-join (JTBM) table is its own embedding subquery */
  if (own_ref->jtbm_subselect)
    return own_ref;

  /*
    Otherwise the parent is the embedding nest, but only when that nest is
    a semi-join nest (i.e. it has a subquery predicate attached).
  */
  TABLE_LIST *const nest= own_ref->embedding;
  return (nest && nest->sj_subq_pred) ? nest : NULL;
}
|
|
|
|
|
|
/*
|
|
Choose initial table order that "helps" semi-join optimizations.
|
|
|
|
The idea is that we should start with the order that is the same as the one
|
|
we would have had if we had semijoin=off:
|
|
- Top-level tables go first
|
|
- subquery tables are grouped together by the subquery they are in,
|
|
- subquery tables are attached where the subquery predicate would have been
|
|
attached if we had semi-join off.
|
|
|
|
This function relies on join_tab_cmp()/join_tab_cmp_straight() to produce
|
|
certain preliminary ordering, see compare_embedding_subqueries() for its
|
|
description.
|
|
*/
|
|
|
|
static void choose_initial_table_order(JOIN *join)
{
  JOIN_TAB **const first_tab= join->best_ref + join->const_tables;
  JOIN_TAB **const tabs_end= join->best_ref + join->table_count;
  JOIN_TAB **tab= first_tab;
  DBUG_ENTER("choose_initial_table_order");

  /* Find where the top-level JOIN_TABs end and subquery JOIN_TABs start */
  while (tab != tabs_end && !get_emb_subq(*tab))
    tab++;

  const uint n_subquery_tabs= (uint) (tabs_end - tab);
  if (!n_subquery_tabs)
    DBUG_VOID_RETURN;

  /*
    Copy the subquery JOIN_TABs to a separate array, as the tail of
    join->best_ref is overwritten below while the groups are re-inserted.
  */
  JOIN_TAB *subquery_tabs[MAX_TABLES];
  memcpy(subquery_tabs, tab, sizeof(JOIN_TAB*) * n_subquery_tabs);

  /* End of the already arranged part of join->best_ref */
  JOIN_TAB **last_top_level_tab= tab;
  JOIN_TAB **const subq_tabs_end= subquery_tabs + n_subquery_tabs;
  TABLE_LIST *cur_subq_nest= NULL;

  for (JOIN_TAB **subq_tab= subquery_tabs;
       subq_tab < subq_tabs_end;
       subq_tab++)
  {
    TABLE_LIST *nest= get_emb_subq(*subq_tab);
    if (nest == cur_subq_nest)
      continue;

    /*
      Reached the part of subquery_tabs that covers tables in some subquery.
    */
    cur_subq_nest= nest;

    /* Determine how many tables the subquery has */
    JOIN_TAB **last_tab_for_subq= subq_tab;
    while (last_tab_for_subq < subq_tabs_end &&
           get_emb_subq(*last_tab_for_subq) == cur_subq_nest)
      last_tab_for_subq++;
    const uint n_subquery_tables= (uint) (last_tab_for_subq - subq_tab);

    /*
      Walk the original array and find where this subquery would have been
      attached to: the first position at which all non-constant tables
      used by the original subquery predicate are already in the prefix.
    */
    table_map need_tables= cur_subq_nest->original_subq_pred_used_tables;
    need_tables&= ~(join->const_table_map | PSEUDO_TABLE_BITS);
    for (JOIN_TAB **top_level_tab= first_tab;
         top_level_tab < last_top_level_tab;
         top_level_tab++)
    {
      need_tables&= ~(*top_level_tab)->table->map;
      /* Check if this is the place where subquery should be attached */
      if (!need_tables)
      {
        /* Move away the top-level tables that are after top_level_tab */
        size_t top_tail_len= last_top_level_tab - top_level_tab - 1;
        memmove(top_level_tab + 1 + n_subquery_tables, top_level_tab + 1,
                sizeof(JOIN_TAB*) * top_tail_len);
        last_top_level_tab+= n_subquery_tables;
        /* Put the tables of the subquery into the gap that was opened */
        memcpy(top_level_tab + 1, subq_tab,
               sizeof(JOIN_TAB*) * n_subquery_tables);
        break;
      }
    }
    DBUG_ASSERT(!need_tables);

    /* Skip the rest of this group; the loop increment moves past its end */
    subq_tab+= n_subquery_tables - 1;
  }
  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
/**
|
|
Selects and invokes a search strategy for an optimal query plan.
|
|
|
|
The function checks user-configurable parameters that control the search
|
|
strategy for an optimal plan, selects the search method and then invokes
|
|
it. Each specific optimization procedure stores the final optimal plan in
|
|
the array 'join->best_positions', and the cost of the plan in
|
|
'join->best_read'.
|
|
|
|
@param join pointer to the structure providing all context info for
|
|
the query
|
|
@param join_tables set of the tables in the query
|
|
@param emb_sjm_nest List of tables in case of materialized semi-join nest
|
|
|
|
@retval
|
|
FALSE ok
|
|
@retval
|
|
TRUE Fatal error
|
|
*/
|
|
|
|
bool
|
|
choose_plan(JOIN *join, table_map join_tables, TABLE_LIST *emb_sjm_nest)
|
|
{
|
|
uint search_depth= join->thd->variables.optimizer_search_depth;
|
|
uint use_cond_selectivity=
|
|
join->thd->variables.optimizer_use_condition_selectivity;
|
|
bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
|
|
THD *thd= join->thd;
|
|
qsort2_cmp jtab_sort_func;
|
|
DBUG_ENTER("choose_plan");
|
|
|
|
join->limit_optimization_mode= false;
|
|
join->cur_embedding_map= 0;
|
|
join->extra_heuristic_pruning= false;
|
|
join->prune_level= join->thd->variables.optimizer_prune_level;
|
|
|
|
reset_nj_counters(join, join->join_list);
|
|
|
|
if ((join->emb_sjm_nest= emb_sjm_nest))
|
|
{
|
|
/* We're optimizing semi-join materialization nest, so put the
|
|
tables from this semi-join as first
|
|
*/
|
|
jtab_sort_func= join_tab_cmp_embedded_first;
|
|
/*
|
|
If we are searching for the execution plan of a materialized semi-join
|
|
nest then allowed_tables contains bits only for the tables from this
|
|
nest.
|
|
*/
|
|
join->allowed_tables= (emb_sjm_nest->sj_inner_tables &
|
|
~join->const_table_map);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
if (SELECT_STRAIGHT_JOIN option is set)
|
|
reorder tables so dependent tables come after tables they depend
|
|
on, otherwise keep tables in the order they were specified in the query
|
|
else
|
|
Apply heuristic: pre-sort all access plans with respect to the number
|
|
of records accessed.
|
|
*/
|
|
jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
|
|
join->allowed_tables= ~join->const_table_map;
|
|
}
|
|
|
|
/*
|
|
psergey-todo: if we're not optimizing an SJM nest,
|
|
- sort that outer tables are first, and each sjm nest follows
|
|
- then, put each [sjm_table1, ... sjm_tableN] sub-array right where
|
|
WHERE clause pushdown would have put it.
|
|
*/
|
|
my_qsort2(join->best_ref + join->const_tables,
|
|
join->table_count - join->const_tables, sizeof(JOIN_TAB*),
|
|
jtab_sort_func, (void*) emb_sjm_nest);
|
|
|
|
Json_writer_object wrapper(thd);
|
|
Json_writer_array trace_plan(thd,"considered_execution_plans");
|
|
|
|
if (!emb_sjm_nest)
|
|
choose_initial_table_order(join);
|
|
|
|
/*
|
|
Note: constant tables are already in the join prefix. We don't
|
|
put them into the cur_sj_inner_tables, though.
|
|
*/
|
|
|
|
join->cur_sj_inner_tables= 0;
|
|
|
|
if (straight_join)
|
|
{
|
|
optimize_straight_join(join, join_tables);
|
|
}
|
|
else
|
|
{
|
|
DBUG_ASSERT(search_depth <= MAX_TABLES + 1);
|
|
if (search_depth == 0)
|
|
/* Automatically determine a reasonable value for 'search_depth' */
|
|
search_depth= determine_search_depth(join);
|
|
|
|
if (join->prune_level >= 1 &&
|
|
search_depth >= thd->variables.optimizer_extra_pruning_depth)
|
|
{
|
|
join->extra_heuristic_pruning= true;
|
|
}
|
|
|
|
double limit_cost= DBL_MAX;
|
|
double limit_record_count;
|
|
POSITION *limit_plan= NULL;
|
|
|
|
/* First, build a join plan that can short-cut ORDER BY...LIMIT */
|
|
if (join->limit_shortcut_applicable && !join->emb_sjm_nest)
|
|
{
|
|
bool res;
|
|
Json_writer_object wrapper(join->thd);
|
|
Json_writer_array trace(join->thd, "join_limit_shortcut_plan_search");
|
|
join->limit_optimization_mode= true;
|
|
res= greedy_search(join, join_tables, search_depth,
|
|
use_cond_selectivity);
|
|
join->limit_optimization_mode= false;
|
|
|
|
if (res)
|
|
DBUG_RETURN(TRUE);
|
|
DBUG_ASSERT(join->best_read != DBL_MAX);
|
|
|
|
/*
|
|
We've built a join order. Adjust its cost based on ORDER BY...LIMIT
|
|
short-cutting.
|
|
*/
|
|
limit_plan= join_limit_shortcut_finalize_plan(join, &limit_cost);
|
|
limit_record_count= join->join_record_count;
|
|
}
|
|
|
|
/* The main call to search for the query plan: */
|
|
if (greedy_search(join, join_tables, search_depth, use_cond_selectivity))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
DBUG_ASSERT(join->best_read != DBL_MAX);
|
|
if (limit_plan && limit_cost < join->best_read)
|
|
{
|
|
/* Plan that uses ORDER BY ... LIMIT shortcutting is better. */
|
|
memcpy((uchar*)join->best_positions, (uchar*)limit_plan,
|
|
sizeof(POSITION)*join->table_count);
|
|
join->best_read= limit_cost;
|
|
join->join_record_count= limit_record_count;
|
|
}
|
|
}
|
|
|
|
join->emb_sjm_nest= 0;
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
Compare two join tabs based on the subqueries they are from.
|
|
- top-level join tabs go first
|
|
- then subqueries are ordered by their select_id (we're using this
|
|
criteria because we need a cross-platform, deterministic ordering)
|
|
|
|
@return
|
|
0 - equal
|
|
-1 - jt1 < jt2
|
|
1 - jt1 > jt2
|
|
*/
|
|
|
|
static int compare_embedding_subqueries(JOIN_TAB *jt1, JOIN_TAB *jt2)
|
|
{
|
|
/* Determine if the first table is originally from a subquery */
|
|
TABLE_LIST *tbl1= jt1->table->pos_in_table_list;
|
|
uint tbl1_select_no;
|
|
if (tbl1->jtbm_subselect)
|
|
{
|
|
tbl1_select_no=
|
|
tbl1->jtbm_subselect->unit->first_select()->select_number;
|
|
}
|
|
else if (tbl1->embedding && tbl1->embedding->sj_subq_pred)
|
|
{
|
|
tbl1_select_no=
|
|
tbl1->embedding->sj_subq_pred->unit->first_select()->select_number;
|
|
}
|
|
else
|
|
tbl1_select_no= 1; /* Top-level */
|
|
|
|
/* Same for the second table */
|
|
TABLE_LIST *tbl2= jt2->table->pos_in_table_list;
|
|
uint tbl2_select_no;
|
|
if (tbl2->jtbm_subselect)
|
|
{
|
|
tbl2_select_no=
|
|
tbl2->jtbm_subselect->unit->first_select()->select_number;
|
|
}
|
|
else if (tbl2->embedding && tbl2->embedding->sj_subq_pred)
|
|
{
|
|
tbl2_select_no=
|
|
tbl2->embedding->sj_subq_pred->unit->first_select()->select_number;
|
|
}
|
|
else
|
|
tbl2_select_no= 1; /* Top-level */
|
|
|
|
/*
|
|
Put top-level tables in front. Tables from within subqueries must follow,
|
|
grouped by their owner subquery. We don't care about the order that
|
|
subquery groups are in, because choose_initial_table_order() will re-order
|
|
the groups.
|
|
*/
|
|
if (tbl1_select_no != tbl2_select_no)
|
|
return tbl1_select_no > tbl2_select_no ? 1 : -1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Compare two JOIN_TAB objects based on the number of accessed records.
|
|
|
|
@param ptr1 pointer to first JOIN_TAB object
|
|
@param ptr2 pointer to second JOIN_TAB object
|
|
|
|
NOTES
|
|
The order relation implemented by join_tab_cmp() is not transitive,
|
|
i.e. it is possible to choose such a, b and c that (a < b) && (b < c)
|
|
but (c < a). This implies that result of a sort using the relation
|
|
implemented by join_tab_cmp() depends on the order in which
|
|
elements are compared, i.e. the result is implementation-specific.
|
|
Example:
|
|
a: dependent = 0x0 table->map = 0x1 found_records = 3 ptr = 0x907e6b0
|
|
b: dependent = 0x0 table->map = 0x2 found_records = 3 ptr = 0x907e838
|
|
c: dependent = 0x6 table->map = 0x10 found_records = 2 ptr = 0x907ecd0
|
|
|
|
As for subqueries, this function must produce order that can be fed to
|
|
choose_initial_table_order().
|
|
|
|
@retval
|
|
1 if first is bigger
|
|
@retval
|
|
-1 if second is bigger
|
|
@retval
|
|
0 if equal
|
|
*/
|
|
|
|
static int
|
|
join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2)
|
|
{
|
|
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
|
|
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
|
|
int cmp;
|
|
|
|
if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0)
|
|
return cmp;
|
|
/*
|
|
After that do ordering according to numbers of
|
|
records in the table.
|
|
*/
|
|
if (jt1->found_records > jt2->found_records)
|
|
return 1;
|
|
if (jt1->found_records < jt2->found_records)
|
|
return -1;
|
|
return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
|
|
}
|
|
|
|
|
|
/**
|
|
Same as join_tab_cmp, but for use with SELECT_STRAIGHT_JOIN.
|
|
*/
|
|
|
|
static int
|
|
join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2)
|
|
{
|
|
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
|
|
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
|
|
|
|
/*
|
|
We don't do subquery flattening if the parent or child select has
|
|
STRAIGHT_JOIN modifier. It is complicated to implement and the semantics
|
|
is hardly useful.
|
|
*/
|
|
DBUG_ASSERT(!jt1->emb_sj_nest);
|
|
DBUG_ASSERT(!jt2->emb_sj_nest);
|
|
|
|
int cmp;
|
|
if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0)
|
|
return cmp;
|
|
|
|
/*
|
|
We have to check dependency with straight_join as we don't reorder
|
|
later as we do for other plans in best_extension_by_limited_search().
|
|
*/
|
|
if (jt1->dependent & jt2->table->map)
|
|
return 1;
|
|
if (jt2->dependent & jt1->table->map)
|
|
return -1;
|
|
|
|
return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
|
|
}
|
|
|
|
|
|
/*
|
|
Same as join_tab_cmp but tables from within the given semi-join nest go
|
|
first. Used when the optimizing semi-join materialization nests.
|
|
*/
|
|
|
|
static int
|
|
join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void* ptr2)
|
|
{
|
|
const TABLE_LIST *emb_nest= (TABLE_LIST*) emb;
|
|
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
|
|
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
|
|
|
|
if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest)
|
|
return -1;
|
|
if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest)
|
|
return 1;
|
|
|
|
if (jt1->found_records > jt2->found_records)
|
|
return 1;
|
|
if (jt1->found_records < jt2->found_records)
|
|
return -1;
|
|
|
|
return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
|
|
}
|
|
|
|
|
|
/**
|
|
Heuristic procedure to automatically guess a reasonable degree of
|
|
exhaustiveness for the greedy search procedure.
|
|
|
|
The procedure estimates the optimization time and selects a search depth
|
|
big enough to result in a near-optimal QEP, that doesn't take too long to
|
|
find. If the number of tables in the query exceeds some constant, then
|
|
search_depth is set to this constant.
|
|
|
|
@param join pointer to the structure providing all context info for
|
|
the query
|
|
|
|
@note
|
|
This is an extremely simplistic implementation that serves as a stub for a
|
|
more advanced analysis of the join. Ideally the search depth should be
|
|
determined by learning from previous query optimizations, because it will
|
|
depend on the CPU power (and other factors).
|
|
|
|
@todo
|
|
this value should be determined dynamically, based on statistics:
|
|
uint max_tables_for_exhaustive_opt= 7;
|
|
|
|
@todo
|
|
this value could be determined by some mapping of the form:
|
|
depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE]
|
|
|
|
@return
|
|
A positive integer that specifies the search depth (and thus the
|
|
exhaustiveness) of the depth-first search algorithm used by
|
|
'greedy_search'.
|
|
*/
|
|
|
|
static uint
determine_search_depth(JOIN *join)
{
  /* TODO: this value should be determined dynamically, based on statistics: */
  const uint max_tables_for_exhaustive_opt= 7;
  /* Only the non-constant tables take part in the search */
  const uint table_count= join->table_count - join->const_tables;

  if (table_count > max_tables_for_exhaustive_opt)
  {
    /*
      Use greedy search.
      TODO: this value could be determined by some mapping of the form:
      depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE]
    */
    return max_tables_for_exhaustive_opt;
  }

  /* Use exhaustive search for small number of tables */
  return table_count + 1;
}
|
|
|
|
|
|
/**
|
|
Select the best ways to access the tables in a query without reordering them.
|
|
|
|
Find the best access paths for each query table and compute their costs
|
|
according to their order in the array 'join->best_ref' (thus without
|
|
reordering the join tables). The function calls sequentially
|
|
'best_access_path' for each table in the query to select the best table
|
|
access method. The final optimal plan is stored in the array
|
|
'join->best_positions', and the corresponding cost in 'join->best_read'.
|
|
|
|
@param join pointer to the structure providing all context info
|
|
for the query
|
|
@param remaining_tables set of the tables in the query
|
|
|
|
@note
|
|
This function can be applied to:
|
|
- queries with STRAIGHT_JOIN
|
|
- internally to compute the cost of an arbitrary QEP
|
|
@par
|
|
Thus 'optimize_straight_join' can be used at any stage of the query
|
|
optimization process to finalize a QEP as it is.
|
|
*/
|
|
|
|
static void
|
|
optimize_straight_join(JOIN *join, table_map remaining_tables)
|
|
{
|
|
JOIN_TAB *s;
|
|
uint idx= join->const_tables;
|
|
bool disable_jbuf= join->thd->variables.join_cache_level == 0;
|
|
double record_count= 1.0;
|
|
double read_time= 0.0;
|
|
uint use_cond_selectivity=
|
|
join->thd->variables.optimizer_use_condition_selectivity;
|
|
POSITION loose_scan_pos;
|
|
THD *thd= join->thd;
|
|
|
|
for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
|
|
{
|
|
POSITION *position= join->positions + idx;
|
|
Json_writer_object trace_one_table(thd);
|
|
double original_record_count, current_record_count;
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
trace_plan_prefix(&trace_one_table, join, idx, remaining_tables);
|
|
/* Find the best access method from 's' to the current partial plan */
|
|
best_access_path(join, s, remaining_tables, join->positions, idx,
|
|
disable_jbuf, record_count,
|
|
position, &loose_scan_pos);
|
|
|
|
/* Compute the cost of the new plan extended with 's' */
|
|
current_record_count= COST_MULT(record_count, position->records_out);
|
|
read_time= COST_ADD(read_time, position->read_time);
|
|
original_record_count= current_record_count;
|
|
optimize_semi_joins(join, remaining_tables, idx, ¤t_record_count,
|
|
&read_time, &loose_scan_pos);
|
|
if (position->sj_strategy != SJ_OPT_NONE && original_record_count)
|
|
{
|
|
/* Adjust records_out to contain the final number of rows */
|
|
double ratio= current_record_count / original_record_count;
|
|
if (ratio < 1)
|
|
{
|
|
position->records_out*= ratio;
|
|
}
|
|
if (unlikely(trace_one_table.trace_started()))
|
|
{
|
|
trace_one_table.
|
|
add("sj_rows_out", position->records_out).
|
|
add("sj_rows_for_plan", current_record_count).
|
|
add("sj_filtered", safe_filtered(position->records_out,
|
|
position->records_init));
|
|
}
|
|
}
|
|
|
|
remaining_tables&= ~(s->table->map);
|
|
if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE)
|
|
{
|
|
double pushdown_cond_selectivity, records_out;
|
|
pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s,
|
|
remaining_tables,
|
|
&records_out);
|
|
if (unlikely(thd->trace_started()) &&
|
|
pushdown_cond_selectivity != 1.0)
|
|
{
|
|
trace_one_table.
|
|
add("rows_out", records_out).
|
|
add("pushdown_cond_selectivity", pushdown_cond_selectivity).
|
|
add("filtered", safe_filtered(position->records_out,
|
|
position->records_init));
|
|
}
|
|
position->cond_selectivity= pushdown_cond_selectivity;
|
|
position->records_out= records_out;
|
|
current_record_count= COST_MULT(record_count, records_out);
|
|
}
|
|
else
|
|
position->cond_selectivity= 1.0;
|
|
|
|
position->partial_join_cardinality= current_record_count;
|
|
++idx;
|
|
record_count= current_record_count;
|
|
}
|
|
|
|
if (join->sort_by_table &&
|
|
join->sort_by_table != join->positions[join->const_tables].table->table)
|
|
{
|
|
/*
|
|
We may have to make a temp table, note that this is only a
|
|
heuristic since we cannot know for sure at this point if we
|
|
we are going to use addon fields or to have flush sorting to
|
|
disk. We also don't know the temporary table will be in memory
|
|
or disk.
|
|
The following calculation takes a middle ground where assume
|
|
we can sort the keys in memory but have to use a disk based
|
|
temporary table to retrive the rows.
|
|
This cost is probably much bigger than it has to be...
|
|
*/
|
|
double sort_cost;
|
|
sort_cost= (get_qsort_sort_cost((ha_rows)record_count, 0) +
|
|
record_count *
|
|
DISK_TEMPTABLE_LOOKUP_COST(thd));
|
|
{
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
Json_writer_object trace_one_table(thd);
|
|
trace_one_table.add("estimated_cost_for_sorting", sort_cost);
|
|
}
|
|
}
|
|
read_time= COST_ADD(read_time, sort_cost);
|
|
}
|
|
memcpy((uchar*) join->best_positions, (uchar*) join->positions,
|
|
sizeof(POSITION)*idx);
|
|
join->join_record_count= record_count;
|
|
join->best_read= read_time;
|
|
}
|
|
|
|
|
|
/**
|
|
Find a good, possibly optimal, query execution plan (QEP) by a greedy search.
|
|
|
|
The search procedure uses a hybrid greedy/exhaustive search with controlled
|
|
exhaustiveness. The search is performed in N = card(remaining_tables)
|
|
steps. Each step evaluates how promising is each of the unoptimized tables,
|
|
selects the most promising table, and extends the current partial QEP with
|
|
that table. Currently the most 'promising' table is the one with least
|
|
expensive extension.
|
|
|
|
There are two extreme cases:
|
|
-# When (card(remaining_tables) < search_depth), the estimate finds the
|
|
best complete continuation of the partial QEP. This continuation can be
|
|
used directly as a result of the search.
|
|
-# When (search_depth == 1) the 'best_extension_by_limited_search'
|
|
considers the extension of the current QEP with each of the remaining
|
|
unoptimized tables.
|
|
|
|
All other cases are in-between these two extremes. Thus the parameter
|
|
'search_depth' controls the exhaustiveness of the search. The higher the
|
|
value, the longer the optimization time and possibly the better the
|
|
resulting plan. The lower the value, the fewer alternative plans are
|
|
estimated, but the more likely to get a bad QEP.
|
|
|
|
All intermediate and final results of the procedure are stored in 'join':
|
|
- join->positions : modified for every partial QEP that is explored
|
|
- join->best_positions: modified for the current best complete QEP
|
|
- join->best_read : modified for the current best complete QEP
|
|
- join->best_ref : might be partially reordered
|
|
|
|
The final optimal plan is stored in 'join->best_positions', and its
|
|
corresponding cost in 'join->best_read'.
|
|
|
|
@note
|
|
The following pseudocode describes the algorithm of 'greedy_search':
|
|
|
|
@code
|
|
procedure greedy_search
|
|
input: remaining_tables
|
|
output: pplan;
|
|
{
|
|
pplan = <>;
|
|
do {
|
|
(t, a) = best_extension(pplan, remaining_tables);
|
|
pplan = concat(pplan, (t, a));
|
|
remaining_tables = remaining_tables - t;
|
|
} while (remaining_tables != {})
|
|
return pplan;
|
|
}
|
|
|
|
@endcode
|
|
where 'best_extension' is a placeholder for a procedure that selects the
|
|
most "promising" of all tables in 'remaining_tables'.
|
|
Currently this estimate is performed by calling
|
|
'best_extension_by_limited_search' to evaluate all extensions of the
|
|
current QEP of size 'search_depth', thus the complexity of 'greedy_search'
|
|
mainly depends on that of 'best_extension_by_limited_search'.
|
|
|
|
@par
|
|
If 'best_extension()' == 'best_extension_by_limited_search()', then the
|
|
worst-case complexity of this algorithm is <=
|
|
O(N*N^search_depth/search_depth). When search_depth >= N, then the
|
|
complexity of greedy_search is O(N!).
|
|
|
|
@par
|
|
In the future, 'greedy_search' might be extended to support other
|
|
implementations of 'best_extension', e.g. some simpler quadratic procedure.
|
|
|
|
@param join pointer to the structure providing all context info
|
|
for the query
|
|
@param remaining_tables set of tables not included into the partial plan yet
|
|
@param search_depth controls the exhaustiveness of the search
|
|
@param use_cond_selectivity specifies how the selectivity of the conditions
|
|
pushed to a table should be taken into account
|
|
|
|
@retval
|
|
FALSE ok
|
|
@retval
|
|
TRUE Fatal error
|
|
*/
|
|
|
|
static bool
|
|
greedy_search(JOIN *join,
|
|
table_map remaining_tables,
|
|
uint search_depth,
|
|
uint use_cond_selectivity)
|
|
{
|
|
double record_count= 1.0;
|
|
double read_time= 0.0;
|
|
uint idx= join->const_tables; // index into 'join->best_ref'
|
|
uint best_idx;
|
|
uint size_remain; // cardinality of remaining_tables
|
|
table_map usable_tables, eq_ref_tables;
|
|
POSITION best_pos;
|
|
JOIN_TAB *best_table; // the next plan node to be added to the curr QEP
|
|
// ==join->tables or # tables in the sj-mat nest we're optimizing
|
|
uint n_tables __attribute__((unused));
|
|
DBUG_ENTER("greedy_search");
|
|
DBUG_ASSERT(!(remaining_tables & join->const_table_map));
|
|
|
|
/* number of tables that remain to be optimized */
|
|
usable_tables= (join->emb_sjm_nest ?
|
|
(join->emb_sjm_nest->sj_inner_tables &
|
|
~join->const_table_map & remaining_tables):
|
|
remaining_tables);
|
|
n_tables= size_remain= my_count_bits(usable_tables);
|
|
|
|
join->next_sort_position= join->sort_positions;
|
|
do {
|
|
/*
|
|
Find the extension of the current QEP with the lowest cost
|
|
We are using remaining_table instead of usable tables here as
|
|
in case of an emb_sjm_nest, we want to be able to check if
|
|
an embedded table is depending on an outer table.
|
|
*/
|
|
join->best_read= DBL_MAX;
|
|
if ((int) best_extension_by_limited_search(join, remaining_tables, idx,
|
|
record_count,
|
|
read_time, search_depth,
|
|
use_cond_selectivity,
|
|
&eq_ref_tables) <
|
|
(int) SEARCH_OK)
|
|
DBUG_RETURN(TRUE);
|
|
/*
|
|
'best_read < DBL_MAX' means that optimizer managed to find
|
|
some plan and updated 'best_positions' array accordingly.
|
|
*/
|
|
DBUG_ASSERT(join->best_read < DBL_MAX);
|
|
|
|
if (size_remain <= search_depth)
|
|
{
|
|
/*
|
|
'join->best_positions' contains a complete optimal extension of the
|
|
current partial QEP.
|
|
*/
|
|
DBUG_EXECUTE("opt", print_plan(join, n_tables,
|
|
record_count, read_time, read_time,
|
|
"optimal"););
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
/* select the first table in the optimal extension as most promising */
|
|
best_pos= join->best_positions[idx];
|
|
best_table= best_pos.table;
|
|
/*
|
|
Each subsequent loop of 'best_extension_by_limited_search' uses
|
|
'join->positions' for cost estimates, therefore we have to update its
|
|
value.
|
|
*/
|
|
join->positions[idx]= best_pos;
|
|
|
|
/*
|
|
Update the interleaving state after extending the current partial plan
|
|
with a new table.
|
|
We are doing this here because best_extension_by_limited_search reverts
|
|
the interleaving state to the one of the non-extended partial plan
|
|
on exit.
|
|
*/
|
|
bool is_interleave_error __attribute__((unused))=
|
|
check_interleaving_with_nj (best_table);
|
|
/* This has been already checked by best_extension_by_limited_search */
|
|
DBUG_ASSERT(!is_interleave_error);
|
|
|
|
/*
|
|
Also, update the semi-join optimization state. Information about the
|
|
picked semi-join operation is in best_pos->...picker, but we need to
|
|
update the global state in the JOIN object, too.
|
|
*/
|
|
if (!join->emb_sjm_nest)
|
|
update_sj_state(join, best_table, idx, remaining_tables);
|
|
|
|
/* find the position of 'best_table' in 'join->best_ref' */
|
|
best_idx= idx;
|
|
JOIN_TAB *pos= join->best_ref[best_idx];
|
|
while (pos && best_table != pos)
|
|
pos= join->best_ref[++best_idx];
|
|
DBUG_ASSERT((pos != NULL)); // should always find 'best_table'
|
|
|
|
/*
|
|
Move 'best_table' at the first free position in the array of joins
|
|
We don't need to keep the array sorted as
|
|
best_extension_by_limited_search() will sort them.
|
|
*/
|
|
swap_variables(JOIN_TAB*, join->best_ref[idx], join->best_ref[best_idx]);
|
|
|
|
/* compute the cost of the new plan extended with 'best_table' */
|
|
record_count= COST_MULT(record_count, join->positions[idx].records_read);
|
|
read_time= COST_ADD(read_time, join->positions[idx].read_time);
|
|
|
|
remaining_tables&= ~(best_table->table->map);
|
|
--size_remain;
|
|
++idx;
|
|
|
|
DBUG_EXECUTE("opt", print_plan(join, idx,
|
|
record_count, read_time, read_time,
|
|
"extended"););
|
|
} while (TRUE);
|
|
}
|
|
|
|
|
|
/**
|
|
Get cost of execution and fanout produced by selected tables in the join
|
|
prefix (where prefix is defined as prefix in depth-first traversal)
|
|
|
|
@param end_tab_idx The number of last tab to be taken into
|
|
account (in depth-first traversal prefix)
|
|
@param filter_map Bitmap of tables whose cost/fanout are to
|
|
be taken into account.
|
|
@param read_time_arg [out] store read time here
|
|
@param record_count_arg [out] store record count here
|
|
|
|
@note
|
|
|
|
@returns
|
|
read_time_arg and record_count_arg contain the computed cost and fanout
|
|
*/
|
|
|
|
void JOIN::get_partial_cost_and_fanout(int end_tab_idx,
|
|
table_map filter_map,
|
|
double *read_time_arg,
|
|
double *record_count_arg)
|
|
{
|
|
double record_count= 1;
|
|
double read_time= 0.0;
|
|
double sj_inner_fanout= 1.0;
|
|
JOIN_TAB *end_tab= NULL;
|
|
JOIN_TAB *tab;
|
|
int i;
|
|
int last_sj_table= MAX_TABLES;
|
|
|
|
/*
|
|
Handle a special case where the join is degenerate, and produces no
|
|
records
|
|
*/
|
|
if (table_count == const_tables)
|
|
{
|
|
*read_time_arg= 0.0;
|
|
/*
|
|
We return 1, because
|
|
- it is the pessimistic estimate (there might be grouping)
|
|
- it's safer, as we're less likely to hit the edge cases in
|
|
calculations.
|
|
*/
|
|
*record_count_arg=1.0;
|
|
return;
|
|
}
|
|
|
|
for (tab= first_depth_first_tab(this), i= const_tables;
|
|
tab;
|
|
tab= next_depth_first_tab(this, tab), i++)
|
|
{
|
|
end_tab= tab;
|
|
if (i == end_tab_idx)
|
|
break;
|
|
}
|
|
|
|
for (tab= first_depth_first_tab(this), i= const_tables;
|
|
;
|
|
tab= next_depth_first_tab(this, tab), i++)
|
|
{
|
|
if (end_tab->bush_root_tab && end_tab->bush_root_tab == tab)
|
|
{
|
|
/*
|
|
We've entered the SJM nest that contains the end_tab. The caller is
|
|
- interested in fanout inside the nest (because that's how many times
|
|
we'll invoke the attached WHERE conditions)
|
|
- not interested in cost
|
|
*/
|
|
record_count= 1.0;
|
|
read_time= 0.0;
|
|
}
|
|
|
|
/*
|
|
Ignore fanout (but not cost) from sj-inner tables, as long as
|
|
the range that processes them finishes before the end_tab
|
|
*/
|
|
if (tab->sj_strategy != SJ_OPT_NONE)
|
|
{
|
|
sj_inner_fanout= 1.0;
|
|
last_sj_table= i + tab->n_sj_tables;
|
|
}
|
|
|
|
table_map cur_table_map;
|
|
if (tab->table)
|
|
cur_table_map= tab->table->map;
|
|
else
|
|
{
|
|
/* This is a SJ-Materialization nest. Check all of its tables */
|
|
TABLE *first_child= tab->bush_children->start->table;
|
|
TABLE_LIST *sjm_nest= first_child->pos_in_table_list->embedding;
|
|
cur_table_map= sjm_nest->nested_join->used_tables;
|
|
}
|
|
if (tab->records_read && (cur_table_map & filter_map))
|
|
{
|
|
record_count= COST_MULT(record_count, tab->records_read);
|
|
read_time= COST_ADD(read_time, tab->read_time);
|
|
if (tab->emb_sj_nest)
|
|
sj_inner_fanout= COST_MULT(sj_inner_fanout, tab->records_read);
|
|
}
|
|
|
|
if (i == last_sj_table)
|
|
{
|
|
record_count /= sj_inner_fanout;
|
|
sj_inner_fanout= 1.0;
|
|
last_sj_table= MAX_TABLES;
|
|
}
|
|
|
|
if (tab == end_tab)
|
|
break;
|
|
}
|
|
*read_time_arg= read_time;
|
|
*record_count_arg= record_count;
|
|
}
|
|
|
|
|
|
/*
|
|
Get prefix cost and fanout. This function is different from
|
|
get_partial_cost_and_fanout:
|
|
- it operates on a JOIN that haven't yet finished its optimization phase (in
|
|
particular, fix_semijoin_strategies_for_picked_join_order() and
|
|
get_best_combination() haven't been called)
|
|
- it assumes the the join prefix doesn't have any semi-join plans
|
|
|
|
These assumptions are met by the caller of the function.
|
|
*/
|
|
|
|
void JOIN::get_prefix_cost_and_fanout(uint n_tables,
|
|
double *read_time_arg,
|
|
double *record_count_arg)
|
|
{
|
|
double record_count= 1;
|
|
double read_time= 0.0;
|
|
for (uint i= const_tables; i < n_tables + const_tables ; i++)
|
|
{
|
|
if (best_positions[i].records_read)
|
|
{
|
|
record_count= COST_MULT(record_count, best_positions[i].records_read);
|
|
read_time= COST_ADD(read_time, best_positions[i].read_time);
|
|
}
|
|
}
|
|
*read_time_arg= read_time;
|
|
*record_count_arg= record_count;
|
|
}
|
|
|
|
|
|
/**
|
|
Estimate the number of rows that query execution will read.
|
|
|
|
@todo This is a very pessimistic upper bound. Use join selectivity
|
|
when available to produce a more realistic number.
|
|
*/
|
|
|
|
double JOIN::get_examined_rows()
|
|
{
|
|
double examined_rows;
|
|
double prev_fanout= 1;
|
|
double records;
|
|
JOIN_TAB *tab= first_breadth_first_tab();
|
|
JOIN_TAB *prev_tab= tab;
|
|
|
|
records= (double)tab->get_examined_rows();
|
|
|
|
while ((tab= next_breadth_first_tab(first_breadth_first_tab(),
|
|
top_join_tab_count, tab)))
|
|
{
|
|
prev_fanout= COST_MULT(prev_fanout, prev_tab->records_read);
|
|
records=
|
|
COST_ADD(records,
|
|
COST_MULT((double) (tab->get_examined_rows()), prev_fanout));
|
|
prev_tab= tab;
|
|
}
|
|
examined_rows= records;
|
|
return examined_rows;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Get the selectivity of equalities between columns when joining a table
|
|
|
|
@param join The optimized join
|
|
@param idx The number of tables in the evaluated partual join
|
|
@param s The table to be joined for evaluation
|
|
@param rem_tables The bitmap of tables to be joined later
|
|
@param keyparts The number of key parts to used when joining s
|
|
@param ref_keyuse_steps Array of references to keyuses employed to join s
|
|
*/
|
|
|
|
static
|
|
double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
|
table_map rem_tables, uint keyparts,
|
|
uint16 *ref_keyuse_steps)
|
|
{
|
|
double sel= 1.0;
|
|
COND_EQUAL *cond_equal= join->cond_equal;
|
|
|
|
if (!cond_equal || !cond_equal->current_level.elements || !s->keyuse)
|
|
return sel;
|
|
|
|
Item_equal *item_equal;
|
|
List_iterator_fast<Item_equal> it(cond_equal->current_level);
|
|
TABLE *table= s->table;
|
|
table_map table_bit= table->map;
|
|
POSITION *pos= &join->positions[idx];
|
|
|
|
while ((item_equal= it++))
|
|
{
|
|
/*
|
|
Check whether we need to take into account the selectivity of
|
|
multiple equality item_equal. If this is the case multiply
|
|
the current value of sel by this selectivity
|
|
*/
|
|
table_map used_tables= item_equal->used_tables();
|
|
if (!(used_tables & table_bit))
|
|
continue;
|
|
if (item_equal->get_const())
|
|
continue;
|
|
|
|
bool adjust_sel= FALSE;
|
|
Item_equal_fields_iterator fi(*item_equal);
|
|
while((fi++) && !adjust_sel)
|
|
{
|
|
Field *fld= fi.get_curr_field();
|
|
if (fld->table->map != table_bit)
|
|
continue;
|
|
if (pos->key == 0)
|
|
adjust_sel= TRUE;
|
|
else
|
|
{
|
|
uint i;
|
|
KEYUSE *keyuse= pos->key;
|
|
uint key= keyuse->key;
|
|
for (i= 0; i < keyparts; i++)
|
|
{
|
|
if (i > 0)
|
|
keyuse+= ref_keyuse_steps[i-1];
|
|
uint fldno;
|
|
if (is_hash_join_key_no(key))
|
|
fldno= keyuse->keypart;
|
|
else
|
|
fldno= table->key_info[key].key_part[i].fieldnr - 1;
|
|
if (fld->field_index == fldno)
|
|
break;
|
|
}
|
|
keyuse= pos->key;
|
|
|
|
if (i == keyparts)
|
|
{
|
|
/*
|
|
Field fld is included in multiple equality item_equal
|
|
and is not a part of the ref key.
|
|
The selectivity of the multiple equality must be taken
|
|
into account unless one of the ref arguments is
|
|
equal to fld.
|
|
*/
|
|
adjust_sel= TRUE;
|
|
for (uint j= 0; j < keyparts && adjust_sel; j++)
|
|
{
|
|
if (j > 0)
|
|
keyuse+= ref_keyuse_steps[j-1];
|
|
Item *ref_item= keyuse->val;
|
|
if (ref_item->real_item()->type() == Item::FIELD_ITEM)
|
|
{
|
|
Item_field *field_item= (Item_field *) (ref_item->real_item());
|
|
if (item_equal->contains(field_item->field))
|
|
adjust_sel= FALSE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (adjust_sel)
|
|
{
|
|
/*
|
|
If ref == 0 and there are no fields in the multiple equality
|
|
item_equal that belong to the tables joined prior to s
|
|
then the selectivity of multiple equality will be set to 1.0.
|
|
*/
|
|
double eq_fld_sel= 1.0;
|
|
fi.rewind();
|
|
while ((fi++))
|
|
{
|
|
double curr_eq_fld_sel;
|
|
Field *fld= fi.get_curr_field();
|
|
if (!(fld->table->map & ~(table_bit | rem_tables)))
|
|
continue;
|
|
curr_eq_fld_sel= get_column_avg_frequency(fld) /
|
|
fld->table->stat_records();
|
|
if (curr_eq_fld_sel < 1.0)
|
|
set_if_bigger(eq_fld_sel, curr_eq_fld_sel);
|
|
}
|
|
sel*= eq_fld_sel;
|
|
}
|
|
}
|
|
return sel;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Get the selectivity of conditions when joining a table
|
|
|
|
@param join The optimized join
|
|
@param s The table to be joined for evaluation
|
|
@param rem_tables The bitmap of tables to be joined later
|
|
@param new_records_out OUT Set to number of rows accepted
|
|
|
|
@detail
|
|
Get selectivity of conditions that can be applied when joining this table
|
|
with previous tables.
|
|
|
|
For quick selects and full table scans, selectivity of COND(this_table)
|
|
is accounted for in apply_selectivity_for_table(). Here, we only count
|
|
selectivity of COND(this_table, previous_tables).
|
|
|
|
For other access methods, we need to calculate selectivity of the whole
|
|
condition, "COND(this_table) AND COND(this_table, previous_tables)".
|
|
|
|
@retval
|
|
selectivity of the conditions imposed on the rows of s related to
|
|
the rows that we are expected to read (position->records_init).
|
|
*/
|
|
|
|
static
|
|
double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
|
table_map rem_tables,
|
|
double *new_records_out)
|
|
{
|
|
uint16 ref_keyuse_steps_buf[MAX_REF_PARTS];
|
|
uint ref_keyuse_size= MAX_REF_PARTS;
|
|
uint16 *ref_keyuse_steps= ref_keyuse_steps_buf;
|
|
Field *field;
|
|
TABLE *table= s->table;
|
|
MY_BITMAP *read_set= table->read_set;
|
|
POSITION *pos= &join->positions[idx];
|
|
double sel, records_out= pos->records_out;
|
|
uint keyparts= 0;
|
|
uint found_part_ref_or_null= 0;
|
|
|
|
if (pos->key != 0)
|
|
{
|
|
sel= table->cond_selectivity;
|
|
/*
|
|
A ref access or hash join is used for this table. ref access is created
|
|
from
|
|
|
|
tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ...
|
|
|
|
and it will only return rows for which this condition is satisified.
|
|
Suppose, certain expr{i} is a constant. Since ref access only returns
|
|
rows that satisfy
|
|
|
|
tbl.keypart{i}=const (*)
|
|
|
|
then selectivity of this equality should not be counted in return value
|
|
of this function. This function uses the value of
|
|
|
|
table->cond_selectivity=selectivity(COND(tbl)) (**)
|
|
|
|
as a starting point. This value includes selectivity of equality (*). We
|
|
should somehow discount it.
|
|
|
|
Looking at calculate_cond_selectivity_for_table(), one can see that that
|
|
the value is not necessarily a direct multiplicand in
|
|
table->cond_selectivity
|
|
|
|
There are three possible ways to discount
|
|
1. There is a potential range access on t.keypart{i}=const.
|
|
(an important special case: the used ref access has a const prefix for
|
|
which a range estimate is available)
|
|
|
|
2. The field has a histogram. field[x]->cond_selectivity has the data.
|
|
|
|
3. Use index stats on this index:
|
|
rec_per_key[key_part+1]/rec_per_key[key_part]
|
|
|
|
(TODO: more details about the "t.key=othertable.col" case)
|
|
*/
|
|
KEYUSE *keyuse= pos->key;
|
|
KEYUSE *prev_ref_keyuse= keyuse;
|
|
uint key= keyuse->key;
|
|
bool used_range_selectivity= false;
|
|
|
|
/*
|
|
Check if we have a prefix of key=const that matches a quick select.
|
|
*/
|
|
if (!is_hash_join_key_no(key) && table->opt_range_keys.is_set(key))
|
|
{
|
|
key_part_map quick_key_map= (key_part_map(1) <<
|
|
table->opt_range[key].key_parts) - 1;
|
|
if (s->type == JT_RANGE ||
|
|
(table->opt_range[key].rows && (table->const_key_parts[key] & 1)))
|
|
{
|
|
/*
|
|
We are either using a range or we are using a REF which the
|
|
same key as an active range and the first key part is a constant.
|
|
|
|
In both cases we have to discount the selectivity for the range
|
|
as otherwise we are using the selectivity twice.
|
|
*/
|
|
for (; quick_key_map & 1 ; quick_key_map>>= 1)
|
|
{
|
|
while (keyuse->table == table && keyuse->key == key &&
|
|
keyuse->keypart == keyparts)
|
|
{
|
|
keyuse++;
|
|
}
|
|
keyparts++;
|
|
}
|
|
/*
|
|
Here we discount selectivity of the constant range CR. To calculate
|
|
this selectivity we use elements from the quick_rows[] array.
|
|
If we have indexes i1,...,ik with the same prefix compatible
|
|
with CR any of the estimate quick_rows[i1], ... quick_rows[ik] could
|
|
be used for this calculation but here we don't know which one was
|
|
actually used. So sel could be greater than 1 and we have to cap it.
|
|
However if sel becomes greater than 2 then with high probability
|
|
something went wrong.
|
|
*/
|
|
DBUG_ASSERT(sel <= 1.0);
|
|
DBUG_ASSERT(table->opt_range[key].rows <=
|
|
(double) table->stat_records());
|
|
sel /= ((double) table->opt_range[key].rows /
|
|
(double) table->stat_records());
|
|
set_if_smaller(sel, 1.0);
|
|
used_range_selectivity= true;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Go through the "keypart{N}=..." equalities and find those that were
|
|
already taken into account in table->cond_selectivity.
|
|
*/
|
|
keyuse= pos->key;
|
|
keyparts=0;
|
|
while (keyuse->table == table && keyuse->key == key)
|
|
{
|
|
if (!(keyuse->used_tables & (rem_tables | table->map)))
|
|
{
|
|
if (are_tables_local(s, keyuse->val->used_tables()))
|
|
{
|
|
if (is_hash_join_key_no(key))
|
|
{
|
|
if (keyparts == keyuse->keypart)
|
|
keyparts++;
|
|
}
|
|
else
|
|
{
|
|
if (keyparts == keyuse->keypart &&
|
|
!((keyuse->val->used_tables()) & ~pos->ref_depend_map) &&
|
|
!(found_part_ref_or_null & keyuse->optimize))
|
|
{
|
|
/* Found a KEYUSE object that will be used by ref access */
|
|
keyparts++;
|
|
found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
|
|
}
|
|
}
|
|
|
|
if (keyparts > keyuse->keypart)
|
|
{
|
|
/* Ok this is the keyuse that will be used for ref access */
|
|
if (!used_range_selectivity && keyuse->val->const_item())
|
|
{
|
|
uint fldno;
|
|
if (is_hash_join_key_no(key))
|
|
fldno= keyuse->keypart;
|
|
else
|
|
fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1;
|
|
|
|
if (table->field[fldno]->cond_selectivity > 0)
|
|
{
|
|
sel /= table->field[fldno]->cond_selectivity;
|
|
set_if_smaller(sel, 1.0);
|
|
}
|
|
/*
|
|
TODO: we could do better here:
|
|
1. cond_selectivity might be =1 (the default) because quick
|
|
select on some index prevented us from analyzing
|
|
histogram for this column.
|
|
2. we could get an estimate through this?
|
|
rec_per_key[key_part-1] / rec_per_key[key_part]
|
|
*/
|
|
}
|
|
if (keyparts > 1)
|
|
{
|
|
/*
|
|
Prepare to set ref_keyuse_steps[keyparts-2]: resize the array
|
|
if it is not large enough
|
|
*/
|
|
if (keyparts - 2 >= ref_keyuse_size)
|
|
{
|
|
uint new_size= MY_MAX(ref_keyuse_size*2, keyparts);
|
|
void *new_buf;
|
|
if (!(new_buf= my_malloc(PSI_INSTRUMENT_ME,
|
|
sizeof(*ref_keyuse_steps)*new_size,
|
|
MYF(0))))
|
|
{
|
|
sel= 1.0; // As if no selectivity was computed
|
|
goto exit;
|
|
}
|
|
memcpy(new_buf, ref_keyuse_steps,
|
|
sizeof(*ref_keyuse_steps)*ref_keyuse_size);
|
|
if (ref_keyuse_steps != ref_keyuse_steps_buf)
|
|
my_free(ref_keyuse_steps);
|
|
|
|
ref_keyuse_steps= (uint16*)new_buf;
|
|
ref_keyuse_size= new_size;
|
|
}
|
|
|
|
ref_keyuse_steps[keyparts-2]= (uint16)(keyuse - prev_ref_keyuse);
|
|
prev_ref_keyuse= keyuse;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
keyuse++;
|
|
}
|
|
/*
|
|
If the field f from the table is equal to a field from one the
|
|
earlier joined tables then the selectivity of the range conditions
|
|
over the field f must be discounted.
|
|
|
|
We need to discount selectivity only if we're using ref-based
|
|
access method (and have sel!=1).
|
|
If we use ALL/range/index_merge, then sel==1, and no need to discount.
|
|
*/
|
|
for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
|
|
{
|
|
if (!bitmap_is_set(read_set, field->field_index) ||
|
|
!field->next_equal_field)
|
|
continue;
|
|
for (Field *next_field= field->next_equal_field;
|
|
next_field != field;
|
|
next_field= next_field->next_equal_field)
|
|
{
|
|
if (!(next_field->table->map & rem_tables) &&
|
|
next_field->table != table)
|
|
{
|
|
if (field->cond_selectivity > 0)
|
|
{
|
|
sel/= field->cond_selectivity;
|
|
set_if_smaller(sel, 1.0);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
We have now calculated a more exact 'records_out' taking more index
|
|
costs into account.
|
|
pos->records_out previously contained the smallest record count for
|
|
all range or ref access, which should not be smaller than what we
|
|
calculated above.
|
|
*/
|
|
records_out= pos->records_init * sel;
|
|
set_if_smaller(records_out, pos->records_out);
|
|
}
|
|
|
|
sel= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
|
|
keyparts, ref_keyuse_steps);
|
|
records_out*= sel;
|
|
|
|
/*
|
|
Update sel to be relative pos->records_read as that is what some old
|
|
code expects. Newer code should just use 'position->records_out' instead.
|
|
*/
|
|
if (pos->records_read == 0)
|
|
sel= 1.0;
|
|
else
|
|
{
|
|
sel= records_out / pos->records_read;
|
|
DBUG_ASSERT(sel >= 0.0 && sel <= 1.00001);
|
|
if (sel > 1.0)
|
|
sel= 1.0;
|
|
}
|
|
|
|
exit:
|
|
*new_records_out= records_out;
|
|
if (ref_keyuse_steps != ref_keyuse_steps_buf)
|
|
my_free(ref_keyuse_steps);
|
|
return sel;
|
|
}
|
|
|
|
|
|
/*
|
|
Check if the table is an EQ_REF or similar table and there is no cost
|
|
to gain by moveing it to a later stage.
|
|
We call such a table a edge table (or hanging leaf) as it will read at
|
|
most one row and will not add to the number of row combinations in the join.
|
|
*/
|
|
|
|
static inline enum_best_search
|
|
check_if_edge_table(POSITION *pos,
|
|
double pushdown_cond_selectivity)
|
|
{
|
|
|
|
if ((pos->type == JT_EQ_REF ||
|
|
(pos->type == JT_REF &&
|
|
pos->records_init == 1 &&
|
|
!pos->range_rowid_filter_info)) &&
|
|
pushdown_cond_selectivity >= 0.999)
|
|
return SEARCH_FOUND_EDGE;
|
|
return SEARCH_OK;
|
|
}
|
|
|
|
|
|
struct SORT_POSITION
|
|
{
|
|
JOIN_TAB **join_tab;
|
|
POSITION *position;
|
|
};
|
|
|
|
|
|
/*
|
|
Sort SORT_POSITIONS according to expected number of rows found
|
|
If number of combinations are the same sort according to join_tab order
|
|
(same table order as used in the original SQL query)
|
|
*/
|
|
|
|
static int
|
|
sort_positions(SORT_POSITION *a, SORT_POSITION *b)
|
|
{
|
|
int cmp;
|
|
if ((cmp= compare_embedding_subqueries(*a->join_tab, *b->join_tab)) != 0)
|
|
return cmp;
|
|
|
|
if (a->position->records_read > b->position->records_read)
|
|
return 1;
|
|
if (a->position->records_read < b->position->records_read)
|
|
return -1;
|
|
return CMP_NUM(*a->join_tab, *b->join_tab);
|
|
}
|
|
|
|
|
|
/*
|
|
Call best_access_path() for a set of tables and collect results
|
|
|
|
@param join JOIN object
|
|
@param trace_one_table Current optimizer_trace
|
|
@param pos Pointer to remanining tables
|
|
@param allowed_tables bitmap of allowed tables. On return set to
|
|
the collected tables.
|
|
@param store_poisition Points to where to store next found SORT_POSITION.
|
|
Will be updated to next free position.
|
|
@param stop_on_eq_ref Stop searching for more tables if we found an EQ_REF
|
|
table.
|
|
|
|
@return
|
|
0 Normal
|
|
1 Eq_ref table found (only if stop_on_eq_ref is used)
|
|
|
|
join->next_sort_position will be update to next free position.
|
|
*/
|
|
|
|
static bool
|
|
get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx,
|
|
double record_count,
|
|
Json_writer_object *trace_one_table,
|
|
JOIN_TAB **pos, SORT_POSITION **store_position,
|
|
table_map *allowed_tables,
|
|
bool stop_on_eq_ref)
|
|
{
|
|
THD *thd= join->thd;
|
|
POSITION *sort_position= join->next_sort_position;
|
|
SORT_POSITION *sort_end= *store_position;
|
|
JOIN_TAB *s;
|
|
table_map found_tables= 0;
|
|
bool found_eq_ref= 0;
|
|
bool disable_jbuf= join->thd->variables.join_cache_level == 0;
|
|
DBUG_ENTER("get_plans_for_tables");
|
|
|
|
s= *pos;
|
|
do
|
|
{
|
|
table_map real_table_bit= s->table->map;
|
|
if ((*allowed_tables & real_table_bit) &&
|
|
!(remaining_tables & s->dependent))
|
|
{
|
|
#ifdef DBUG_ASSERT_EXISTS
|
|
DBUG_ASSERT(!check_interleaving_with_nj(s));
|
|
restore_prev_nj_state(s); // Revert effect of check_... call
|
|
#endif
|
|
sort_end->join_tab= pos;
|
|
sort_end->position= sort_position;
|
|
|
|
|
|
Json_writer_object wrapper(thd);
|
|
/* Find the best access method from 's' to the current partial plan */
|
|
best_access_path(join, s, remaining_tables, join->positions, idx,
|
|
disable_jbuf, record_count,
|
|
sort_position, sort_position + 1);
|
|
found_tables|= s->table->map;
|
|
sort_end++;
|
|
sort_position+= 2;
|
|
if (unlikely(stop_on_eq_ref) && sort_position[-2].type == JT_EQ_REF)
|
|
{
|
|
/* Found an eq_ref tables. Use this, ignoring the other tables */
|
|
found_eq_ref= 1;
|
|
if (found_tables == s->table->map)
|
|
break; // First table
|
|
|
|
/* Store the found eq_ref table first in store_position */
|
|
sort_position-= 2;
|
|
*allowed_tables= s->table->map;
|
|
(*store_position)->join_tab= pos;
|
|
(*store_position)->position= sort_position;
|
|
(*store_position)++;
|
|
join->next_sort_position[0]= sort_position[0];
|
|
join->next_sort_position[1]= sort_position[1];
|
|
join->next_sort_position+= 2;
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Verify that 'allowed_current_tables' was calculated correctly */
|
|
DBUG_ASSERT((remaining_tables & s->dependent) ||
|
|
!(remaining_tables & real_table_bit) ||
|
|
!(*allowed_tables & real_table_bit) ||
|
|
check_interleaving_with_nj(s));
|
|
}
|
|
} while ((s= *++pos));
|
|
|
|
*allowed_tables= found_tables;
|
|
*store_position= sort_end;
|
|
join->next_sort_position= sort_position;
|
|
DBUG_RETURN(found_eq_ref);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Check if it is potentally possible to short-cut the JOIN execution due to
|
|
ORDER BY ... LIMIT clause
|
|
|
|
@detail
|
|
It is possible when the join has "ORDER BY ... LIMIT n" clause, and the
|
|
sort+limit operation is done right after the join operation (there's no
|
|
grouping or DISTINCT in between).
|
|
Then we can potentially build a join plan that enumerates rows in the
|
|
ORDER BY order and so will be able to terminate as soon as it has produced
|
|
#limit rows.
|
|
|
|
Note that it is not a requirement that sort_by_table has an index that
|
|
matches ORDER BY. If it doesn't have one, the optimizer will pass
|
|
sort_by_table to filesort. Reading from sort_by_table won't use
|
|
short-cutting but the rest of the join will.
|
|
*/
|
|
|
|
static
|
|
bool join_limit_shortcut_is_applicable(const JOIN *join)
|
|
{
|
|
/*
|
|
Any post-join operation like GROUP BY or DISTINCT or window functions
|
|
means we cannot short-cut join execution
|
|
*/
|
|
if (!join->thd->variables.optimizer_join_limit_pref_ratio ||
|
|
!join->order ||
|
|
join->select_limit == HA_POS_ERROR ||
|
|
join->group_list ||
|
|
join->select_distinct ||
|
|
join->select_options & SELECT_BIG_RESULT ||
|
|
join->rollup.state != ROLLUP::STATE_NONE ||
|
|
join->select_lex->have_window_funcs() ||
|
|
join->select_lex->with_sum_func)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
Cannot do short-cutting if
|
|
(1) ORDER BY refers to more than one table or
|
|
(2) the table it refers to cannot be first table in the join order
|
|
*/
|
|
if (!join->sort_by_table || // (1)
|
|
join->sort_by_table->reginfo.join_tab->dependent) // (2)
|
|
return false;
|
|
|
|
Json_writer_object wrapper(join->thd);
|
|
Json_writer_object trace(join->thd, "join_limit_shortcut_is_applicable");
|
|
trace.add("applicable", 1);
|
|
/* It looks like we can short-cut limit due to join */
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Check if we could use an index-based access method to produce rows
|
|
in the order for ORDER BY ... LIMIT.
|
|
|
|
@detail
|
|
This should do what test_if_skip_sort_order() does. We can't use that
|
|
function directly, because:
|
|
|
|
1. We're at the join optimization stage and have not done query plan
|
|
fix-ups done in get_best_combination() and co.
|
|
|
|
2. The code in test_if_skip_sort_order() does modify query plan structures,
|
|
for example it may change the table's quick select. This is done even if
|
|
it's called with no_changes=true parameter.
|
|
|
|
@param access_method_changed OUT Whether the function changed the access
|
|
method to get rows in desired order.
|
|
@param new_access_cost OUT if access method changed: its cost.
|
|
|
|
@return
|
|
true - Can skip sorting
|
|
false - Cannot skip sorting
|
|
*/
|
|
|
|
bool test_if_skip_sort_order_early(JOIN *join,
|
|
bool *access_method_changed,
|
|
double *new_access_cost)
|
|
{
|
|
const POSITION *pos= &join->best_positions[join->const_tables];
|
|
TABLE *table= pos->table->table;
|
|
key_map usable_keys= table->keys_in_use_for_order_by;
|
|
|
|
*access_method_changed= false;
|
|
|
|
// Step #1: Find indexes that produce the required ordering.
|
|
if (find_indexes_matching_order(join, table, join->order, &usable_keys))
|
|
{
|
|
return false; // Cannot skip sorting
|
|
}
|
|
|
|
// Step #2: Check if the index we're using produces the needed ordering
|
|
uint ref_key;
|
|
if (pos->key)
|
|
{
|
|
// Mirror the (wrong) logic in test_if_skip_sort_order:
|
|
if (pos->spl_plan || pos->type == JT_REF_OR_NULL)
|
|
return false; // Use filesort
|
|
|
|
ref_key= pos->key->key;
|
|
}
|
|
else
|
|
{
|
|
if (pos->table->quick)
|
|
{
|
|
if (pos->table->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
|
|
ref_key= pos->table->quick->index;
|
|
else
|
|
ref_key= MAX_KEY;
|
|
}
|
|
else
|
|
ref_key= MAX_KEY;
|
|
}
|
|
|
|
if (ref_key != MAX_KEY && usable_keys.is_set(ref_key))
|
|
{
|
|
return true; // we're using an index that produces the reqired ordering.
|
|
}
|
|
|
|
/*
|
|
Step #3: check if we can switch to using an index that would produce the
|
|
ordering.
|
|
(But don't actually switch, this will be done by test_if_skip_sort_order)
|
|
*/
|
|
int best_key= -1;
|
|
uint UNINIT_VAR(best_key_parts);
|
|
uint saved_best_key_parts= 0;
|
|
int best_key_direction= 0;
|
|
JOIN_TAB *tab= pos->table;
|
|
ha_rows new_limit;
|
|
double new_read_time;
|
|
if (test_if_cheaper_ordering(/*in_join_optimizer */TRUE,
|
|
tab, join->order, table, usable_keys,
|
|
ref_key, join->select_limit,
|
|
&best_key, &best_key_direction,
|
|
&new_limit, &new_read_time,
|
|
&best_key_parts,
|
|
&saved_best_key_parts))
|
|
{
|
|
// Ok found a way to skip sorting
|
|
*access_method_changed= true;
|
|
*new_access_cost= new_read_time;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
Compute the cost of join assuming we only need fraction of the output.
|
|
*/
|
|
|
|
double recompute_join_cost_with_limit(const JOIN *join, bool skip_sorting,
|
|
double *first_table_cost,
|
|
double fraction)
|
|
{
|
|
POSITION *pos= join->best_positions + join->const_tables;
|
|
/*
|
|
Generally, we assume that producing X% of output takes X% of the cost.
|
|
*/
|
|
double partial_join_cost= join->best_read * fraction;
|
|
|
|
if (skip_sorting)
|
|
{
|
|
/*
|
|
First table produces rows in required order. Two options:
|
|
|
|
A. first_table_cost=NULL means we use whatever access method the join
|
|
optimizer has picked. Its cost was included in join->best_read and
|
|
we've already took a fraction of it.
|
|
|
|
B. first_table_cost!=NULL means we will need to switch to another access
|
|
method, we have the cost to read rows to produce #LIMIT rows in join
|
|
output.
|
|
*/
|
|
if (first_table_cost)
|
|
{
|
|
/*
|
|
Subtract the remainder of the first table's cost we had in
|
|
join->best_read:
|
|
*/
|
|
partial_join_cost -= pos->read_time*fraction;
|
|
partial_join_cost -= pos->records_read*fraction * WHERE_COST_THD(join->thd);
|
|
|
|
/* Add the cost of the new access method we've got: */
|
|
partial_join_cost= COST_ADD(partial_join_cost, *first_table_cost);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
DBUG_ASSERT(!first_table_cost);
|
|
/*
|
|
Cannot skip sorting. We read the first table entirely, then sort it.
|
|
|
|
partial_join_cost includes pos->read_time*fraction. Add to it
|
|
pos->read_time*(1-fraction) so we have the cost to read the entire first
|
|
table. Do the same for costs of checking the WHERE.
|
|
*/
|
|
double extra_first_table_cost= pos->read_time * (1.0 - fraction);
|
|
double extra_first_table_where= pos->records_read * (1.0 - fraction) *
|
|
WHERE_COST_THD(join->thd);
|
|
|
|
partial_join_cost= COST_ADD(partial_join_cost,
|
|
COST_ADD(extra_first_table_cost,
|
|
extra_first_table_where));
|
|
}
|
|
return partial_join_cost;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Finalize building the join order which allows to short-cut the join
|
|
execution.
|
|
|
|
@detail
|
|
This is called after we have produced a join order that allows short-
|
|
cutting.
|
|
Here, we decide if it is cheaper to use this one or the original join
|
|
order.
|
|
*/
|
|
|
|
POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost)
|
|
{
|
|
Json_writer_object wrapper(join->thd);
|
|
Json_writer_object trace(join->thd, "join_limit_shortcut_choice");
|
|
|
|
double fraction= join->select_limit / join->join_record_count;
|
|
trace.add("limit_fraction", fraction);
|
|
|
|
/* Check which fraction of join output we need */
|
|
if (fraction >= 1.0)
|
|
{
|
|
trace.add("skip_adjustment", "no short-cutting");
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
Check if the first table's access method produces the required ordering.
|
|
Possible options:
|
|
1. Yes: we can just take a fraction of the execution cost.
|
|
2A No: change the access method to one that does produce the required
|
|
ordering, update the costs.
|
|
2B No: Need to pass the first table to filesort().
|
|
*/
|
|
bool skip_sorting;
|
|
bool access_method_changed;
|
|
double new_access_cost;
|
|
{
|
|
Json_writer_array tmp(join->thd, "test_if_skip_sort_order_early");
|
|
skip_sorting= test_if_skip_sort_order_early(join,
|
|
&access_method_changed,
|
|
&new_access_cost);
|
|
}
|
|
trace.add("can_skip_filesort", skip_sorting);
|
|
|
|
double cost_with_shortcut=
|
|
recompute_join_cost_with_limit(join, skip_sorting,
|
|
access_method_changed ?
|
|
&new_access_cost : (double*)0,
|
|
fraction);
|
|
double risk_ratio=
|
|
(double)join->thd->variables.optimizer_join_limit_pref_ratio;
|
|
trace.add("full_join_cost", join->best_read);
|
|
trace.add("risk_ratio", risk_ratio);
|
|
trace.add("shortcut_join_cost", cost_with_shortcut);
|
|
cost_with_shortcut *= risk_ratio;
|
|
trace.add("shortcut_cost_with_risk", cost_with_shortcut);
|
|
if (cost_with_shortcut < join->best_read)
|
|
{
|
|
trace.add("use_shortcut_cost", true);
|
|
POSITION *pos= (POSITION*)memdup_root(join->thd->mem_root,
|
|
join->best_positions,
|
|
sizeof(POSITION)*
|
|
(join->table_count + 1));
|
|
*cost= cost_with_shortcut;
|
|
return pos;
|
|
}
|
|
trace.add("use_shortcut_cost", false);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
If we're in Limit Optimization Mode, allow only join->sort_by_table as
|
|
the first table in the join order
|
|
*/
|
|
|
|
static
|
|
bool join_limit_shortcut_limits_tables(const JOIN *join, uint idx, table_map *map)
|
|
{
|
|
if (join->limit_optimization_mode && idx == join->const_tables)
|
|
{
|
|
*map= join->sort_by_table->map;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
|
|
Find a good, possibly optimal, query execution plan (QEP) by a possibly
|
|
exhaustive search.
|
|
|
|
The procedure searches for the optimal ordering of the query tables in set
|
|
'remaining_tables' of size N, and the corresponding optimal access paths to
|
|
each table. The choice of a table order and an access path for each table
|
|
constitutes a query execution plan (QEP) that fully specifies how to
|
|
execute the query.
|
|
|
|
The maximal size of the found plan is controlled by the parameter
|
|
'search_depth'. When search_depth == N, the resulting plan is complete and
|
|
can be used directly as a QEP. If search_depth < N, the found plan consists
|
|
of only some of the query tables. Such "partial" optimal plans are useful
|
|
only as input to query optimization procedures, and cannot be used directly
|
|
to execute a query.
|
|
|
|
The algorithm begins with an empty partial plan stored in 'join->positions'
|
|
and a set of N tables - 'remaining_tables'. Each step of the algorithm
|
|
evaluates the cost of the partial plan extended by all access plans for
|
|
each of the relations in 'remaining_tables', expands the current partial
|
|
plan with the access plan that results in lowest cost of the expanded
|
|
partial plan, and removes the corresponding relation from
|
|
'remaining_tables'. The algorithm continues until it either constructs a
|
|
complete optimal plan, or constructs an optimal partial plan with size =
|
|
search_depth.
|
|
|
|
The final optimal plan is stored in 'join->best_positions'. The
|
|
corresponding cost of the optimal plan is in 'join->best_read'.
|
|
|
|
@note
|
|
The procedure uses a recursive depth-first search where the depth of the
|
|
recursion (and thus the exhaustiveness of the search) is controlled by the
|
|
parameter 'search_depth'.
|
|
|
|
@note
|
|
The pseudocode below describes the algorithm of
|
|
'best_extension_by_limited_search'. The worst-case complexity of this
|
|
algorithm is O(N*N^search_depth/search_depth). When search_depth >= N, then
|
|
the complexity of greedy_search is O(N!).
|
|
|
|
@code
|
|
procedure best_extension_by_limited_search(
|
|
pplan in, // in, partial plan of tables-joined-so-far
|
|
pplan_cost, // in, cost of pplan
|
|
remaining_tables, // in, set of tables not referenced in pplan
|
|
best_plan_so_far, // in/out, best plan found so far
|
|
best_plan_so_far_cost,// in/out, cost of best_plan_so_far
|
|
search_depth) // in, maximum size of the plans being considered
|
|
{
|
|
for each table T from remaining_tables
|
|
{
|
|
// Calculate the cost of using table T as above
|
|
cost = complex-series-of-calculations;
|
|
|
|
// Add the cost to the cost so far.
|
|
pplan_cost+= cost;
|
|
|
|
if (pplan_cost >= best_plan_so_far_cost)
|
|
// pplan_cost already too great, stop search
|
|
continue;
|
|
|
|
pplan= expand plan by best_access_method;
|
|
remaining_tables= remaining_tables - table T;
|
|
if (remaining_tables is not an empty set
|
|
and
|
|
search_depth > 1)
|
|
{
|
|
best_extension_by_limited_search(pplan, pplan_cost,
|
|
remaining_tables,
|
|
best_plan_so_far,
|
|
best_plan_so_far_cost,
|
|
search_depth - 1);
|
|
}
|
|
else
|
|
{
|
|
best_plan_so_far_cost= pplan_cost;
|
|
best_plan_so_far= pplan;
|
|
}
|
|
}
|
|
}
|
|
@endcode
|
|
|
|
@note
|
|
When 'best_extension_by_limited_search' is called for the first time,
|
|
'join->best_read' must be set to the largest possible value (e.g. DBL_MAX).
|
|
The actual implementation provides a way to optionally use pruning
|
|
heuristic to reduce the search space by skipping some partial plans.
|
|
|
|
@note
|
|
The parameter 'search_depth' provides control over the recursion
|
|
depth, and thus the size of the resulting optimal plan.
|
|
|
|
@param join pointer to the structure providing all context info
|
|
for the query
|
|
@param remaining_tables set of tables not included into the partial plan yet
|
|
@param idx length of the partial QEP in 'join->positions';
|
|
since a depth-first search is used, also corresponds
|
|
to the current depth of the search tree;
|
|
also an index in the array 'join->best_ref';
|
|
@param record_count estimate for the number of records returned by the
|
|
best partial plan
|
|
@param read_time the cost of the best partial plan
|
|
@param search_depth maximum depth of the recursion and thus size of the
|
|
found optimal plan
|
|
(0 < search_depth <= join->tables+1).
|
|
(values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
|
|
@param use_cond_selectivity specifies how the selectivity of the conditions
|
|
pushed to a table should be taken into account
|
|
|
|
@retval
|
|
enum_best_search::SEARCH_OK All fine
|
|
@retval
|
|
enum_best_search::SEARCH_FOUND_EDGE All remaining tables are edge tables
|
|
@retval
|
|
enum_best_search::SEARCH_ABORT Killed by user
|
|
@retval
|
|
enum_best_search::SEARCH_ERROR Fatal error
|
|
*/
|
|
|
|
|
|
static enum_best_search
best_extension_by_limited_search(JOIN      *join,
                                 table_map remaining_tables,
                                 uint      idx,
                                 double    record_count,
                                 double    read_time,
                                 uint      search_depth,
                                 uint      use_cond_selectivity,
                                 table_map *processed_eq_ref_tables)
{
  THD *thd= join->thd;
  /*
    'join' is a partial plan with lower cost than the best plan so far,
    so continue expanding it further with the tables in 'remaining_tables'.
  */
  JOIN_TAB *s;
  double best_record_count= DBL_MAX;
  double best_read_time=    DBL_MAX;
  enum_best_search best_res;
  uint tables_left= join->table_count - idx, found_tables;
  uint accepted_tables __attribute__((unused));
  table_map found_eq_ref_tables= 0, used_eq_ref_table= 0;
  table_map allowed_tables, allowed_current_tables;
  /* Scratch array for the candidate tables of this level, at most tables_left */
  SORT_POSITION *sort= (SORT_POSITION*) alloca(sizeof(SORT_POSITION)*tables_left);
  SORT_POSITION *sort_end;
  DBUG_ENTER("best_extension_by_limited_search");
  DBUG_EXECUTE_IF("show_explain_probe_best_ext_lim_search",
                  if (dbug_user_var_equals_int(thd,
                                               "show_explain_probe_select_id",
                                               join->select_lex->select_number))
                        dbug_serve_apcs(thd, 1);
                 );

  if (unlikely(thd->check_killed()))  // Abort
    DBUG_RETURN(SEARCH_ABORT);

  DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
                                 "part_plan"););
  status_var_increment(thd->status_var.optimizer_join_prefixes_check_calls);

  if (join->emb_sjm_nest)
  {
    /*
      If we are searching for the execution plan of a materialized semi-join nest
      then allowed_tables contains bits only for the tables from this nest.
    */
    allowed_tables= (join->emb_sjm_nest->sj_inner_tables & remaining_tables);
    allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables;
  }
  else
  {
    /*
      allowed_tables is used to check if there are tables left that can improve
      a key search and to see if there are more tables to add in next iteration.
      allowed_current_tables tells us which tables we can add to the current
      plan at this stage.
    */
    allowed_tables= remaining_tables;
    allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables;
    table_map sort_table;
    /* In limit optimization mode only sort_by_table may be the first table */
    if (join_limit_shortcut_limits_tables(join, idx, &sort_table))
      allowed_current_tables= sort_table;
  }
  DBUG_ASSERT(allowed_tables & remaining_tables);

  sort_end= sort;
  {
    Json_writer_object trace_one_table(thd);
    JOIN_TAB **best_ref= join->best_ref + idx;
    if (unlikely(thd->trace_started()))
      trace_plan_prefix(&trace_one_table, join, idx, remaining_tables);

    Json_writer_array arr(thd, "get_costs_for_tables");

    if (idx > join->const_tables && join->prune_level >= 2 &&
        join->positions[idx-1].type == JT_EQ_REF &&
        (join->eq_ref_tables & allowed_current_tables))
    {
      /* Previous table was an EQ REF table, only add other possible EQ_REF
         tables to the chain, stop after first one is found.
      */
      table_map table_map= join->eq_ref_tables & allowed_current_tables;
      if (get_costs_for_tables(join, remaining_tables, idx, record_count,
                               &trace_one_table, best_ref, &sort_end,
                               &table_map, 1))
        used_eq_ref_table= (*sort->join_tab)->table->map;
      else
      {
        /* We didn't find another EQ_REF table, add remaining tables */
        if ((table_map= allowed_current_tables & ~table_map))
          get_costs_for_tables(join, remaining_tables, idx, record_count,
                               &trace_one_table, best_ref, &sort_end, &table_map,
                               0);
      }
    }
    else
    {
      table_map table_map= allowed_current_tables;
      get_costs_for_tables(join, remaining_tables, idx, record_count,
                           &trace_one_table, best_ref, &sort_end, &table_map,
                           0);
    }
    found_tables= (uint) (sort_end - sort);
    DBUG_ASSERT(found_tables > 0);

    /*
      Sort tables in ascending order of generated row combinations
    */
    if (found_tables > 1)
      my_qsort(sort, found_tables, sizeof(SORT_POSITION),
               (qsort_cmp) sort_positions);
  }
  DBUG_ASSERT(join->next_sort_position <=
              join->sort_positions + join->sort_space);

  accepted_tables= 0;
  double min_rec_count= DBL_MAX;
  double min_rec_count_read_time= DBL_MAX;

  double min_cost= DBL_MAX;
  double min_cost_record_count= DBL_MAX;

  for (SORT_POSITION *pos= sort ; pos < sort_end ; pos++)
  {
    s= *pos->join_tab;
    if (!(found_eq_ref_tables & s->table->map) &&
        !check_interleaving_with_nj(s))
    {
      table_map real_table_bit= s->table->map;
      double current_record_count, current_read_time, original_record_count;
      double partial_join_cardinality;
      POSITION *position= join->positions + idx, *loose_scan_pos;
      double pushdown_cond_selectivity;
      Json_writer_object trace_one_table(thd);

      if (unlikely(thd->trace_started()))
      {
        trace_plan_prefix(&trace_one_table, join, idx, remaining_tables);
        trace_one_table.add_table_name(s);
      }

      accepted_tables++;
      *position= *pos->position;                  // Get stored result
      loose_scan_pos= pos->position+1;

      /* Compute the cost of the new plan extended with 's' */
      current_record_count= COST_MULT(record_count, position->records_out);
      current_read_time=    COST_ADD(read_time, position->read_time);

      if (unlikely(trace_one_table.trace_started()))
      {
        trace_one_table.
          add("rows_for_plan", current_record_count).
          add("cost_for_plan", current_read_time);
      }
      original_record_count= current_record_count;
      /* Both estimates are passed by address so the call can update them */
      optimize_semi_joins(join, remaining_tables, idx, &current_record_count,
                          &current_read_time, loose_scan_pos);
      if (position->sj_strategy != SJ_OPT_NONE)
      {
        /* Adjust records_out and current_record_count after semi join */
        double ratio= current_record_count / original_record_count;
        if (ratio < 1.0)
          position->records_out*= ratio;
        if (unlikely(trace_one_table.trace_started()))
        {
          trace_one_table.
            add("sj_rows_out", position->records_out).
            add("sj_rows_for_plan", current_record_count).
            add("sj_filtered", safe_filtered(position->records_out,
                                             position->records_init));
        }
      }
      /* Expand only partial plans with lower cost than the best QEP so far */
      if (current_read_time + COST_EPS >= join->best_read)
      {
        DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                       current_record_count,
                                       read_time,
                                       current_read_time,
                                       "prune_by_cost"););
        trace_one_table
          .add("pruned_by_cost", true)
          .add("current_cost", current_read_time)
          .add("best_cost", join->best_read);

        restore_prev_nj_state(s);
        restore_prev_sj_state(remaining_tables, s, idx);
        continue;
      }

      /*
        Prune some less promising partial plans. This heuristic may miss
        the optimal QEPs, thus it results in a non-exhaustive search.
      */
      if (join->prune_level >= 1)
      {
        // Collect the members with min_cost and min_read_time.
        bool min_rec_hit= false;
        bool min_cost_hit= false;

        if (join->extra_heuristic_pruning &&
            (!(position->key_dependent & allowed_tables) ||
             position->records_read < 2.0))
        {
          if (current_record_count < min_rec_count)
          {
            min_rec_count= current_record_count;
            min_rec_count_read_time= current_read_time;
            min_rec_hit= true;
          }

          if (current_read_time < min_cost)
          {
            min_cost_record_count= current_record_count;
            min_cost= current_read_time;
            min_cost_hit= true;
          }
        }

        if (best_record_count > current_record_count ||
            best_read_time > current_read_time ||
            (idx == join->const_tables &&  // 's' is the first table in the QEP
             s->table == join->sort_by_table))
        {
          /*
            Store the current record count and cost as the best
            possible cost at this level if the following holds:
            - It's the lowest record number and cost so far
            - There is no remaining table that could improve index usage
              or we found an EQ_REF or REF key with less than 2
              matching records (good enough).

            NOTE(review): this test reads join->allowed_tables, while the
            otherwise identical test in the extra_heuristic_pruning branch
            above reads the local allowed_tables. Confirm the difference
            is intended.
          */
          if (best_record_count >= current_record_count &&
              best_read_time >= current_read_time &&
              (!(position->key_dependent & join->allowed_tables) ||
               position->records_read < 2.0))
          {
            best_record_count= current_record_count;
            best_read_time=    current_read_time;
          }
        }
        else
        {
          /*
            Typically, we get here if:
              best_record_count < current_record_count &&
              best_read_time < current_read_time
            That is, both record_count and read_time are worse than the best_
            ones. This plan doesn't look promising, prune it away.
          */
          DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                         current_record_count,
                                         read_time,
                                         current_read_time,
                                         "pruned_by_heuristic"););
          trace_one_table.add("pruned_by_heuristic", true);
          restore_prev_nj_state(s);
          restore_prev_sj_state(remaining_tables, s, idx);
          continue;
        }

        /*
          Prune the plan if it did not set a new minimum itself and is no
          better, in both rows and cost, than a recorded minimum.
        */
        const char* prune_reason= NULL;
        if (!min_rec_hit &&
            current_record_count >= min_rec_count &&
            current_read_time >= min_rec_count_read_time)
          prune_reason= "min_record_count";

        if (!min_cost_hit &&
            current_record_count >= min_cost_record_count &&
            current_read_time >= min_cost)
          prune_reason= "min_read_time";

        if (prune_reason)
        {
          trace_one_table.add("pruned_by_heuristic", prune_reason);
          restore_prev_nj_state(s);
          restore_prev_sj_state(remaining_tables, s, idx);
          continue;
        }
      }

      pushdown_cond_selectivity= 1.0;
      /*
        TODO: When a semi-join strategy is applied (sj_strategy!=SJ_OPT_NONE),
        we should account for selectivity from table_after_join_selectivity().
        (Condition filtering is performed before the semi-join removes some
        fanout so this might require moving the code around)
      */
      if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE)
      {
        pushdown_cond_selectivity=
          table_after_join_selectivity(join, idx, s,
                                       remaining_tables & ~real_table_bit,
                                       &position->records_out);

        if (unlikely(trace_one_table.trace_started()) &&
            pushdown_cond_selectivity != 1.0)
          trace_one_table.
            add("pushdown_cond_selectivity", pushdown_cond_selectivity).
            add("filtered", safe_filtered(position->records_out,
                                          position->records_init)).
            add("rows_out", position->records_out);
      }
      join->positions[idx].cond_selectivity= pushdown_cond_selectivity;

      partial_join_cardinality= record_count * position->records_out;
      join->positions[idx].partial_join_cardinality= partial_join_cardinality;

      if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0 &&
          partial_join_cardinality < current_record_count)
        trace_one_table
          .add("selectivity", pushdown_cond_selectivity)
          .add("estimated_join_cardinality", partial_join_cardinality);


      if ((search_depth > 1) && (remaining_tables & ~real_table_bit) &
          allowed_tables)
      {
        /* Recursively expand the current partial plan */
        Json_writer_array trace_rest(thd, "rest_of_plan");

        /* Put 's' at best_ref[idx] for the recursion, then swap it back */
        swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab);
        best_res=
          best_extension_by_limited_search(join,
                                           remaining_tables &
                                           ~real_table_bit,
                                           idx + 1,
                                           partial_join_cardinality,
                                           current_read_time,
                                           search_depth - 1,
                                           use_cond_selectivity,
                                           &found_eq_ref_tables);
        swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab);

        if ((int) best_res < (int) SEARCH_OK)
          goto end;                             // Return best_res
        if (best_res == SEARCH_FOUND_EDGE &&
            check_if_edge_table(join->positions+ idx,
                                pushdown_cond_selectivity) !=
            SEARCH_FOUND_EDGE)
          best_res= SEARCH_OK;
      }
      else
      {
        /*
          'join' is either the best partial QEP with 'search_depth' relations,
          or the best complete QEP so far, whichever is smaller.
        */
        if (join->sort_by_table &&
            join->sort_by_table !=
            join->positions[join->const_tables].table->table)
        {
          /*
             We may have to make a temp table, note that this is only a
             heuristic since we cannot know for sure at this point if we
             we are going to use addon fields or to have flush sorting to
             disk. We also don't know the temporary table will be in memory
             or disk.
             The following calculation takes a middle ground where assume
             we can sort the keys in memory but have to use a disk based
             temporary table to retrieve the rows.
             This cost is probably much bigger than it has to be...
          */
          double sort_cost;
          sort_cost= (get_qsort_sort_cost((ha_rows)current_record_count,0) +
                      current_record_count *
                      DISK_TEMPTABLE_LOOKUP_COST(thd));
          trace_one_table.add("cost_for_sorting", sort_cost);
          current_read_time= COST_ADD(current_read_time, sort_cost);
        }
        if (current_read_time < join->best_read)
        {
          /* New best plan: remember positions[0..idx], its rows and cost */
          memcpy((uchar*) join->best_positions, (uchar*) join->positions,
                 sizeof(POSITION) * (idx + 1));
          join->join_record_count= partial_join_cardinality;
          join->best_read= current_read_time;
        }
        DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                       current_record_count,
                                       read_time,
                                       current_read_time,
                                       "full_plan"););
        best_res= check_if_edge_table(join->positions + idx,
                                      pushdown_cond_selectivity);
      }
      restore_prev_nj_state(s);
      restore_prev_sj_state(remaining_tables, s, idx);
      if (best_res == SEARCH_FOUND_EDGE)
      {
        if (pos+1 < sort_end)                   // If not last table
          trace_one_table.add("pruned_by_hanging_leaf", true);
        goto end;
      }
    }
  }
  DBUG_ASSERT(accepted_tables > 0);
  best_res= SEARCH_OK;

end:
  /* Subtract the 2 * found_tables sort-position slots counted for this level */
  join->next_sort_position-= found_tables*2;
  /*
    Report to the caller which EQ_REF tables were handled through the
    EQ_REF chain at this level and below; reset the set if no chain was used.
  */
  if (used_eq_ref_table)
    *processed_eq_ref_tables|= used_eq_ref_table | found_eq_ref_tables;
  else
    *processed_eq_ref_tables= 0;
  DBUG_RETURN(best_res);
}
|
|
|
|
|
|
/**
|
|
Find how much space the previously read non-const tables take in the cache.
|
|
*/
|
|
|
|
void JOIN_TAB::calc_used_field_length(bool max_fl)
|
|
{
|
|
uint null_fields,blobs,fields;
|
|
ulong rec_length;
|
|
Field **f_ptr,*field;
|
|
uint uneven_bit_fields;
|
|
MY_BITMAP *read_set= table->read_set;
|
|
|
|
uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
|
|
for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
|
|
{
|
|
if (bitmap_is_set(read_set, field->field_index))
|
|
{
|
|
uint flags=field->flags;
|
|
fields++;
|
|
rec_length+=field->pack_length();
|
|
if (flags & BLOB_FLAG)
|
|
blobs++;
|
|
if (!(flags & NOT_NULL_FLAG))
|
|
null_fields++;
|
|
if (field->type() == MYSQL_TYPE_BIT &&
|
|
((Field_bit*)field)->bit_len)
|
|
uneven_bit_fields++;
|
|
}
|
|
}
|
|
if (null_fields || uneven_bit_fields)
|
|
rec_length+=(table->s->null_fields+7)/8;
|
|
if (table->maybe_null)
|
|
rec_length+=sizeof(my_bool);
|
|
|
|
/* Take into account that DuplicateElimination may need to store rowid */
|
|
uint rowid_add_size= 0;
|
|
if (keep_current_rowid)
|
|
{
|
|
rowid_add_size= table->file->ref_length;
|
|
rec_length += rowid_add_size;
|
|
fields++;
|
|
}
|
|
|
|
if (max_fl)
|
|
{
|
|
// TODO: to improve this estimate for max expected length
|
|
if (blobs)
|
|
{
|
|
ulong blob_length= table->file->stats.mean_rec_length;
|
|
if (ULONG_MAX - rec_length > blob_length)
|
|
rec_length+= blob_length;
|
|
else
|
|
rec_length= ULONG_MAX;
|
|
}
|
|
max_used_fieldlength= rec_length;
|
|
}
|
|
else if (table->file->stats.mean_rec_length)
|
|
set_if_smaller(rec_length, table->file->stats.mean_rec_length + rowid_add_size);
|
|
|
|
used_fields=fields;
|
|
used_fieldlength=rec_length;
|
|
used_blobs=blobs;
|
|
used_null_fields= null_fields;
|
|
used_uneven_bit_fields= uneven_bit_fields;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Extract pushdown conditions for a table scan
|
|
|
|
@details
|
|
This functions extracts pushdown conditions usable when this table is scanned.
|
|
The conditions are extracted either from WHERE or from ON expressions.
|
|
The conditions are attached to the field cache_select of this table.
|
|
|
|
@note
|
|
Currently the extracted conditions are used only by BNL and BNLH join
|
|
algorithms.
|
|
|
|
@retval 0 on success
|
|
1 otherwise
|
|
*/
|
|
|
|
int JOIN_TAB::make_scan_filter()
|
|
{
|
|
COND *tmp;
|
|
DBUG_ENTER("make_scan_filter");
|
|
|
|
Item *cond= is_inner_table_of_outer_join() ?
|
|
*get_first_inner_table()->on_expr_ref : join->conds;
|
|
|
|
if (cond)
|
|
{
|
|
if ((tmp= make_cond_for_table(join->thd, cond,
|
|
join->const_table_map | table->map,
|
|
table->map, -1, FALSE, TRUE)))
|
|
{
|
|
DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
|
|
if (!(cache_select=
|
|
(SQL_SELECT*) join->thd->memdup((uchar*) select,
|
|
sizeof(SQL_SELECT))))
|
|
DBUG_RETURN(1);
|
|
cache_select->cond= tmp;
|
|
cache_select->read_tables=join->const_table_map;
|
|
}
|
|
else if (join->thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Check whether hash join algorithm can be used to join this table
|
|
|
|
@details
|
|
This function finds out whether the ref items that have been chosen
|
|
by the planner to access this table can be used for hash join algorithms.
|
|
The answer depends on a certain property of the fields of the
|
|
joined tables on which the hash join key is built.
|
|
|
|
@note
|
|
At present the function is supposed to be called only after the function
|
|
get_best_combination has been called.
|
|
|
|
@retval TRUE it's possible to use hash join to join this table
|
|
@retval FALSE otherwise
|
|
*/
|
|
|
|
bool JOIN_TAB::hash_join_is_possible()
|
|
{
|
|
if (type != JT_REF && type != JT_EQ_REF)
|
|
return FALSE;
|
|
if (!is_ref_for_hash_join())
|
|
{
|
|
KEY *keyinfo= table->key_info + ref.key;
|
|
return keyinfo->key_part[0].field->hash_join_is_possible();
|
|
}
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Check whether a KEYUSE can be really used for access this join table
|
|
|
|
@param join Join structure with the best join order
|
|
for which the check is performed
|
|
@param keyuse Evaluated KEYUSE structure
|
|
|
|
@details
|
|
This function is supposed to be used after the best execution plan has been
|
|
already chosen and the JOIN_TAB array for the best join order been already set.
|
|
For a given KEYUSE to access this JOIN_TAB in the best execution plan the
|
|
function checks whether it really can be used. The function first performs
|
|
the check with access_from_tables_is_allowed(). If it succeeds it checks
|
|
whether the keyuse->val does not use some fields of a materialized semijoin
|
|
nest that cannot be used to build keys to access outer tables.
|
|
Such KEYUSEs exists for the query like this:
|
|
select * from ot
|
|
where ot.c in (select it1.c from it1, it2 where it1.c=f(it2.c))
|
|
Here we have two KEYUSEs to access table ot: with val=it1.c and val=f(it2.c).
|
|
However if the subquery was materialized the second KEYUSE cannot be employed
|
|
to access ot.
|
|
|
|
@retval true the given keyuse can be used for ref access of this JOIN_TAB
|
|
@retval false otherwise
|
|
*/
|
|
|
|
bool JOIN_TAB::keyuse_is_valid_for_access_in_chosen_plan(JOIN *join,
                                                         KEYUSE *keyuse)
{
  if (!access_from_tables_is_allowed(keyuse->used_tables,
                                     join->sjm_lookup_tables))
    return false;

  /* This table itself is an SJM-scan table: no further restriction */
  if (join->sjm_scan_tables & table->map)
    return true;

  /* SJM-scan tables that the key value refers to */
  const table_map scan_tables_in_key= keyuse->used_tables &
                                      join->sjm_scan_tables;
  if (!scan_tables_in_key)
    return true;

  /* Number of the lowest set bit, i.e. of the first such table */
  uint first_bit;
  for (first_bit= 0;
       !(scan_tables_in_key & (table_map(1) << first_bit));
       first_bit++)
  {}

  JOIN_TAB *scan_tab= join->map2table[first_bit];
  TABLE_LIST *nest= scan_tab->emb_sj_nest;
  const bool nest_is_sjm_scan= nest->sj_mat_info &&
                               nest->sj_mat_info->is_used &&
                               nest->sj_mat_info->is_sj_scan;
  if (!nest_is_sjm_scan)
    return true;

  /*
    The keyuse is usable only if its value is a field that matches one of
    the fields in the select list of the materialized subquery.
  */
  st_select_lex *subq_select= nest->sj_subq_pred->unit->first_select();
  for (uint col= 0; col < subq_select->item_list.elements; col++)
  {
    DBUG_ASSERT(subq_select->ref_pointer_array[col]->real_item()->type() == Item::FIELD_ITEM);
    if (keyuse->val->real_item()->type() != Item::FIELD_ITEM)
      continue;

    Field *select_field=
      ((Item_field*) subq_select->ref_pointer_array[col]->real_item())->field;
    Field *key_field= ((Item_field*) keyuse->val->real_item())->field;
    if (select_field->eq(key_field))
      return true;
  }
  return false;
}
|
|
|
|
|
|
/*
  Sum get_used_fieldlength() over the tables in
  join->best_ref[join->const_tables .. idx-1].
*/
static uint
cache_record_length(JOIN *join,uint idx)
{
  uint total= 0;
  JOIN_TAB **const stop= join->best_ref + idx;

  for (JOIN_TAB **cur= join->best_ref + join->const_tables; cur != stop; cur++)
    total+= (*cur)->get_used_fieldlength();

  return total;
}
|
|
|
|
/*
|
|
Estimate the number of engine ha_index_read_calls for EQ_REF tables
|
|
when taking into account the one-row-cache in join_read_always_key()
|
|
|
|
SYNOPSIS
|
|
@param position All previous tables best_access_path() information.
|
|
@param idx Number of (previous) tables in positions.
|
|
@param record_count Number of incoming record combinations
|
|
@param found_ref Bitmap of tables that is used to construct the key
|
|
used with the index read.
|
|
|
|
@return # The number of estimated calls that cannot be cached by the
|
|
the one-row-cache. In other words, number of expected
|
|
calls to engine ha_read_read_map().
|
|
Between 1 and record_count or 0 if record_count == 0
|
|
|
|
DESCRIPTION
|
|
The one-row-cache gives a great benefit when there are multiple consecutive
|
|
calls to ha_index_read() with the same key. In this case we can skip
|
|
calling the engine (and in the future also skip to check the key
|
|
condition), which can notably increase the performance.
|
|
|
|
Assuming most of the rows are cached, there is no notable saving to be
|
|
made trying to calculate the total number of distinct key values that will
|
|
be used. The performance of a ha_index_read_call() is about the same even
|
|
if we repeatedly read the same set of rows.
|
|
|
|
This code works by calculating the number of identical key sequences
|
|
found in the record stream.
|
|
The number of expected distinct calls can then be calculated as
|
|
records_count / sequences.
|
|
|
|
Some things to note:
|
|
- record_count == PRODUCT(records_out) over all tables[0...idx-1]
|
|
- position->prev_record_reads contains the number of identical
|
|
sequences found for previous EQ_REF tables.
|
|
|
|
Assume a join prefix of t1,t2,t3,t4 and t4 is an EQ_REF table.
|
|
We have the following combinations that we have to consider:
|
|
|
|
======
|
|
1) No JOIN_CACHE usage, tables depend only on one previous table
|
|
|
|
Row combinations are generated as:
|
|
- for all rows in t1
|
|
- for all rows in t2
|
|
- for all rows in t3
|
|
or
|
|
t1.1,t2.1,t3.1, t1.1,t2.1,t3.2, t1.1,t2.1,t3.3... # Only t3 row changes
|
|
(until no more rows in t3., ie t3.records_out times)
|
|
t1.1,t2.2,t3.1, t1.1,t2.2,t3.2, t1.1,t2.2,t3.3... # t2.2 read
|
|
(above repeated until no more rows in t2 and t3)
|
|
t1.2,t2.1,t3.1, t1.2,t2.1,t3.2, t1.2,t2.1,t3.3... # t1.2 read
|
|
|
|
If t4 is an EQ_REF table that is depending of one of the
|
|
previous tables, the number of identical keys can be calculated
|
|
as the multiplication of records_out of the tables in between
|
|
the t4 and its first dependency.
|
|
|
|
Let's consider cases where t4 depends on different previous tables:
|
|
WHERE t4.a=t3.a
|
|
no caching as t3 can change for each row
|
|
engine_calls: record_count
|
|
|
|
WHERE t4.a=t2.a
|
|
t4 is not depending on t3. The number of repeated rows are:
|
|
t1.1,t2.1,t3.1 to t1.1,t2.1,t3.last # t3.records_out rows
|
|
t1.1,t2.2,t3.1 to t1.1,t2.2,t3.last # t3.records_out rows
|
|
...
|
|
t1.2,t2.1,t3.1 to t1.2,t2.1,t3.last
|
|
...
|
|
t1.last,t2.last.t3.1 to t1.last,t2.last.1,t3.last
|
|
|
|
For each combination of t1 and t2 there are t3.records_out repeated
|
|
rows with equal key value
|
|
engine_calls: record_count / t3.records_out calls =
|
|
t1.records_out * t2.records_out
|
|
|
|
WHERE t4.a=t1.a
|
|
The repeated sequences:
|
|
t1.1,t2.1,t3.1 to t1.1,t2.last,t3.last
|
|
t1.2,t2.1,t3.1 to t1.2,t2.last,t3.last
|
|
repeated rows: t2.records_out * t3.records_out
|
|
engine_calls: record_count/repeated_rows = t1.records_out
|
|
|
|
If t4 depends on a table that uses EQ_REF access, we can multiply that
|
|
table's repeated_rows with current table's repeated_rows to take that
|
|
into account.
|
|
|
|
=====
|
|
2) Keys depending on multiple tables
|
|
|
|
In this case we have to stop searching after we find the first
|
|
table we depend upon.
|
|
We have to also disregard the number of repeated rows for the
|
|
found table. This can be seen from (assuming tables t1...t6):
|
|
|
|
WHERE t6.a=t4.a and t6.a=t3.a and t4.a= t2.a
|
|
- Here t4 is not depending on t3 (and thus there is a
|
|
t3.records_out identical keys for t4). However t6 key will
|
|
change for each t3 row and t6 cannot thus use
|
|
t3.identical_keys
|
|
|
|
WHERE t4.key_part1=t1.a and t4.key_part2= t3.a
|
|
As t4.key_part2 will change for every row, one-row-cache will not
|
|
be hit
|
|
|
|
WHERE t4.key_part1=t1.a and t4.key_part2= t2.a
|
|
t4.key will change when t1 or t2 changes
|
|
This is the same case as above for WHERE t4.a = t2.a
|
|
engine_calls: record_count / t3.records_out calls
|
|
|
|
=====
|
|
3) JOIN_CACHE is used
|
|
|
|
If any table is using join_cache as this changes the row
|
|
combinations seen by following tables. Using join cache for a
|
|
table T# will have T# rows repeated for the next table as many
|
|
times there are combinations in the cache. Then the cache will be
|
|
re-read and the operations repeats 'refill-1' number of times.
|
|
|
|
Table rows from table just before T# will come in 'random order',
|
|
from the point of the next tables.
|
|
|
|
Assuming t3 is using a cache, t4 will see the rows coming in the
|
|
following order:
|
|
t1.1,t2.1,t3.1, t1.1,t2.2,t3.1, t1.1,t2.3,t3.1...
|
|
(t3.1 repeated 't2.records_out' times)
|
|
t1.2,t2.1,t3.1, t1.2,t2.2,t3.1, t1.2,t2.3,t3.1...
|
|
(Next row in t1 used)
|
|
t1.1,t2.1,t3.2, t1.1,t2.2,t3.2, t1.1,t2.3,t3.2...
|
|
(Restarting all t1 & t2 combinations for t3.2)
|
|
|
|
WHERE t4.a=t3.a
|
|
- There is a repeated sequence of t3.records_out rows for
|
|
each t1,t2 row combination.
|
|
engine_calls= record_count / t3.records_out
|
|
|
|
WHERE t4.a=t2.a
|
|
t2 changes for each row
|
|
engine_calls= record_count
|
|
|
|
WHERE t4.a=t1.a
|
|
repeated rows= t2.records_out
|
|
engine_calls= record_count / t2.records_out
|
|
|
|
A refill of the join cache will restart the row sequences
|
|
(we have 'refill' more sequences), so we will have to do 'refill' times
|
|
more engine read calls.
|
|
|
|
=====
|
|
Expectations of the accuracy of the return value
|
|
|
|
- The value is always between 1 and record_count
|
|
- The returned value should almost always be larger than the true number of
|
|
engine calls.
|
|
|
|
- Assuming that every row has different values for all other columns for
|
|
each unique key value and record_count is accurate:
|
|
- If a table is depending on multiple tables, the return value may be
|
|
notable larger than real value.
|
|
- If there is no join cache the value should be exact.
|
|
- If there is a join cache, but no refills calculated or done then
|
|
the value should be exact.
|
|
- If there was more join_cache refills than was calculated, the value
|
|
may be slightly too low.
|
|
- If the number of refills is equal or less than was calculated the value
|
|
should be larger than the expected engine read calls. The more refills,
|
|
the less exact the number will be.
|
|
*/
|
|
|
|
static double
|
|
prev_record_reads(const POSITION *position, uint idx, table_map found_ref,
|
|
double record_count, double *identical_keys)
|
|
{
|
|
double found= 1.0;
|
|
const POSITION *pos_end= position - 1;
|
|
const POSITION *cur_pos= position + idx;
|
|
|
|
/* Safety against const tables */
|
|
if (unlikely(!found_ref))
|
|
goto end;
|
|
|
|
for (const POSITION *pos= cur_pos-1; pos != pos_end; pos--)
|
|
{
|
|
if (found_ref & pos->table->table->map)
|
|
{
|
|
/* Found a table we depend on */
|
|
found_ref= ~pos->table->table->map;
|
|
if (!found_ref)
|
|
{
|
|
/*
|
|
No more dependencies. We can use the cached values to improve things
|
|
a bit
|
|
*/
|
|
if (pos->type == JT_EQ_REF)
|
|
found= COST_MULT(found, pos->identical_keys);
|
|
else if (pos->use_join_buffer)
|
|
found= COST_MULT(found, pos->loops / pos->refills);
|
|
}
|
|
break;
|
|
}
|
|
if (unlikely(pos->use_join_buffer))
|
|
{
|
|
/* Each refill can change the cached key */
|
|
found/= pos->refills;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We are not depending on the current table.
|
|
There are 'records_out' rows with identical rows
|
|
value for our depending tables.
|
|
*/
|
|
found= COST_MULT(found, pos->records_out);
|
|
}
|
|
}
|
|
|
|
/*
|
|
In most case found should <= record_count.
|
|
|
|
However if there was a reduction of rows (records_out < 1) before
|
|
the referencing table then found could be >= record_count.
|
|
To get resonable numbers, we limit prev_record_read to be between
|
|
1.0 and record_count as we have to always do at least one read
|
|
anyway.
|
|
*/
|
|
|
|
end:
|
|
if (unlikely(found > record_count))
|
|
found= record_count;
|
|
if (unlikely(found <= 1.0))
|
|
found= 1.0;
|
|
*identical_keys= found;
|
|
return record_count / found;
|
|
}
|
|
|
|
|
|
/*
|
|
Enumerate join tabs in breadth-first fashion, including const tables.
|
|
*/
|
|
|
|
static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
|
|
uint n_top_tabs_count, JOIN_TAB *tab)
|
|
{
|
|
n_top_tabs_count += tab->join->aggr_tables;
|
|
if (!tab->bush_root_tab)
|
|
{
|
|
/* We're at top level. Get the next top-level tab */
|
|
tab++;
|
|
if (tab < first_top_tab + n_top_tabs_count)
|
|
return tab;
|
|
|
|
/* No more top-level tabs. Switch to enumerating SJM nest children */
|
|
tab= first_top_tab;
|
|
}
|
|
else
|
|
{
|
|
/* We're inside of an SJM nest */
|
|
if (!tab->last_leaf_in_bush)
|
|
{
|
|
/* There's one more table in the nest, return it. */
|
|
return ++tab;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
There are no more tables in this nest. Get out of it and then we'll
|
|
proceed to the next nest.
|
|
*/
|
|
tab= tab->bush_root_tab + 1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Ok, "tab" points to a top-level table, and we need to find the next SJM
|
|
nest and enter it.
|
|
*/
|
|
for (; tab < first_top_tab + n_top_tabs_count; tab++)
|
|
{
|
|
if (tab->bush_children)
|
|
return tab->bush_children->start;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/*
|
|
Enumerate JOIN_TABs in "EXPLAIN order". This order
|
|
- const tabs are included
|
|
- we enumerate "optimization tabs".
|
|
-
|
|
*/
|
|
|
|
JOIN_TAB *first_explain_order_tab(JOIN* join)
|
|
{
|
|
JOIN_TAB* tab;
|
|
tab= join->join_tab;
|
|
if (!tab)
|
|
return NULL; /* Can happen when when the tables were optimized away */
|
|
return (tab->bush_children) ? tab->bush_children->start : tab;
|
|
}
|
|
|
|
|
|
/*
  Return the tab following 'tab' in "EXPLAIN order", or NULL when the end of
  the top-level array (join->top_join_tab_count entries) has been reached.
*/
JOIN_TAB *next_explain_order_tab(JOIN* join, JOIN_TAB* tab)
{
  /* Last table of an SJM nest: the nest's root comes next */
  if (tab->last_leaf_in_bush)
    return tab->bush_root_tab;

  /* Otherwise advance in the array we're traversing */
  JOIN_TAB *following= tab + 1;

  /* Outside SJM nest and reached EOF */
  if (following == join->join_tab + join->top_join_tab_count)
    return NULL;

  /* A nest root is preceded by its children: descend into the nest */
  return following->bush_children ? following->bush_children->start
                                  : following;
}
|
|
|
|
|
|
|
|
/*
  Return the first top-level JOIN_TAB of the join.

  With WITHOUT_CONST_TABLES the first join->const_tables entries are skipped,
  and NULL is returned if there is no join_tab array or if all tables are
  const. Otherwise join->join_tab is returned as is.
*/
JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables const_tbls)
{
  JOIN_TAB *start= join->join_tab;

  if (const_tbls != WITHOUT_CONST_TABLES)
    return start;

  if (!start || join->const_tables == join->table_count)
    return NULL;

  return start + join->const_tables;
}
|
|
|
|
|
|
/*
  Return the top-level JOIN_TAB that follows 'tab', or NULL if there is none.

  Uses the breadth-first enumeration and stops as soon as it would step into
  an SJM nest (i.e. the returned tab has a bush_root_tab).
*/
JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
{
  JOIN_TAB *following= next_breadth_first_tab(join->first_breadth_first_tab(),
                                              join->top_join_tab_count, tab);
  if (!following || following->bush_root_tab)
    return NULL;
  return following;
}
|
|
|
|
|
|
/*
  Return the first JOIN_TAB of the sequential ("linear") enumeration.

  const_tbls == WITHOUT_CONST_TABLES skips the leading join->const_tables
  entries. If the first entry is an SJM nest and include_bush_roots is
  WITHOUT_BUSH_ROOTS, the first table inside the nest is returned instead of
  the nest itself. Returns NULL if there is nothing to enumerate.
*/
JOIN_TAB *first_linear_tab(JOIN *join,
                           enum enum_with_bush_roots include_bush_roots,
                           enum enum_with_const_tables const_tbls)
{
  JOIN_TAB *const array_start= join->join_tab;
  if (!array_start)
    return NULL;

  JOIN_TAB *first= array_start;
  if (const_tbls == WITHOUT_CONST_TABLES)
    first+= join->const_tables;

  /* All are const tables */
  if (first >= array_start + join->top_join_tab_count)
    return NULL;

  /* This JOIN_TAB may be a SJM nest; if so, start from first table in nest */
  const bool enter_nest= first->bush_children &&
                         include_bush_roots == WITHOUT_BUSH_ROOTS;
  return enter_nest ? first->bush_children->start : first;
}
|
|
|
|
|
|
/*
|
|
A helper function to loop over all join's join_tab in sequential fashion
|
|
|
|
DESCRIPTION
|
|
Depending on include_bush_roots parameter, JOIN_TABs that represent
|
|
SJM-scan/lookups are either returned or omitted.
|
|
|
|
SJM-Bush children are returned right after (or in place of) their container
|
|
join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems
|
|
to)
|
|
|
|
For example, if we have this structure:
|
|
|
|
ot1--ot2--sjm1----------------ot3-...
|
|
|
|
|
+--it1--it2--it3
|
|
|
|
calls to next_linear_tab( include_bush_roots=TRUE) will return:
|
|
|
|
ot1 ot2 sjm1 it1 it2 it3 ot3 ...
|
|
|
|
while calls to next_linear_tab( include_bush_roots=FALSE) will return:
|
|
|
|
ot1 ot2 it1 it2 it3 ot3 ...
|
|
|
|
(note that sjm1 won't be returned).
|
|
*/
|
|
|
|
JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab,
|
|
enum enum_with_bush_roots include_bush_roots)
|
|
{
|
|
if (include_bush_roots == WITH_BUSH_ROOTS && tab->bush_children)
|
|
{
|
|
/* This JOIN_TAB is a SJM nest; Start from first table in nest */
|
|
return tab->bush_children->start;
|
|
}
|
|
|
|
DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab);
|
|
|
|
if (tab->bush_root_tab) /* Are we inside an SJM nest */
|
|
{
|
|
/* Inside SJM nest */
|
|
if (!tab->last_leaf_in_bush)
|
|
return tab+1; /* Return next in nest */
|
|
/* Continue from the sjm on the top level */
|
|
tab= tab->bush_root_tab;
|
|
}
|
|
|
|
/* If no more JOIN_TAB's on the top level */
|
|
if (++tab >= join->join_tab + join->exec_join_tab_cnt() + join->aggr_tables)
|
|
return NULL;
|
|
|
|
if (include_bush_roots == WITHOUT_BUSH_ROOTS && tab->bush_children)
|
|
{
|
|
/* This JOIN_TAB is a SJM nest; Start from first table in nest */
|
|
tab= tab->bush_children->start;
|
|
}
|
|
return tab;
|
|
}
|
|
|
|
|
|
/*
|
|
Start to iterate over all join tables in bush-children-first order, excluding
|
|
the const tables (see next_depth_first_tab() comment for details)
|
|
*/
|
|
|
|
JOIN_TAB *first_depth_first_tab(JOIN* join)
|
|
{
|
|
JOIN_TAB* tab;
|
|
/* This means we're starting the enumeration */
|
|
if (join->const_tables == join->top_join_tab_count || !join->join_tab)
|
|
return NULL;
|
|
|
|
tab= join->join_tab + join->const_tables;
|
|
|
|
return (tab->bush_children) ? tab->bush_children->start : tab;
|
|
}
|
|
|
|
|
|
/*
|
|
A helper function to iterate over all join tables in bush-children-first order
|
|
|
|
DESCRIPTION
|
|
|
|
For example, for this join plan
|
|
|
|
ot1--ot2--sjm1------------ot3-...
|
|
|
|
|
|
|
|
it1--it2--it3
|
|
|
|
call to first_depth_first_tab() will return ot1, and subsequent calls to
|
|
next_depth_first_tab() will return:
|
|
|
|
ot2 it1 it2 it3 sjm ot3 ...
|
|
*/
|
|
|
|
JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab)
|
|
{
|
|
/* If we're inside SJM nest and have reached its end, get out */
|
|
if (tab->last_leaf_in_bush)
|
|
return tab->bush_root_tab;
|
|
|
|
/* Move to next tab in the array we're traversing */
|
|
tab++;
|
|
|
|
if (tab == join->join_tab +join->top_join_tab_count)
|
|
return NULL; /* Outside SJM nest and reached EOF */
|
|
|
|
if (tab->bush_children)
|
|
return tab->bush_children->start;
|
|
|
|
return tab;
|
|
}
|
|
|
|
|
|
/*
  Returns true if check_for_splittable_materialized() reports a non-zero
  result, false otherwise. The 'thd' argument is not used here.
*/
bool JOIN::check_two_phase_optimization(THD *thd)
{
  return check_for_splittable_materialized() ? true : false;
}
|
|
|
|
|
|
/*
  AND 'injected_cond' to the WHERE condition of this join.

  If the current condition is a top-level AND and cond_equal is set, the
  cond_equal->current_level list is disjoined from the AND's argument list
  before the new condition is built, and its elements are pushed back to the
  argument list of the resulting condition afterwards.

  RETURN
    false  OK; select_lex->where and conds refer to the new condition
    true   fix_fields_if_needed() failed for the new condition
*/
bool JOIN::inject_cond_into_where(Item *injected_cond)
{
  List<Item> *and_args= NULL;
  const bool conds_is_and=
    conds && conds->type() == Item::COND_ITEM &&
    ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC;

  if (conds_is_and)
  {
    and_args= ((Item_cond*) conds)->argument_list();
    if (cond_equal)
      and_args->disjoin((List<Item> *) &cond_equal->current_level);
  }

  Item *new_where= and_items(thd, conds, injected_cond);
  if (new_where->fix_fields_if_needed(thd, 0))
    return true;

  /* Install the new condition as the WHERE of the select */
  thd->change_item_tree(&select_lex->where, new_where);
  select_lex->where->top_level_item();
  conds= select_lex->where;

  if (and_args && cond_equal)
  {
    /* Re-attach the multiple equalities to the (possibly new) AND */
    List<Item> *new_args= ((Item_cond*) conds)->argument_list();
    List_iterator<Item_equal> eq_it(cond_equal->current_level);
    for (Item_equal *eq= eq_it++; eq; eq= eq_it++)
      new_args->push_back(eq, thd->mem_root);
  }

  return false;
}
|
|
|
|
|
|
/*
  A constant NULL Item pointer. JOIN::get_best_combination() makes
  on_expr_ref of the JOIN_TABs it creates for SJM nests point at it.
*/
static Item * const null_ptr= NULL;
|
|
|
|
|
|
/*
|
|
Set up join struct according to the picked join order in
|
|
|
|
SYNOPSIS
|
|
get_best_combination()
|
|
join The join to process (the picked join order is mainly in
|
|
join->best_positions)
|
|
|
|
DESCRIPTION
|
|
Setup join structures according the picked join order
|
|
- finalize semi-join strategy choices (see
|
|
fix_semijoin_strategies_for_picked_join_order)
|
|
- create join->join_tab array and put there the JOIN_TABs in the join order
|
|
- create data structures describing ref access methods.
|
|
|
|
NOTE
|
|
In this function we switch from pre-join-optimization JOIN_TABs to
|
|
post-join-optimization JOIN_TABs. This is achieved by copying the entire
|
|
JOIN_TAB objects.
|
|
|
|
RETURN
|
|
FALSE OK
|
|
TRUE Out of memory
|
|
*/
|
|
|
|
bool JOIN::get_best_combination()
|
|
{
|
|
uint tablenr;
|
|
table_map used_tables;
|
|
JOIN_TAB *j;
|
|
KEYUSE *keyuse;
|
|
JOIN_TAB *sjm_nest_end= NULL;
|
|
JOIN_TAB *sjm_nest_root= NULL;
|
|
DBUG_ENTER("get_best_combination");
|
|
|
|
/*
|
|
Additional plan nodes for postjoin tmp tables:
|
|
1? + // For GROUP BY
|
|
1? + // For DISTINCT
|
|
1? + // For aggregation functions aggregated in outer query
|
|
// when used with distinct
|
|
1? + // For ORDER BY
|
|
1? // buffer result
|
|
Up to 2 tmp tables are actually used, but it's hard to tell exact number
|
|
at this stage.
|
|
*/
|
|
uint aggr_tables= (group_list ? 1 : 0) +
|
|
(select_distinct ?
|
|
(tmp_table_param.using_outer_summary_function ? 2 : 1) : 0) +
|
|
(order ? 1 : 0) +
|
|
(select_options & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0) ;
|
|
|
|
if (aggr_tables == 0)
|
|
aggr_tables= 1; /* For group by pushdown */
|
|
|
|
if (select_lex->window_specs.elements)
|
|
aggr_tables++;
|
|
|
|
if (aggr_tables > 2)
|
|
aggr_tables= 2;
|
|
|
|
full_join=0;
|
|
hash_join= FALSE;
|
|
|
|
fix_semijoin_strategies_for_picked_join_order(this);
|
|
top_join_tab_count= get_number_of_tables_at_top_level(this);
|
|
|
|
#ifndef DBUG_OFF
|
|
dbug_join_tab_array_size= top_join_tab_count + aggr_tables;
|
|
#endif
|
|
/*
|
|
NOTE: The above computation of aggr_tables can produce wrong result because some
|
|
of the variables it uses may change their values after we leave this function.
|
|
Known examples:
|
|
- Dangerous: using_outer_summary_function=false at this point. Added
|
|
DBUG_ASSERT below to demonstrate. Can this cause us to allocate less
|
|
space than we would need?
|
|
- Not dangerous: select_distinct can be true here but be assigned false
|
|
afterwards.
|
|
*/
|
|
aggr_tables= 2;
|
|
DBUG_ASSERT(!tmp_table_param.using_outer_summary_function);
|
|
if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*
|
|
(top_join_tab_count + aggr_tables))))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
if (inject_splitting_cond_for_all_tables_with_split_opt())
|
|
goto error;
|
|
|
|
JOIN_TAB_RANGE *root_range;
|
|
if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE))
|
|
goto error;
|
|
root_range->start= join_tab;
|
|
/* root_range->end will be set later */
|
|
join_tab_ranges.empty();
|
|
|
|
if (join_tab_ranges.push_back(root_range, thd->mem_root))
|
|
goto error;
|
|
|
|
for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
|
|
{
|
|
TABLE *form;
|
|
POSITION *cur_pos= &best_positions[tablenr];
|
|
if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE ||
|
|
cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
|
|
{
|
|
/*
|
|
Ok, we've entered an SJ-Materialization semi-join (note that this can't
|
|
be done recursively, semi-joins are not allowed to be nested).
|
|
1. Put into main join order a JOIN_TAB that represents a lookup or scan
|
|
in the temptable.
|
|
*/
|
|
bzero((void*)j, sizeof(JOIN_TAB));
|
|
j->join= this;
|
|
j->table= NULL; //temporary way to tell SJM tables from others.
|
|
j->ref.key = -1;
|
|
j->on_expr_ref= (Item**) &null_ptr;
|
|
/* The unique index is always in 'possible keys' in EXPLAIN */
|
|
j->keys= key_map(1);
|
|
|
|
/*
|
|
2. Proceed with processing SJM nest's join tabs, putting them into the
|
|
sub-order
|
|
*/
|
|
SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
|
|
j->records_read= (sjm->is_sj_scan? sjm->rows : 1.0);
|
|
j->records_init= j->records_out= j->records_read;
|
|
j->records= (ha_rows) j->records_read;
|
|
j->cond_selectivity= 1.0;
|
|
j->join_read_time= 0.0; /* Not saved currently */
|
|
j->join_loops= 0.0;
|
|
JOIN_TAB *jt;
|
|
JOIN_TAB_RANGE *jt_range;
|
|
if (!(jt= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) ||
|
|
!(jt_range= new JOIN_TAB_RANGE))
|
|
goto error;
|
|
jt_range->start= jt;
|
|
jt_range->end= jt + sjm->tables;
|
|
join_tab_ranges.push_back(jt_range, thd->mem_root);
|
|
j->bush_children= jt_range;
|
|
sjm_nest_end= jt + sjm->tables;
|
|
sjm_nest_root= j;
|
|
|
|
j= jt;
|
|
}
|
|
|
|
*j= *cur_pos->table;
|
|
|
|
j->bush_root_tab= sjm_nest_root;
|
|
|
|
form= table[tablenr]= j->table;
|
|
form->reginfo.join_tab=j;
|
|
DBUG_PRINT("info",("type: %d", j->type));
|
|
if (j->type == JT_CONST)
|
|
goto loop_end; // Handled in make_join_stat..
|
|
|
|
j->loosescan_match_tab= NULL; //non-nulls will be set later
|
|
j->inside_loosescan_range= FALSE;
|
|
j->ref.key = -1;
|
|
j->ref.key_parts=0;
|
|
|
|
if (j->type == JT_SYSTEM)
|
|
goto loop_end;
|
|
|
|
if (!(keyuse= cur_pos->key))
|
|
{
|
|
if (cur_pos->type == JT_NEXT) // Forced index
|
|
{
|
|
j->type= JT_NEXT;
|
|
j->index= cur_pos->forced_index;
|
|
}
|
|
else
|
|
j->type= JT_ALL;
|
|
if (cur_pos->use_join_buffer &&
|
|
tablenr != const_tables)
|
|
full_join= 1;
|
|
}
|
|
if ((j->type == JT_REF || j->type == JT_EQ_REF) &&
|
|
is_hash_join_key_no(j->ref.key))
|
|
hash_join= TRUE;
|
|
|
|
j->range_rowid_filter_info=
|
|
cur_pos->range_rowid_filter_info;
|
|
|
|
/*
|
|
Save records_read in JOIN_TAB so that select_describe()/etc don't have
|
|
to access join->best_positions[].
|
|
*/
|
|
j->records_init= cur_pos->records_init;
|
|
j->records_read= cur_pos->records_read;
|
|
j->records_out= cur_pos->records_out;
|
|
j->join_read_time= cur_pos->read_time;
|
|
j->join_loops= cur_pos->loops;
|
|
|
|
loop_end:
|
|
j->cond_selectivity= cur_pos->cond_selectivity;
|
|
DBUG_ASSERT(j->cond_selectivity <= 1.0);
|
|
crash_if_first_double_is_bigger(j->records_out,
|
|
j->records_init *
|
|
(j->range_rowid_filter_info ?
|
|
j->range_rowid_filter_info->selectivity :
|
|
1.0));
|
|
|
|
map2table[j->table->tablenr]= j;
|
|
|
|
/* If we've reached the end of sjm nest, switch back to main sequence */
|
|
if (j + 1 == sjm_nest_end)
|
|
{
|
|
j->last_leaf_in_bush= TRUE;
|
|
j= sjm_nest_root;
|
|
sjm_nest_root= NULL;
|
|
sjm_nest_end= NULL;
|
|
}
|
|
}
|
|
root_range->end= j;
|
|
|
|
used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read
|
|
for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
|
|
{
|
|
if (j->bush_children)
|
|
j= j->bush_children->start;
|
|
|
|
used_tables|= j->table->map;
|
|
if (j->type != JT_CONST && j->type != JT_SYSTEM)
|
|
{
|
|
if ((keyuse= best_positions[tablenr].key) &&
|
|
create_ref_for_key(this, j, keyuse, TRUE, used_tables))
|
|
goto error; // Something went wrong
|
|
}
|
|
if (j->last_leaf_in_bush)
|
|
j= j->bush_root_tab;
|
|
}
|
|
|
|
top_join_tab_count= (uint)(join_tab_ranges.head()->end -
|
|
join_tab_ranges.head()->start);
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
print_final_join_order(this);
|
|
|
|
update_depend_map(this);
|
|
DBUG_RETURN(0);
|
|
|
|
error:
|
|
/* join_tab was not correctly setup. Don't use it */
|
|
join_tab= 0;
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/**
|
|
Create a descriptor of hash join key to access a given join table
|
|
|
|
@param join join which the join table belongs to
|
|
@param join_tab the join table to access
|
|
@param org_keyuse beginning of the key uses to join this table
|
|
@param used_tables bitmap of the previous tables
|
|
|
|
@details
|
|
This function first finds key uses that can be utilized by the hash join
|
|
algorithm to join join_tab to the previous tables marked in the bitmap
|
|
used_tables. The tested key uses are taken from the array of all key uses
|
|
for 'join' starting from the position org_keyuse. After all interesting key
|
|
uses have been found the function builds a descriptor of the corresponding
|
|
key that is used by the hash join algorithm would it be chosen to join
|
|
the table join_tab.
|
|
|
|
@retval FALSE the descriptor for a hash join key is successfully created
|
|
@retval TRUE otherwise
|
|
*/
|
|
|
|
static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab,
|
|
KEYUSE *org_keyuse, table_map used_tables)
|
|
{
|
|
KEY *keyinfo;
|
|
KEY_PART_INFO *key_part_info;
|
|
KEYUSE *keyuse= org_keyuse;
|
|
uint key_parts= 0;
|
|
THD *thd= join->thd;
|
|
TABLE *table= join_tab->table;
|
|
bool first_keyuse= TRUE;
|
|
DBUG_ENTER("create_hj_key_for_table");
|
|
|
|
do
|
|
{
|
|
if (!(~used_tables & keyuse->used_tables) &&
|
|
join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) &&
|
|
are_tables_local(join_tab, keyuse->used_tables))
|
|
{
|
|
if (first_keyuse)
|
|
{
|
|
key_parts++;
|
|
}
|
|
else
|
|
{
|
|
KEYUSE *curr= org_keyuse;
|
|
for( ; curr < keyuse; curr++)
|
|
{
|
|
if (curr->keypart == keyuse->keypart &&
|
|
!(~used_tables & curr->used_tables) &&
|
|
join_tab->keyuse_is_valid_for_access_in_chosen_plan(join,
|
|
curr) &&
|
|
are_tables_local(join_tab, curr->used_tables))
|
|
break;
|
|
}
|
|
if (curr == keyuse)
|
|
key_parts++;
|
|
}
|
|
}
|
|
first_keyuse= FALSE;
|
|
keyuse++;
|
|
} while (keyuse->table == table && keyuse->is_for_hash_join());
|
|
if (!key_parts)
|
|
DBUG_RETURN(TRUE);
|
|
/* This memory is allocated only once for the joined table join_tab */
|
|
if (!(keyinfo= (KEY *) thd->alloc(sizeof(KEY))) ||
|
|
!(key_part_info = (KEY_PART_INFO *) thd->alloc(sizeof(KEY_PART_INFO)*
|
|
key_parts)))
|
|
DBUG_RETURN(TRUE);
|
|
keyinfo->usable_key_parts= keyinfo->user_defined_key_parts = key_parts;
|
|
keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
|
|
keyinfo->key_part= key_part_info;
|
|
keyinfo->key_length=0;
|
|
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
|
|
keyinfo->flags= HA_GENERATED_KEY;
|
|
keyinfo->is_statistics_from_stat_tables= FALSE;
|
|
keyinfo->name.str= "$hj";
|
|
keyinfo->name.length= 3;
|
|
keyinfo->rec_per_key= (ulong*) thd->calloc(sizeof(ulong)*key_parts);
|
|
if (!keyinfo->rec_per_key)
|
|
DBUG_RETURN(TRUE);
|
|
keyinfo->key_part= key_part_info;
|
|
|
|
first_keyuse= TRUE;
|
|
keyuse= org_keyuse;
|
|
do
|
|
{
|
|
if (!(~used_tables & keyuse->used_tables) &&
|
|
join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) &&
|
|
are_tables_local(join_tab, keyuse->used_tables))
|
|
{
|
|
bool add_key_part= TRUE;
|
|
if (!first_keyuse)
|
|
{
|
|
for(KEYUSE *curr= org_keyuse; curr < keyuse; curr++)
|
|
{
|
|
if (curr->keypart == keyuse->keypart &&
|
|
!(~used_tables & curr->used_tables) &&
|
|
join_tab->keyuse_is_valid_for_access_in_chosen_plan(join,
|
|
curr) &&
|
|
are_tables_local(join_tab, curr->used_tables))
|
|
{
|
|
keyuse->keypart= NO_KEYPART;
|
|
add_key_part= FALSE;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (add_key_part)
|
|
{
|
|
Field *field= table->field[keyuse->keypart];
|
|
uint fieldnr= keyuse->keypart+1;
|
|
table->create_key_part_by_field(key_part_info, field, fieldnr);
|
|
keyinfo->key_length += key_part_info->store_length;
|
|
key_part_info++;
|
|
}
|
|
}
|
|
first_keyuse= FALSE;
|
|
keyuse++;
|
|
} while (keyuse->table == table && keyuse->is_for_hash_join());
|
|
|
|
keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
|
|
keyinfo->ext_key_flags= keyinfo->flags;
|
|
keyinfo->ext_key_part_map= 0;
|
|
|
|
join_tab->hj_key= keyinfo;
|
|
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
/*
|
|
Check if a set of tables specified by used_tables can be accessed when
|
|
we're doing scan on join_tab jtab.
|
|
*/
|
|
static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables)
|
|
{
|
|
if (jtab->bush_root_tab)
|
|
{
|
|
/*
|
|
jtab is inside execution join nest. We may not refer to outside tables,
|
|
except the const tables.
|
|
*/
|
|
table_map local_tables= jtab->emb_sj_nest->nested_join->used_tables |
|
|
jtab->join->const_table_map |
|
|
OUTER_REF_TABLE_BIT;
|
|
return !MY_TEST(used_tables & ~local_tables);
|
|
}
|
|
|
|
/*
|
|
If we got here then jtab is at top level.
|
|
- all other tables at top level are accessible,
|
|
- tables in join nests are accessible too, because all their columns that
|
|
are needed at top level will be unpacked when scanning the
|
|
materialization table.
|
|
*/
|
|
return TRUE;
|
|
}
|
|
|
|
/*
  Fill j->ref (TABLE_REF) for accessing table j through the key of
  org_keyuse, and set j->type accordingly.

  SYNOPSIS
    create_ref_for_key()
      join             The join j belongs to
      j                The join table to set up
      org_keyuse       First key use for the chosen key of this table
      allow_full_scan  If TRUE and no key part can be used, fall back to
                       j->type= JT_ALL with j->index= key
      used_tables      Bitmap of the tables available for building the key

  RETURN
    FALSE  OK
    TRUE   Error (out of memory, error while evaluating a constant, or an
           unsupported fulltext key use)
*/
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
                               KEYUSE *org_keyuse, bool allow_full_scan,
                               table_map used_tables)
{
  THD *thd= join->thd;
  TABLE *table= j->table;
  KEYUSE *keyuse= org_keyuse;
  /* Use best key from find_best */
  const uint key= keyuse->key;
  const bool ftkey= (keyuse->keypart == FT_KEYPART);
  const bool hash_key= is_hash_join_key_no(key);
  KEY *keyinfo;
  uint keyparts= 0;
  uint length= 0;
  DBUG_ENTER("create_ref_for_key");

  if (hash_key)
  {
    if (create_hj_key_for_table(join, j, org_keyuse, used_tables))
      DBUG_RETURN(TRUE);
    keyinfo= j->hj_key;
  }
  else
    keyinfo= table->key_info + key;

  if (ftkey)
  {
    Item_func_match *ifm= (Item_func_match *) keyuse->val;
    ifm->join_key= 1;
    keyparts= 1;
  }
  else
  {
    uint found_part_ref_or_null= 0;
    /*
      Calculate length for the used key
      Stop if there is a missing key part or when we find second key_part
      with KEY_OPTIMIZE_REF_OR_NULL
    */
    do
    {
      if (!(~used_tables & keyuse->used_tables) &&
          (!keyuse->validity_ref || *keyuse->validity_ref) &&
          j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) &&
          are_tables_local(j, keyuse->val->used_tables()))
      {
        bool extends_key;
        if (hash_key)
          extends_key= (keyuse->keypart != NO_KEYPART);
        else
          extends_key= (keyparts == keyuse->keypart &&
                        !(found_part_ref_or_null & keyuse->optimize));

        if (extends_key)
        {
          length+= keyinfo->key_part[keyparts].store_length;
          keyparts++;
          found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
        }
      }
      keyuse++;
    } while (keyuse->table == table && keyuse->key == key);

    if (!keyparts && allow_full_scan)
    {
      /* It's a LooseIndexScan strategy scanning whole index */
      j->type= JT_ALL;                  // TODO: Check if this should be JT_NEXT
      j->index= key;
      DBUG_RETURN(FALSE);
    }

    DBUG_ASSERT(length > 0);
    DBUG_ASSERT(keyparts != 0);
  }

  /* set up fieldref */
  j->ref.key_parts= keyparts;
  j->ref.key_length= length;
  j->ref.key= (int) key;

  /* Two aligned key buffers are allocated in one chunk */
  j->ref.key_buff= (uchar*) thd->calloc(ALIGN_SIZE(length)*2);
  if (!j->ref.key_buff)
    DBUG_RETURN(TRUE);
  /* One extra slot for the end marker */
  j->ref.key_copy= (store_key**) thd->alloc(sizeof(store_key*) *
                                            (keyparts+1));
  if (!j->ref.key_copy)
    DBUG_RETURN(TRUE);
  j->ref.items= (Item**) thd->alloc(sizeof(Item*)*keyparts);
  if (!j->ref.items)
    DBUG_RETURN(TRUE);
  j->ref.cond_guards= (bool**) thd->alloc(sizeof(bool*)*keyparts);
  if (!j->ref.cond_guards)
    DBUG_RETURN(TRUE);

  j->ref.key_buff2= j->ref.key_buff + ALIGN_SIZE(length);
  j->ref.key_err= 1;
  j->ref.has_record= FALSE;
  j->ref.null_rejecting= 0;
  j->ref.disable_cache= FALSE;
  j->ref.null_ref_part= NO_REF_PART;
  j->ref.const_ref_part_map= 0;
  j->ref.uses_splitting= FALSE;

  keyuse= org_keyuse;
  store_key **ref_key= j->ref.key_copy;
  uchar *key_buff= j->ref.key_buff;
  uchar *null_ref_key= 0;
  uint null_ref_part= NO_REF_PART;
  uint not_null_keyparts= 0;
  bool keyuse_uses_no_tables= TRUE;

  if (ftkey)
  {
    j->ref.items[0]= ((Item_func*) (keyuse->val))->key_item();
    /* Predicates pushed down into subquery can't be used FT access */
    j->ref.cond_guards[0]= NULL;
    if (keyuse->used_tables)
      DBUG_RETURN(TRUE);                        // not supported yet. SerG

    j->type= JT_FT;
  }
  else
  {
    for (uint i= 0; i < keyparts; keyuse++, i++)
    {
      /* The keypart number a key use must have to fill key part i */
      const uint wanted_keypart=
        hash_key ? keyinfo->key_part[i].field->field_index : i;

      /* Skip the key uses that cannot be used for key part i */
      while (((~used_tables) & keyuse->used_tables) ||
             (keyuse->validity_ref && !(*keyuse->validity_ref)) ||
             !j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) ||
             keyuse->keypart == NO_KEYPART ||
             keyuse->keypart != wanted_keypart ||
             !are_tables_local(j, keyuse->val->used_tables()))
        keyuse++;

      KEY_PART_INFO *part= &keyinfo->key_part[i];
      uint maybe_null= MY_TEST(part->null_bit);
      j->ref.items[i]= keyuse->val;             // Save for cond removal
      j->ref.cond_guards[i]= keyuse->cond_guard;

      if (!keyuse->val->maybe_null() || keyuse->null_rejecting)
        not_null_keyparts++;

      /*
        Set ref.null_rejecting to true only if we are going to inject a
        "keyuse->val IS NOT NULL" predicate.
      */
      Item *real= (keyuse->val)->real_item();
      if (keyuse->null_rejecting && (real->type() == Item::FIELD_ITEM) &&
          ((Item_field*) real)->field->maybe_null())
        j->ref.null_rejecting|= (key_part_map) 1 << i;

      if (keyuse->used_tables)
        keyuse_uses_no_tables= FALSE;
      j->ref.uses_splitting|= (keyuse->validity_ref != NULL);

      /*
        We don't want to compute heavy expressions in EXPLAIN, an example
        would be
          select * from t1 where t1.key=(select thats very heavy);

        (select thats very heavy) => is a constant here
        eg: (select avg(order_cost) from orders) => constant but expensive
      */
      if (!keyuse->val->used_tables() && !thd->lex->describe)
      {
        /* Compare against constant: store the value in the buffer now */
        store_key_item tmp(thd,
                           part->field,
                           key_buff + maybe_null,
                           maybe_null ? key_buff : 0,
                           part->length,
                           keyuse->val,
                           FALSE);
        if (unlikely(thd->is_error()))
          DBUG_RETURN(TRUE);
        tmp.copy(thd);
        j->ref.const_ref_part_map|= key_part_map(1) << i;
      }
      else
      {
        *ref_key++= get_store_key(thd,
                                  keyuse, join->const_table_map,
                                  part,
                                  key_buff, maybe_null);
        if (!keyuse->val->used_tables())
          j->ref.const_ref_part_map|= key_part_map(1) << i;
      }

      /*
        Remember if we are going to use REF_OR_NULL
        But only if field _really_ can be null i.e. we force JT_REF
        instead of JT_REF_OR_NULL in case if field can't be null
      */
      if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
      {
        null_ref_key= key_buff;
        null_ref_part= i;
      }
      key_buff+= part->store_length;
    }
  }

  *ref_key= 0;                                  // end_marker
  if (j->type == JT_FT)
    DBUG_RETURN(0);

  ulong key_flags= j->table->actual_key_flags(keyinfo);

  /* All user defined key parts of a unique key are used */
  const bool unique_on_user_parts=
    keyparts == keyinfo->user_defined_key_parts &&
    ((key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME ||
     /* Unique key and all keyparts are NULL rejecting */
     ((key_flags & HA_NOSAME) && keyparts == not_null_keyparts));
  /* true only for extended keys */
  const bool unique_on_ext_parts=
    MY_TEST(key_flags & HA_EXT_NOSAME) &&
    keyparts == keyinfo->ext_key_parts;

  if (j->type == JT_CONST)
    j->table->const_table= 1;
  else if (!(unique_on_user_parts || unique_on_ext_parts) || null_ref_key)
  {
    /* Must read with repeat */
    j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
    j->ref.null_ref_key= null_ref_key;
    j->ref.null_ref_part= null_ref_part;
  }
  else if (keyuse_uses_no_tables)
  {
    /*
      This happen if we are using a constant expression in the ON part
      of an LEFT JOIN.
      SELECT * FROM a LEFT JOIN b ON b.key=30
      Here we should not mark the table as a 'const' as a field may
      have a 'normal' value or a NULL value.
    */
    j->type= JT_CONST;
  }
  else
    j->type= JT_EQ_REF;

  /* Pick the unlock_row function that matches the access type */
  switch (j->type) {
  case JT_EQ_REF:
    j->read_record.unlock_row= join_read_key_unlock_row;
    break;
  case JT_CONST:
    j->read_record.unlock_row= join_const_unlock_row;
    break;
  default:
    j->read_record.unlock_row= rr_unlock_row;
    break;
  }
  DBUG_RETURN(0);
}
|
|
|
|
|
|
|
|
/*
  Create the store_key object that copies the value of keyuse->val into
  the key buffer for key part key_part.

  The kind of object depends on keyuse->val:
  - store_key_const_item  if all tables used by the key use are among
                          used_tables
  - store_key_field       if the value is a field, or an outer reference to
                          a direct reference / a view reference whose real
                          item is a field
  - store_key_item        otherwise

  If maybe_null is non-zero, the first byte at key_buff is passed as the
  NULL indicator and the value is stored after it.
*/
static store_key *
get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
              KEY_PART_INFO *key_part, uchar *key_buff, uint maybe_null)
{
  Item *val= keyuse->val;
  uchar *value_pos= key_buff + maybe_null;
  uchar *null_pos= maybe_null ? key_buff : 0;

  if (!((~used_tables) & keyuse->used_tables))          // if const item
    return new store_key_const_item(thd,
                                    key_part->field,
                                    value_pos,
                                    null_pos,
                                    key_part->length,
                                    val);

  bool copy_from_field= (val->type() == Item::FIELD_ITEM);
  if (!copy_from_field && val->type() == Item::REF_ITEM)
  {
    Item_ref *ref= (Item_ref*) val;
    const bool outer_direct_ref=
      ref->ref_type() == Item_ref::OUTER_REF &&
      (*(Item_ref**) ref->ref)->ref_type() == Item_ref::DIRECT_REF;
    copy_from_field=
      (outer_direct_ref || ref->ref_type() == Item_ref::VIEW_REF) &&
      val->real_item()->type() == Item::FIELD_ITEM;
  }

  if (copy_from_field)
  {
    Item *real= val->real_item();
    return new store_key_field(thd,
                               key_part->field,
                               value_pos,
                               null_pos,
                               key_part->length,
                               ((Item_field*) real)->field,
                               real->full_name());
  }

  return new store_key_item(thd,
                            key_part->field,
                            value_pos,
                            null_pos,
                            key_part->length,
                            val, FALSE);
}
|
|
|
|
|
|
/*
  Make *e1 the conjunction of *e1 and e2.

  - If *e1 is NULL, it is simply set to e2.
  - If e2 is NULL, *e1 is left as is.
  - Otherwise a new Item_cond_and(*e1, e2) is created, fixed and stored in
    *e1. If the item cannot be allocated, *e1 is left unchanged.
*/
inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2)
{
  if (!*e1)
  {
    *e1= e2;
    return;
  }
  if (!e2)
    return;

  Item *conj= new (thd->mem_root) Item_cond_and(thd, *e1, e2);
  if (!conj)
    return;

  conj->fix_fields(thd, 0);
  conj->update_used_tables();
  *e1= conj;
}
|
|
|
|
|
|
/**
|
|
Add to join_tab->select_cond[i] "table.field IS NOT NULL" conditions
|
|
we've inferred from ref/eq_ref access performed.
|
|
|
|
This function is a part of "Early NULL-values filtering for ref access"
|
|
optimization.
|
|
|
|
Example of this optimization:
|
|
For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
|
|
and plan " any-access(t1), ref(t2.key=t1.field) " @n
|
|
add "t1.field IS NOT NULL" to t1's table condition. @n
|
|
|
|
Description of the optimization:
|
|
|
|
We look through equalities chosen to perform ref/eq_ref access,
|
|
pick equalities that have form "tbl.part_of_key = othertbl.field"
|
|
(where othertbl is a non-const table and othertbl.field may be NULL)
|
|
and add them to conditions on corresponding tables (othertbl in this
|
|
example).
|
|
|
|
Exception from that is the case when referred_tab->join != join.
|
|
I.e. don't add NOT NULL constraints from any embedded subquery.
|
|
Consider this query:
|
|
@code
|
|
SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1
|
|
WHERE A.f3=(SELECT MIN(f3) FROM t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL;
|
|
@endcode
|
|
Here condition A.f3 IS NOT NULL is going to be added to the WHERE
|
|
condition of the embedding query.
|
|
Another example:
|
|
SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL)
|
|
AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12
|
|
WHERE t12.b = t10.a ));
|
|
Here condition t10.a IS NOT NULL is going to be added.
|
|
In both cases addition of NOT NULL condition will erroneously reject
|
|
some rows of the result set.
|
|
referred_tab->join != join constraint would disallow such additions.
|
|
|
|
This optimization doesn't affect the choices that ref, range, or join
|
|
optimizer make. This was intentional because this was added after 4.1
|
|
was GA.
|
|
|
|
Implementation overview
|
|
1. update_ref_and_keys() accumulates info about null-rejecting
|
|
predicates in KEY_FIELD::null_rejecting
|
|
1.1 add_key_part saves these to KEYUSE.
|
|
2. create_ref_for_key copies them to TABLE_REF.
|
|
3. add_not_null_conds adds "x IS NOT NULL" to join_tab->select_cond of
|
|
appropriate JOIN_TAB members.
|
|
*/
|
|
|
|
static void add_not_null_conds(JOIN *join)
{
  JOIN_TAB *tab;
  DBUG_ENTER("add_not_null_conds");

  for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
       tab;
       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
  {
    /* Only ref, eq_ref and ref_or_null accesses are examined. */
    if (tab->type != JT_REF && tab->type != JT_EQ_REF &&
        tab->type != JT_REF_OR_NULL)
      continue;

    for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++)
    {
      /* Skip key parts whose bit is not set in the null_rejecting map. */
      if (!(tab->ref.null_rejecting & ((key_part_map)1 << keypart)))
        continue;

      Item *item= tab->ref.items[keypart];
      Item *notnull;
      Item *real= item->real_item();
      if (real->can_eval_in_optimize() && real->type() != Item::FIELD_ITEM)
      {
        /*
          It could be constant instead of field after constant
          propagation.
        */
        continue;
      }
      DBUG_ASSERT(real->type() == Item::FIELD_ITEM);
      Item_field *not_null_item= (Item_field*)real;
      JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
      /*
        For UPDATE queries such as:
        UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
        not_null_item is the t1.f1, but its referred_tab is 0.
      */
      if (!(notnull= new (join->thd->mem_root)
            Item_func_isnotnull(join->thd, item)))
        DBUG_VOID_RETURN;
      /*
        We need to do full fix_fields() call here in order to have correct
        notnull->const_item(). This is needed e.g. by test_quick_select
        when it is called from make_join_select after this function is
        called.
        The second argument is the address of the item pointer, so that
        fix_fields() is handed the reference to the item being fixed.
      */
      if (notnull->fix_fields(join->thd, &notnull))
        DBUG_VOID_RETURN;

      DBUG_EXECUTE("where",print_where(notnull,
                                       (referred_tab ?
                                        referred_tab->table->alias.c_ptr() :
                                        "outer_ref_cond"),
                                       QT_ORDINARY););
      if (!tab->first_inner)
      {
        /*
          tab is not an inner table of an outer join: attach the predicate
          to the referred table when it belongs to this join, otherwise
          (no join_tab, or a join_tab of another JOIN) to
          join->outer_ref_cond.
        */
        const bool same_join= referred_tab && referred_tab->join == join;
        COND *new_cond= same_join ? referred_tab->select_cond
                                  : join->outer_ref_cond;
        add_cond_and_fix(join->thd, &new_cond, notnull);
        if (same_join)
          referred_tab->set_select_cond(new_cond, __LINE__);
        else
          join->outer_ref_cond= new_cond;
      }
      else
      {
        /* Inner table of an outer join: extend that join's ON expression. */
        add_cond_and_fix(join->thd, tab->first_inner->on_expr_ref, notnull);
      }
    }
  }
  DBUG_VOID_RETURN;
}
|
|
|
|
/**
|
|
Build a predicate guarded by match variables for embedding outer joins.
|
|
The function recursively adds guards for predicate cond
|
|
ascending from tab to the first inner table of the next embedding
|
|
nested outer join and so on until it reaches root_tab
|
|
(root_tab can be 0).
|
|
|
|
In other words:
|
|
add_found_match_trig_cond(tab->first_inner_tab, y, 0) is the way one should
|
|
wrap parts of WHERE. The idea is that the part of WHERE should be only
|
|
evaluated after we've finished figuring out whether the outer joins found a match.
|
|
^^^ is the above correct?
|
|
|
|
@param tab the first inner table for most nested outer join
|
|
@param cond the predicate to be guarded (must be set)
|
|
@param root_tab the first inner table to stop
|
|
|
|
@return
|
|
- pointer to the guarded predicate, if success
|
|
- 0, otherwise
|
|
*/
|
|
|
|
static COND*
add_found_match_trig_cond(THD *thd, JOIN_TAB *tab, COND *cond,
                          JOIN_TAB *root_tab)
{
  DBUG_ASSERT(cond != 0);

  /* Reached the stop table: nothing (more) to wrap. */
  if (tab == root_tab)
    return cond;

  /* Guard for the embedding outer joins first, walking up via first_upper. */
  COND *guarded= add_found_match_trig_cond(thd, tab->first_upper, cond,
                                           root_tab);
  if (!guarded)
    return 0;

  /* Then wrap the result into a trigger controlled by this tab's 'found'. */
  guarded= new (thd->mem_root) Item_func_trig_cond(thd, guarded, &tab->found);
  if (!guarded)
    return 0;

  guarded->quick_fix_field();
  guarded->update_used_tables();
  return guarded;
}
|
|
|
|
|
|
bool TABLE_LIST::is_active_sjm()
|
|
{
|
|
return sj_mat_info && sj_mat_info->is_used;
|
|
}
|
|
|
|
|
|
/**
|
|
Fill in outer join related info for the execution plan structure.
|
|
|
|
For each outer join operation left after simplification of the
|
|
original query the function sets up the following pointers in the linear
|
|
structure join->join_tab representing the selected execution plan.
|
|
The first inner table t0 for the operation is set to refer to the last
|
|
inner table tk through the field t0->last_inner.
|
|
Each inner table ti for the operation is set to refer to the first
|
|
inner table ti->first_inner.
|
|
The first inner table t0 for the operation is set to refer to the
|
|
first inner table of the embedding outer join operation, if there is any,
|
|
through the field t0->first_upper.
|
|
The on expression for the outer join operation is attached to the
|
|
corresponding first inner table through the field t0->on_expr_ref.
|
|
Here ti are structures of the JOIN_TAB type.
|
|
|
|
In other words, for each join tab, set
|
|
- first_inner
|
|
- last_inner
|
|
- first_upper
|
|
- on_expr_ref, cond_equal
|
|
|
|
EXAMPLE. For the query:
|
|
@code
|
|
SELECT * FROM t1
|
|
LEFT JOIN
|
|
(t2, t3 LEFT JOIN t4 ON t3.a=t4.a)
|
|
ON (t1.a=t2.a AND t1.b=t3.b)
|
|
WHERE t1.c > 5,
|
|
@endcode
|
|
|
|
given the execution plan with the table order t1,t2,t3,t4
|
|
is selected, the following references will be set;
|
|
t4->last_inner=[t4], t4->first_inner=[t4], t4->first_upper=[t2]
|
|
t2->last_inner=[t4], t2->first_inner=t3->first_inner=[t2],
|
|
on expression (t1.a=t2.a AND t1.b=t3.b) will be attached to
|
|
*t2->on_expr_ref, while t3.a=t4.a will be attached to *t4->on_expr_ref.
|
|
|
|
@param join reference to the info fully describing the query
|
|
|
|
@note
|
|
The function assumes that the simplification procedure has been
|
|
already applied to the join query (see simplify_joins).
|
|
This function can be called only after the execution plan
|
|
has been chosen.
|
|
*/
|
|
|
|
static bool
|
|
make_outerjoin_info(JOIN *join)
|
|
{
|
|
DBUG_ENTER("make_outerjoin_info");
|
|
|
|
/*
|
|
Create temp. tables for merged SJ-Materialization nests. We need to do
|
|
this now, because further code relies on tab->table and
|
|
tab->table->pos_in_table_list being set.
|
|
*/
|
|
JOIN_TAB *tab;
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (tab->bush_children)
|
|
{
|
|
if (setup_sj_materialization_part1(tab))
|
|
DBUG_RETURN(TRUE);
|
|
tab->table->reginfo.join_tab= tab;
|
|
}
|
|
}
|
|
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
TABLE *table= tab->table;
|
|
TABLE_LIST *tbl= table->pos_in_table_list;
|
|
TABLE_LIST *embedding= tbl->embedding;
|
|
|
|
if (tbl->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT))
|
|
{
|
|
/*
|
|
Table tab is the only one inner table for outer join.
|
|
(Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a
|
|
is in the query above.)
|
|
*/
|
|
tab->last_inner= tab->first_inner= tab;
|
|
tab->on_expr_ref= &tbl->on_expr;
|
|
tab->cond_equal= tbl->cond_equal;
|
|
if (embedding && !embedding->is_active_sjm())
|
|
tab->first_upper= embedding->nested_join->first_nested;
|
|
}
|
|
else if (!embedding)
|
|
tab->table->reginfo.not_exists_optimize= 0;
|
|
|
|
for ( ; embedding ; embedding= embedding->embedding)
|
|
{
|
|
if (embedding->is_active_sjm())
|
|
{
|
|
/*
|
|
We're trying to walk out of an SJ-Materialization nest.
|
|
Don't do this.
|
|
*/
|
|
break;
|
|
}
|
|
/* Ignore sj-nests: */
|
|
if (!(embedding->on_expr && embedding->outer_join))
|
|
{
|
|
tab->table->reginfo.not_exists_optimize= 0;
|
|
continue;
|
|
}
|
|
NESTED_JOIN *nested_join= embedding->nested_join;
|
|
if (!nested_join->counter)
|
|
{
|
|
/*
|
|
Table tab is the first inner table for nested_join.
|
|
Save reference to it in the nested join structure.
|
|
*/
|
|
nested_join->first_nested= tab;
|
|
tab->on_expr_ref= &embedding->on_expr;
|
|
tab->cond_equal= tbl->cond_equal;
|
|
if (embedding->embedding)
|
|
tab->first_upper= embedding->embedding->nested_join->first_nested;
|
|
}
|
|
if (!tab->first_inner)
|
|
tab->first_inner= nested_join->first_nested;
|
|
if (++nested_join->counter < nested_join->n_tables)
|
|
break;
|
|
/* Table tab is the last inner table for nested join. */
|
|
nested_join->first_nested->last_inner= tab;
|
|
}
|
|
}
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Build a temporary join prefix condition for JOIN_TABs up to the last tab
|
|
|
|
@param ret OUT the condition is returned here
|
|
|
|
@return
|
|
false OK
|
|
true Out of memory
|
|
|
|
@detail
|
|
Walk through the join prefix (from the first table to the last_tab) and
|
|
build a condition:
|
|
|
|
join_tab_1_cond AND join_tab_2_cond AND ... AND last_tab_conds
|
|
|
|
The condition is only intended to be used by the range optimizer, so:
|
|
- it is not normalized (can have Item_cond_and inside another
|
|
Item_cond_and)
|
|
- it does not include join->exec_const_cond and other similar conditions.
|
|
*/
|
|
|
|
bool build_tmp_join_prefix_cond(JOIN *join, JOIN_TAB *last_tab, Item **ret)
{
  THD *const thd= join->thd;
  /* AND node, created lazily once there are two conditions to combine. */
  Item_cond_and *all_conds= NULL;
  /* Result so far: a single condition, or all_conds once it exists. */
  Item *res= NULL;

  // Pick the ON-expression. Use the same logic as in get_sargable_cond():
  if (last_tab->on_expr_ref)
    res= *last_tab->on_expr_ref;
  else if (last_tab->table->pos_in_table_list &&
           last_tab->table->pos_in_table_list->embedding &&
           !last_tab->table->pos_in_table_list->embedding->sj_on_expr)
  {
    res= last_tab->table->pos_in_table_list->embedding->on_expr;
  }

  for (JOIN_TAB *tab= first_depth_first_tab(join);
       tab;
       tab= next_depth_first_tab(join, tab))
  {
    if (tab->select_cond)
    {
      if (!res)
        res= tab->select_cond;
      else if (!all_conds)
      {
        if (!(all_conds= new (thd->mem_root)Item_cond_and(thd, res,
                                                          tab->select_cond)))
          return true;
        res= all_conds;
      }
      else if (all_conds->add(tab->select_cond, thd->mem_root))
      {
        /* Appending to the AND list failed: report it like the other OOM. */
        return true;
      }
    }
    if (tab == last_tab)
      break;
  }

  /* res is the AND node whenever one was created, so it is the result. */
  *ret= res;
  return false;
}
|
|
|
|
|
|
static bool
|
|
make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
|
|
{
|
|
THD *thd= join->thd;
|
|
DBUG_ENTER("make_join_select");
|
|
if (select)
|
|
{
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_conditions(thd, "attaching_conditions_to_tables");
|
|
Json_writer_array trace_attached_comp(thd,
|
|
"attached_conditions_computation");
|
|
add_not_null_conds(join);
|
|
table_map used_tables;
|
|
/*
|
|
Step #1: Extract constant condition
|
|
- Extract and check the constant part of the WHERE
|
|
- Extract constant parts of ON expressions from outer
|
|
joins and attach them appropriately.
|
|
*/
|
|
if (cond) /* Because of QUICK_GROUP_MIN_MAX_SELECT */
|
|
{ /* there may be a select without a cond. */
|
|
if (join->table_count > 1)
|
|
cond->update_used_tables(); // Tablenr may have changed
|
|
|
|
/*
|
|
Extract expressions that depend on constant tables
|
|
1. Const part of the join's WHERE clause can be checked immediately
|
|
and if it is not satisfied then the join has empty result
|
|
2. Constant parts of outer joins' ON expressions must be attached
|
|
there inside the triggers.
|
|
*/
|
|
{ // Check const tables
|
|
Item* const_cond= NULL;
|
|
const_cond= make_cond_for_table(thd, cond,
|
|
join->const_table_map,
|
|
(table_map) 0, -1, FALSE, FALSE);
|
|
if (!const_cond && thd->is_error())
|
|
DBUG_RETURN(1);
|
|
|
|
/* Add conditions added by add_not_null_conds(). */
|
|
for (uint i= 0 ; i < join->const_tables ; i++)
|
|
add_cond_and_fix(thd, &const_cond,
|
|
join->join_tab[i].select_cond);
|
|
|
|
DBUG_EXECUTE("where",print_where(const_cond,"constants",
|
|
QT_ORDINARY););
|
|
|
|
if (const_cond)
|
|
{
|
|
Json_writer_object trace_const_cond(thd);
|
|
trace_const_cond.add("condition_on_constant_tables", const_cond);
|
|
if (const_cond->is_expensive())
|
|
{
|
|
if (unlikely(trace_const_cond.trace_started()))
|
|
trace_const_cond.
|
|
add("evalualted", "false").
|
|
add("cause", "expensive cond");
|
|
}
|
|
else
|
|
{
|
|
bool const_cond_result;
|
|
{
|
|
Json_writer_array a(thd, "computing_condition");
|
|
const_cond_result= const_cond->val_int() != 0;
|
|
}
|
|
if (!const_cond_result)
|
|
{
|
|
DBUG_PRINT("info",("Found impossible WHERE condition"));
|
|
if (unlikely(trace_const_cond.trace_started()))
|
|
trace_const_cond.
|
|
add("evalualted", "true").
|
|
add("found", "impossible where");
|
|
join->exec_const_cond= NULL;
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
join->exec_const_cond= const_cond;
|
|
}
|
|
|
|
if (join->table_count != join->const_tables)
|
|
{
|
|
COND *outer_ref_cond= make_cond_for_table(thd, cond,
|
|
join->const_table_map |
|
|
OUTER_REF_TABLE_BIT,
|
|
OUTER_REF_TABLE_BIT,
|
|
-1, FALSE, FALSE);
|
|
if (outer_ref_cond)
|
|
{
|
|
add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond);
|
|
join->outer_ref_cond= outer_ref_cond;
|
|
|
|
Json_writer_object trace(thd);
|
|
trace.add("outer_ref_cond", outer_ref_cond);
|
|
}
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
else
|
|
{
|
|
COND *pseudo_bits_cond=
|
|
make_cond_for_table(thd, cond,
|
|
join->const_table_map |
|
|
PSEUDO_TABLE_BITS,
|
|
PSEUDO_TABLE_BITS,
|
|
-1, FALSE, FALSE);
|
|
if (pseudo_bits_cond)
|
|
{
|
|
add_cond_and_fix(thd, &pseudo_bits_cond,
|
|
join->pseudo_bits_cond);
|
|
join->pseudo_bits_cond= pseudo_bits_cond;
|
|
|
|
Json_writer_object trace(thd);
|
|
trace.add("pseudo_bits_cond", pseudo_bits_cond);
|
|
}
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Step #2: Extract WHERE/ON parts
|
|
*/
|
|
|
|
uint i;
|
|
for (i= join->top_join_tab_count - 1; i >= join->const_tables; i--)
|
|
{
|
|
if (!join->join_tab[i].bush_children)
|
|
break;
|
|
}
|
|
uint last_top_base_tab_idx= i;
|
|
|
|
table_map save_used_tables= 0;
|
|
used_tables=((select->const_tables=join->const_table_map) |
|
|
OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
|
|
JOIN_TAB *tab;
|
|
table_map current_map;
|
|
i= join->const_tables;
|
|
for (tab= first_depth_first_tab(join); tab;
|
|
tab= next_depth_first_tab(join, tab))
|
|
{
|
|
bool is_hj;
|
|
|
|
/*
|
|
first_inner is the X in queries like:
|
|
SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
|
|
*/
|
|
JOIN_TAB *first_inner_tab= tab->first_inner;
|
|
COND *tmp;
|
|
|
|
if (!tab->bush_children)
|
|
current_map= tab->table->map;
|
|
else
|
|
current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables;
|
|
|
|
/*
|
|
Tables that are within SJ-Materialization nests cannot have their
|
|
conditions referring to preceding non-const tables.
|
|
- If we're looking at the first SJM table, reset used_tables
|
|
to refer to only allowed tables
|
|
*/
|
|
if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info &&
|
|
tab->emb_sj_nest->sj_mat_info->is_used &&
|
|
!(used_tables & tab->emb_sj_nest->sj_inner_tables))
|
|
{
|
|
save_used_tables= used_tables;
|
|
used_tables= join->const_table_map | OUTER_REF_TABLE_BIT |
|
|
RAND_TABLE_BIT;
|
|
}
|
|
|
|
used_tables|=current_map;
|
|
|
|
if ((tab->type == JT_REF || tab->type == JT_RANGE) && tab->quick &&
|
|
(((uint) tab->ref.key == tab->quick->index &&
|
|
tab->ref.key_length < tab->quick->max_used_key_length) ||
|
|
(!is_hash_join_key_no(tab->ref.key) &&
|
|
tab->table->intersect_keys.is_set(tab->ref.key))))
|
|
{
|
|
/* Range uses longer key; Use this instead of ref on key */
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
Json_writer_object ref_to_range(thd);
|
|
ref_to_range.
|
|
add("ref_to_range", true).
|
|
add("cause", "range uses longer key");
|
|
}
|
|
tab->type= JT_RANGE;
|
|
tab->use_quick=1;
|
|
tab->ref.key= -1;
|
|
tab->ref.key_parts=0; // Don't use ref key.
|
|
join->best_positions[i].records_read= rows2double(tab->quick->records);
|
|
|
|
/*
|
|
We will use join cache here : prevent sorting of the first
|
|
table only and sort at the end.
|
|
*/
|
|
if (i != join->const_tables &&
|
|
join->table_count > join->const_tables + 1 &&
|
|
join->best_positions[i].use_join_buffer)
|
|
join->full_join= 1;
|
|
}
|
|
|
|
tmp= NULL;
|
|
|
|
if (cond)
|
|
{
|
|
if (tab->bush_children)
|
|
{
|
|
// Reached the materialization tab
|
|
tmp= make_cond_after_sjm(thd, cond, cond, save_used_tables,
|
|
used_tables, /*inside_or_clause=*/FALSE);
|
|
used_tables= save_used_tables | used_tables;
|
|
save_used_tables= 0;
|
|
}
|
|
else
|
|
{
|
|
tmp= make_cond_for_table(thd, cond, used_tables, current_map, i,
|
|
FALSE, FALSE);
|
|
if (!tmp && thd->is_error())
|
|
DBUG_RETURN(1);
|
|
|
|
if (tab == join->join_tab + last_top_base_tab_idx)
|
|
{
|
|
/*
|
|
This pushes conjunctive conditions of WHERE condition such that:
|
|
- their used_tables() contain RAND_TABLE_BIT
|
|
- the conditions does not refer to any fields
|
|
(such like rand() > 0.5)
|
|
*/
|
|
table_map rand_table_bit= (table_map) RAND_TABLE_BIT;
|
|
COND *rand_cond= make_cond_for_table(thd, cond, used_tables,
|
|
rand_table_bit, -1,
|
|
FALSE, FALSE);
|
|
if (rand_cond)
|
|
add_cond_and_fix(thd, &tmp, rand_cond);
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
/* Add conditions added by add_not_null_conds(). */
|
|
if (tab->select_cond)
|
|
add_cond_and_fix(thd, &tmp, tab->select_cond);
|
|
}
|
|
|
|
is_hj= (tab->type == JT_REF || tab->type == JT_EQ_REF) &&
|
|
(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
|
|
((join->max_allowed_join_cache_level+1)/2 == 2 ||
|
|
((join->max_allowed_join_cache_level+1)/2 > 2 &&
|
|
is_hash_join_key_no(tab->ref.key))) &&
|
|
(!tab->emb_sj_nest ||
|
|
join->allowed_semijoin_with_cache) &&
|
|
(!(tab->table->map & join->outer_join) ||
|
|
join->allowed_outer_join_with_cache);
|
|
|
|
if (cond && !tmp && tab->quick)
|
|
{ // Outer join
|
|
if ((tab->type != JT_ALL && tab->type != JT_RANGE) && !is_hj)
|
|
{
|
|
/*
|
|
Don't use the quick method
|
|
We come here in the case where we have 'key=constant' and
|
|
the test is removed by make_cond_for_table()
|
|
*/
|
|
delete tab->quick;
|
|
tab->quick= 0;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Hack to handle the case where we only refer to a table
|
|
in the ON part of an OUTER JOIN. In this case we want the code
|
|
below to check if we should use 'quick' instead.
|
|
*/
|
|
DBUG_PRINT("info", ("Item_int"));
|
|
tmp= (Item*) Item_true;
|
|
}
|
|
|
|
}
|
|
if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL ||
|
|
tab->type == JT_EQ_REF || first_inner_tab)
|
|
{
|
|
DBUG_EXECUTE("where",print_where(tmp,
|
|
tab->table ?
|
|
tab->table->alias.c_ptr() :"sjm-nest",
|
|
QT_ORDINARY););
|
|
SQL_SELECT *sel= tab->select= ((SQL_SELECT*)
|
|
thd->memdup((uchar*) select,
|
|
sizeof(*select)));
|
|
if (!sel)
|
|
DBUG_RETURN(1); // End of memory
|
|
/*
|
|
If tab is an inner table of an outer join operation,
|
|
add a match guard to the pushed down predicate.
|
|
The guard will turn the predicate on only after
|
|
the first match for outer tables is encountered.
|
|
*/
|
|
if (cond && tmp)
|
|
{
|
|
/*
|
|
Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without
|
|
a cond, so neutralize the hack above.
|
|
*/
|
|
COND *tmp_cond;
|
|
if (!(tmp_cond= add_found_match_trig_cond(thd, first_inner_tab, tmp,
|
|
0)))
|
|
DBUG_RETURN(1);
|
|
sel->cond= tmp_cond;
|
|
tab->set_select_cond(tmp_cond, __LINE__);
|
|
/* Push condition to storage engine if this is enabled
|
|
and the condition is not guarded */
|
|
if (tab->table)
|
|
{
|
|
tab->table->file->pushed_cond= NULL;
|
|
if ((tab->table->file->ha_table_flags() &
|
|
HA_CAN_TABLE_CONDITION_PUSHDOWN) &&
|
|
!first_inner_tab)
|
|
{
|
|
Json_writer_object wrap(thd);
|
|
Json_writer_object trace_cp(thd, "table_condition_pushdown");
|
|
trace_cp.add_table_name(tab->table);
|
|
|
|
COND *push_cond=
|
|
make_cond_for_table(thd, tmp_cond, current_map, current_map,
|
|
-1, FALSE, FALSE);
|
|
if (push_cond)
|
|
{
|
|
trace_cp.add("push_cond", push_cond);
|
|
/* Push condition to handler */
|
|
if (!tab->table->file->cond_push(push_cond))
|
|
tab->table->file->pushed_cond= push_cond;
|
|
}
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sel->cond= NULL;
|
|
tab->set_select_cond(NULL, __LINE__);
|
|
}
|
|
|
|
sel->head=tab->table;
|
|
DBUG_EXECUTE("where",
|
|
print_where(tmp,
|
|
tab->table ? tab->table->alias.c_ptr() :
|
|
"(sjm-nest)",
|
|
QT_ORDINARY););
|
|
if (tab->quick)
|
|
{
|
|
/* Use quick key read if it's a constant and it's not used
|
|
with key reading */
|
|
if ((tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF &&
|
|
tab->type != JT_FT &&
|
|
((tab->type != JT_CONST && tab->type != JT_REF) ||
|
|
(uint) tab->ref.key == tab->quick->index)) || is_hj)
|
|
{
|
|
DBUG_ASSERT(tab->quick->is_valid());
|
|
sel->quick=tab->quick; // Use value from get_quick_...
|
|
sel->quick_keys.clear_all();
|
|
sel->needed_reg.clear_all();
|
|
if (is_hj && tab->rowid_filter)
|
|
tab->clear_range_rowid_filter();
|
|
}
|
|
else
|
|
{
|
|
delete tab->quick;
|
|
}
|
|
tab->quick=0;
|
|
}
|
|
uint ref_key= (sel->head ?
|
|
(uint) sel->head->reginfo.join_tab->ref.key+1 :
|
|
0);
|
|
if (i == join->const_tables && ref_key)
|
|
{
|
|
if (!tab->const_keys.is_clear_all() &&
|
|
tab->table->reginfo.impossible_range)
|
|
DBUG_RETURN(1);
|
|
}
|
|
else if ((tab->type == JT_ALL || tab->type == JT_NEXT))
|
|
{
|
|
if (!tab->const_keys.is_clear_all() &&
|
|
tab->table->reginfo.impossible_range)
|
|
DBUG_RETURN(1); // Impossible range
|
|
/*
|
|
We plan to scan all rows either with table or index scan
|
|
Check again if we should use an index.
|
|
|
|
There are two cases:
|
|
1) There could be an index usage the refers to a previous
|
|
table that we didn't consider before, but could be consider
|
|
now as a "last resort". For example
|
|
SELECT * from t1,t2 where t1.a between t2.a and t2.b;
|
|
2) If the current table is the first non const table
|
|
and there is a limit it still possibly beneficial
|
|
to use the index even if the index range is big as
|
|
we can stop when we've found limit rows.
|
|
|
|
(1) - Don't switch the used index if we are using semi-join
|
|
LooseScan on this table. Using different index will not
|
|
produce the desired ordering and de-duplication.
|
|
*/
|
|
|
|
if (!tab->table->is_filled_at_execution() &&
|
|
!tab->loosescan_match_tab && // (1)
|
|
((cond && (!tab->keys.is_subset(tab->const_keys) &&
|
|
i > join->const_tables)) ||
|
|
(!tab->const_keys.is_clear_all() && i == join->const_tables &&
|
|
join->unit->lim.get_select_limit() <
|
|
join->best_positions[i].records_read &&
|
|
!(join->select_options & OPTION_FOUND_ROWS))))
|
|
{
|
|
/* Join with outer join condition */
|
|
COND *orig_cond=sel->cond;
|
|
|
|
if (build_tmp_join_prefix_cond(join, tab, &sel->cond))
|
|
return true;
|
|
|
|
/*
|
|
We can't call sel->cond->fix_fields,
|
|
as it will break tab->on_expr if it's AND condition
|
|
(fix_fields currently removes extra AND/OR levels).
|
|
Yet attributes of the just built condition are not needed.
|
|
Thus we call sel->cond->quick_fix_field for safety.
|
|
*/
|
|
if (sel->cond && !sel->cond->fixed())
|
|
sel->cond->quick_fix_field();
|
|
quick_select_return res;
|
|
|
|
if ((res= sel->test_quick_select(thd, tab->keys,
|
|
((used_tables & ~ current_map) |
|
|
OUTER_REF_TABLE_BIT),
|
|
(join->select_options &
|
|
OPTION_FOUND_ROWS ?
|
|
HA_POS_ERROR :
|
|
join->unit->lim.get_select_limit()),
|
|
0,
|
|
FALSE, FALSE, FALSE,
|
|
Item_func::BITMAP_ALL)) ==
|
|
SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
{
|
|
/*
|
|
Before reporting "Impossible WHERE" for the whole query
|
|
we have to check isn't it only "impossible ON" instead
|
|
*/
|
|
sel->cond=orig_cond;
|
|
if (!*tab->on_expr_ref ||
|
|
(res= sel->test_quick_select(thd, tab->keys,
|
|
used_tables & ~ current_map,
|
|
(join->select_options &
|
|
OPTION_FOUND_ROWS ?
|
|
HA_POS_ERROR :
|
|
join->unit->lim.get_select_limit()),
|
|
0, FALSE, FALSE, FALSE,
|
|
Item_func::BITMAP_NONE)) ==
|
|
SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
DBUG_RETURN(1); // Impossible WHERE
|
|
}
|
|
else
|
|
sel->cond=orig_cond;
|
|
|
|
if (res == SQL_SELECT::ERROR)
|
|
DBUG_RETURN(1); /* Some error in one of test_quick_select calls */
|
|
|
|
/* Fix for EXPLAIN */
|
|
if (sel->quick)
|
|
{
|
|
join->best_positions[i].records_read=
|
|
(double) sel->quick->records;
|
|
set_if_smaller(join->best_positions[i].records_out,
|
|
rows2double(sel->head->opt_range_condition_rows));
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
sel->head->opt_range_condition_rows may have been
|
|
updated to a smaller number than before by a call to
|
|
test_quick_select. This can happen even if the range
|
|
optimizer decided to not use the range (sel->quick was
|
|
not set).
|
|
*/
|
|
set_if_smaller(join->best_positions[i].records_out,
|
|
rows2double(sel->head->opt_range_condition_rows));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sel->needed_reg=tab->needed_reg;
|
|
}
|
|
sel->quick_keys= tab->table->opt_range_keys;
|
|
if (!sel->quick_keys.is_subset(tab->checked_keys) ||
|
|
!sel->needed_reg.is_subset(tab->checked_keys))
|
|
{
|
|
handler *file= tab->table->file;
|
|
/*
|
|
"Range checked for each record" is a "last resort" access method
|
|
that should only be used when the other option is a cross-product
|
|
join.
|
|
|
|
We use the following condition (it's approximate):
|
|
1. There are potential keys for (sel->needed_reg)
|
|
2. There were no possible ways to construct a quick select, or
|
|
the quick select would be more expensive than the full table
|
|
scan.
|
|
*/
|
|
tab->use_quick= (!sel->needed_reg.is_clear_all() &&
|
|
(sel->quick_keys.is_clear_all() ||
|
|
(sel->quick &&
|
|
sel->quick->read_time >
|
|
file->cost(file->ha_scan_and_compare_time(tab->table->file-> stats.records))))) ?
|
|
2 : 1;
|
|
sel->read_tables= used_tables & ~current_map;
|
|
sel->quick_keys.clear_all();
|
|
}
|
|
if (i != join->const_tables && tab->use_quick != 2 &&
|
|
!tab->first_inner)
|
|
{ /* Read with cache */
|
|
/*
|
|
TODO: the execution also gets here when we will not be using
|
|
join buffer. Review these cases and perhaps, remove this call.
|
|
(The final decision whether to use join buffer is made in
|
|
check_join_cache_usage, so we should only call make_scan_filter()
|
|
there, too).
|
|
*/
|
|
if (tab->make_scan_filter())
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Push down conditions from all ON expressions.
|
|
Each of these conditions are guarded by a variable
|
|
that turns if off just before null complemented row for
|
|
outer joins is formed. Thus, the condition from an
|
|
'on expression' are guaranteed not to be checked for
|
|
the null complemented row.
|
|
*/
|
|
|
|
/*
|
|
First push down constant conditions from ON expressions.
|
|
- Each pushed-down condition is wrapped into trigger which is
|
|
enabled only for non-NULL-complemented record
|
|
- The condition is attached to the first_inner_table.
|
|
|
|
With regards to join nests:
|
|
- if we start at top level, don't walk into nests
|
|
- if we start inside a nest, stay within that nest.
|
|
*/
|
|
JOIN_TAB *start_from= tab->bush_root_tab?
|
|
tab->bush_root_tab->bush_children->start :
|
|
join->join_tab + join->const_tables;
|
|
JOIN_TAB *end_with= tab->bush_root_tab?
|
|
tab->bush_root_tab->bush_children->end :
|
|
join->join_tab + join->top_join_tab_count;
|
|
for (JOIN_TAB *join_tab= start_from;
|
|
join_tab != end_with;
|
|
join_tab++)
|
|
{
|
|
if (*join_tab->on_expr_ref)
|
|
{
|
|
JOIN_TAB *cond_tab= join_tab->first_inner;
|
|
COND *tmp_cond= make_cond_for_table(thd, *join_tab->on_expr_ref,
|
|
join->const_table_map,
|
|
(table_map) 0, -1, FALSE, FALSE);
|
|
if (!tmp_cond)
|
|
{
|
|
if (!thd->is_error())
|
|
continue;
|
|
DBUG_RETURN(1);
|
|
}
|
|
tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond,
|
|
&cond_tab->not_null_compl);
|
|
if (!tmp_cond)
|
|
DBUG_RETURN(1);
|
|
tmp_cond->quick_fix_field();
|
|
cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond :
|
|
new (thd->mem_root) Item_cond_and(thd, cond_tab->select_cond,
|
|
tmp_cond);
|
|
if (!cond_tab->select_cond)
|
|
DBUG_RETURN(1);
|
|
cond_tab->select_cond->quick_fix_field();
|
|
cond_tab->select_cond->update_used_tables();
|
|
if (cond_tab->select)
|
|
cond_tab->select->cond= cond_tab->select_cond;
|
|
}
|
|
}
|
|
|
|
|
|
/* Push down non-constant conditions from ON expressions */
|
|
JOIN_TAB *last_tab= tab;
|
|
|
|
/*
|
|
while we're inside of an outer join and last_tab is
|
|
the last of its tables ...
|
|
*/
|
|
while (first_inner_tab && first_inner_tab->last_inner == last_tab)
|
|
{
|
|
/*
|
|
Table tab is the last inner table of an outer join.
|
|
An on expression is always attached to it.
|
|
*/
|
|
COND *on_expr= *first_inner_tab->on_expr_ref;
|
|
|
|
table_map used_tables2= (join->const_table_map |
|
|
OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
|
|
|
|
start_from= tab->bush_root_tab?
|
|
tab->bush_root_tab->bush_children->start :
|
|
join->join_tab + join->const_tables;
|
|
for (JOIN_TAB *inner_tab= start_from;
|
|
inner_tab <= last_tab;
|
|
inner_tab++)
|
|
{
|
|
DBUG_ASSERT(inner_tab->table);
|
|
current_map= inner_tab->table->map;
|
|
used_tables2|= current_map;
|
|
/*
|
|
psergey: have put the -1 below. It's bad, will need to fix it.
|
|
*/
|
|
COND *tmp_cond= make_cond_for_table(thd, on_expr, used_tables2,
|
|
current_map,
|
|
/*(inner_tab - first_tab)*/ -1,
|
|
FALSE, FALSE);
|
|
if (!tmp_cond && thd->is_error())
|
|
DBUG_RETURN(1);
|
|
if (tab == last_tab)
|
|
{
|
|
/*
|
|
This pushes conjunctive conditions of ON expression of an outer
|
|
join such that:
|
|
- their used_tables() contain RAND_TABLE_BIT
|
|
- the conditions does not refer to any fields
|
|
(such like rand() > 0.5)
|
|
*/
|
|
table_map rand_table_bit= (table_map) RAND_TABLE_BIT;
|
|
COND *rand_cond= make_cond_for_table(thd, on_expr, used_tables2,
|
|
rand_table_bit, -1,
|
|
FALSE, FALSE);
|
|
if (rand_cond)
|
|
add_cond_and_fix(thd, &tmp_cond, rand_cond);
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
bool is_sjm_lookup_tab= FALSE;
|
|
if (inner_tab->bush_children)
|
|
{
|
|
/*
|
|
'inner_tab' is an SJ-Materialization tab, i.e. we have a join
|
|
order like this:
|
|
|
|
ot1 sjm_tab LEFT JOIN ot2 ot3
|
|
^ ^
|
|
'tab'-+ +--- left join we're adding triggers for
|
|
|
|
LEFT JOIN's ON expression may not have references to subquery
|
|
columns. The subquery was in the WHERE clause, so IN-equality
|
|
is in the WHERE clause, also.
|
|
However, equality propagation code may have propagated the
|
|
IN-equality into ON expression, and we may get things like
|
|
|
|
subquery_inner_table=const
|
|
|
|
in the ON expression. We must not check such conditions during
|
|
SJM-lookup, because 1) subquery_inner_table has no valid current
|
|
row (materialization temp.table has it instead), and 2) they
|
|
would be true anyway.
|
|
*/
|
|
SJ_MATERIALIZATION_INFO *sjm=
|
|
inner_tab->bush_children->start->emb_sj_nest->sj_mat_info;
|
|
if (sjm->is_used && !sjm->is_sj_scan)
|
|
is_sjm_lookup_tab= TRUE;
|
|
}
|
|
|
|
if (inner_tab == first_inner_tab && inner_tab->on_precond &&
|
|
!is_sjm_lookup_tab)
|
|
add_cond_and_fix(thd, &tmp_cond, inner_tab->on_precond);
|
|
if (tmp_cond && !is_sjm_lookup_tab)
|
|
{
|
|
JOIN_TAB *cond_tab= (inner_tab < first_inner_tab ?
|
|
first_inner_tab : inner_tab);
|
|
Item **sel_cond_ref= (inner_tab < first_inner_tab ?
|
|
&first_inner_tab->on_precond :
|
|
&inner_tab->select_cond);
|
|
/*
|
|
First add the guards for match variables of
|
|
all embedding outer join operations.
|
|
*/
|
|
if (!(tmp_cond= add_found_match_trig_cond(thd,
|
|
cond_tab->first_inner,
|
|
tmp_cond,
|
|
first_inner_tab)))
|
|
DBUG_RETURN(1);
|
|
/*
|
|
Now add the guard turning the predicate off for
|
|
the null complemented row.
|
|
*/
|
|
DBUG_PRINT("info", ("Item_func_trig_cond"));
|
|
tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond,
|
|
&first_inner_tab->
|
|
not_null_compl);
|
|
DBUG_PRINT("info", ("Item_func_trig_cond %p",
|
|
tmp_cond));
|
|
if (tmp_cond)
|
|
tmp_cond->quick_fix_field();
|
|
/* Add the predicate to other pushed down predicates */
|
|
DBUG_PRINT("info", ("Item_cond_and"));
|
|
*sel_cond_ref= !(*sel_cond_ref) ?
|
|
tmp_cond :
|
|
new (thd->mem_root) Item_cond_and(thd, *sel_cond_ref, tmp_cond);
|
|
DBUG_PRINT("info", ("Item_cond_and %p",
|
|
(*sel_cond_ref)));
|
|
if (!(*sel_cond_ref))
|
|
DBUG_RETURN(1);
|
|
(*sel_cond_ref)->quick_fix_field();
|
|
(*sel_cond_ref)->update_used_tables();
|
|
if (cond_tab->select)
|
|
cond_tab->select->cond= cond_tab->select_cond;
|
|
}
|
|
}
|
|
first_inner_tab= first_inner_tab->first_upper;
|
|
}
|
|
if (!tab->bush_children)
|
|
i++;
|
|
}
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
trace_attached_comp.end();
|
|
Json_writer_array trace_attached_summary(thd,
|
|
"attached_conditions_summary");
|
|
for (tab= first_depth_first_tab(join); tab;
|
|
tab= next_depth_first_tab(join, tab))
|
|
{
|
|
if (!tab->table)
|
|
continue;
|
|
Item *const cond = tab->select_cond;
|
|
Json_writer_object trace_one_table(thd);
|
|
trace_one_table.add_table_name(tab);
|
|
trace_one_table.add("attached_condition", cond);
|
|
}
|
|
}
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
static
|
|
uint get_next_field_for_derived_key(uchar *arg)
|
|
{
|
|
KEYUSE *keyuse= *(KEYUSE **) arg;
|
|
if (!keyuse)
|
|
return (uint) (-1);
|
|
TABLE *table= keyuse->table;
|
|
uint key= keyuse->key;
|
|
uint fldno= keyuse->keypart;
|
|
uint keypart= keyuse->keypart_map == (key_part_map) 1 ?
|
|
0 : (keyuse-1)->keypart+1;
|
|
for ( ;
|
|
keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
|
|
keyuse++)
|
|
keyuse->keypart= keypart;
|
|
if (keyuse->key != key)
|
|
keyuse= 0;
|
|
*((KEYUSE **) arg)= keyuse;
|
|
return fldno;
|
|
}
|
|
|
|
|
|
static
|
|
uint get_next_field_for_derived_key_simple(uchar *arg)
|
|
{
|
|
KEYUSE *keyuse= *(KEYUSE **) arg;
|
|
if (!keyuse)
|
|
return (uint) (-1);
|
|
TABLE *table= keyuse->table;
|
|
uint key= keyuse->key;
|
|
uint fldno= keyuse->keypart;
|
|
for ( ;
|
|
keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
|
|
keyuse++)
|
|
;
|
|
if (keyuse->key != key)
|
|
keyuse= 0;
|
|
*((KEYUSE **) arg)= keyuse;
|
|
return fldno;
|
|
}
|
|
|
|
static
|
|
bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys)
|
|
{
|
|
TABLE *table= keyuse->table;
|
|
if (table->alloc_keys(keys))
|
|
return TRUE;
|
|
uint key_count= 0;
|
|
KEYUSE *first_keyuse= keyuse;
|
|
uint prev_part= keyuse->keypart;
|
|
uint parts= 0;
|
|
uint i= 0;
|
|
|
|
for ( ; i < count && key_count < keys; )
|
|
{
|
|
do
|
|
{
|
|
keyuse->key= table->s->keys;
|
|
keyuse->keypart_map= (key_part_map) (1 << parts);
|
|
keyuse++;
|
|
i++;
|
|
}
|
|
while (i < count && keyuse->used_tables == first_keyuse->used_tables &&
|
|
keyuse->keypart == prev_part);
|
|
parts++;
|
|
if (i < count && keyuse->used_tables == first_keyuse->used_tables)
|
|
{
|
|
prev_part= keyuse->keypart;
|
|
}
|
|
else
|
|
{
|
|
KEYUSE *save_first_keyuse= first_keyuse;
|
|
if (table->check_tmp_key(table->s->keys, parts,
|
|
get_next_field_for_derived_key_simple,
|
|
(uchar *) &first_keyuse))
|
|
|
|
{
|
|
JOIN_TAB *tab;
|
|
first_keyuse= save_first_keyuse;
|
|
if (table->add_tmp_key(table->s->keys, parts,
|
|
get_next_field_for_derived_key,
|
|
(uchar *) &first_keyuse,
|
|
FALSE))
|
|
return TRUE;
|
|
table->reginfo.join_tab->keys.set_bit(table->s->keys - 1);
|
|
tab= table->reginfo.join_tab;
|
|
for (uint i=0; i < parts; i++)
|
|
tab->key_dependent|= save_first_keyuse[i].used_tables;
|
|
}
|
|
else
|
|
{
|
|
/* Mark keyuses for this key to be excluded */
|
|
for (KEYUSE *curr=save_first_keyuse; curr < keyuse; curr++)
|
|
{
|
|
curr->key= MAX_KEY;
|
|
}
|
|
}
|
|
first_keyuse= keyuse;
|
|
key_count++;
|
|
parts= 0;
|
|
prev_part= keyuse->keypart;
|
|
}
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static
|
|
bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array)
|
|
{
|
|
KEYUSE *keyuse, *end_keyuse;
|
|
size_t elements= keyuse_array->elements;
|
|
TABLE *prev_table= 0;
|
|
|
|
DBUG_ASSERT(elements > 0);
|
|
/* The last element is an end marker */
|
|
DBUG_ASSERT(dynamic_element(keyuse_array, elements-1,
|
|
KEYUSE*)[0].table == 0);
|
|
|
|
for (keyuse= dynamic_element(keyuse_array, 0, KEYUSE*),
|
|
end_keyuse= keyuse + elements - 1;
|
|
keyuse < end_keyuse;
|
|
keyuse++)
|
|
{
|
|
DBUG_ASSERT(keyuse->table);
|
|
|
|
KEYUSE *first_table_keyuse= NULL;
|
|
table_map last_used_tables= 0;
|
|
uint count= 0;
|
|
uint keys= 0;
|
|
TABLE_LIST *derived= NULL;
|
|
|
|
if (keyuse->table != prev_table)
|
|
derived= keyuse->table->pos_in_table_list;
|
|
|
|
if (!derived->is_materialized_derived())
|
|
continue;
|
|
|
|
for (;;)
|
|
{
|
|
if (keyuse->table != prev_table)
|
|
{
|
|
prev_table= keyuse->table;
|
|
while (keyuse->table == prev_table && keyuse->key != MAX_KEY)
|
|
keyuse++;
|
|
if (keyuse->table != prev_table)
|
|
{
|
|
keyuse--;
|
|
break;
|
|
}
|
|
first_table_keyuse= keyuse;
|
|
last_used_tables= keyuse->used_tables;
|
|
count= 0;
|
|
keys= 0;
|
|
}
|
|
else if (keyuse->used_tables != last_used_tables)
|
|
{
|
|
keys++;
|
|
last_used_tables= keyuse->used_tables;
|
|
}
|
|
count++;
|
|
keyuse++;
|
|
if (keyuse->table != prev_table)
|
|
{
|
|
if (generate_derived_keys_for_table(first_table_keyuse, count, ++keys))
|
|
return TRUE;
|
|
keyuse--;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Drops unused keys for each materialized derived table/view
|
|
|
|
@details
|
|
For materialized derived tables only ref access can be used, it employs
|
|
only one index, thus we don't need the rest. For each materialized derived
|
|
table/view call TABLE::use_index to save one index chosen by the optimizer
|
|
and free others. If no key is chosen then all keys will be dropped.
|
|
*/
|
|
|
|
void JOIN::drop_unused_derived_keys()
|
|
{
|
|
JOIN_TAB *tab;
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
|
|
TABLE *tmp_tbl= tab->table;
|
|
/*
|
|
Skip placeholders and already created tables (we cannot change keys
|
|
for created tables)
|
|
*/
|
|
if (!tmp_tbl || tmp_tbl->is_created())
|
|
continue;
|
|
if (!tmp_tbl->pos_in_table_list->is_materialized_derived())
|
|
continue;
|
|
|
|
/*
|
|
tmp_tbl->max_keys is the number of keys pre-allocated in
|
|
TABLE::alloc_keys(). Can be 0 if alloc_keys() was not called.
|
|
|
|
tmp_tbl->s->keys is number of keys defined for the table.
|
|
Normally 0 or 1 (= unique key)
|
|
*/
|
|
|
|
if (likely(tmp_tbl->s->keys) && tab->ref.key >= 0 &&
|
|
!tab->is_ref_for_hash_join())
|
|
{
|
|
if (tmp_tbl->s->keys > 1)
|
|
{
|
|
/* remove all keys except the chosen one and unique keys */
|
|
tmp_tbl->use_index(tab->ref.key, &tab->keys);
|
|
}
|
|
/*
|
|
We dropped all keys except the chosen one and unique keys.
|
|
The choosen one is stored as the first key (number 0).
|
|
*/
|
|
tab->ref.key= 0;
|
|
}
|
|
else if (tmp_tbl->s->keys)
|
|
{
|
|
/* The query cannot use keys, remove all non unique keys */
|
|
tmp_tbl->use_index(-1, &tab->keys);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Evaluate the bitmap of used tables for items from the select list
|
|
*/
|
|
|
|
inline void JOIN::eval_select_list_used_tables()
|
|
{
|
|
select_list_used_tables= 0;
|
|
Item *item;
|
|
List_iterator_fast<Item> it(fields_list);
|
|
while ((item= it++))
|
|
{
|
|
select_list_used_tables|= item->used_tables();
|
|
}
|
|
Item_outer_ref *ref;
|
|
List_iterator_fast<Item_outer_ref> ref_it(select_lex->inner_refs_list);
|
|
while ((ref= ref_it++))
|
|
{
|
|
item= ref->outer_ref;
|
|
select_list_used_tables|= item->used_tables();
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Determine {after which table we'll produce ordered set}
|
|
|
|
SYNOPSIS
|
|
make_join_orderinfo()
|
|
join
|
|
|
|
|
|
DESCRIPTION
|
|
Determine if the set is already ordered for ORDER BY, so it can
|
|
disable join cache because it will change the ordering of the results.
|
|
Code handles sort table that is at any location (not only first after
|
|
the const tables) despite the fact that it's currently prohibited.
|
|
We must disable join cache if the first non-const table alone is
|
|
ordered. If there is a temp table the ordering is done as a last
|
|
operation and doesn't prevent join cache usage.
|
|
|
|
RETURN
|
|
Number of table after which the set will be ordered
|
|
join->tables if we don't need an ordered set
|
|
*/
|
|
|
|
static uint make_join_orderinfo(JOIN *join)
{
  /*
    The function has not been adapted to SJM nests yet and must not be
    called until it is.
  */
  DBUG_ASSERT(0);

  /* With a temporary table no ordered set is needed from any join table */
  if (join->need_tmp)
    return join->table_count;

  JOIN_TAB *sort_tab= join->get_sort_by_join_tab();
  if (!sort_tab)
    return join->table_count;
  return (uint) (sort_tab - join->join_tab);
}
|
|
|
|
/*
|
|
Deny usage of join buffer for the specified table
|
|
|
|
SYNOPSIS
|
|
set_join_cache_denial()
|
|
tab join table for which join buffer usage is to be denied
|
|
|
|
DESCRIPTION
|
|
The function denies usage of join buffer when joining the table 'tab'.
|
|
The table is marked as not employing any join buffer. If a join cache
|
|
object has been already allocated for the table this object is destroyed.
|
|
|
|
RETURN
|
|
none
|
|
*/
|
|
|
|
static
|
|
void set_join_cache_denial(JOIN_TAB *join_tab)
|
|
{
|
|
if (join_tab->cache)
|
|
{
|
|
/*
|
|
If there is a previous cache linked to this cache through the
|
|
next_cache pointer: remove the link.
|
|
*/
|
|
if (join_tab->cache->prev_cache)
|
|
join_tab->cache->prev_cache->next_cache= 0;
|
|
/*
|
|
Same for the next_cache
|
|
*/
|
|
if (join_tab->cache->next_cache)
|
|
join_tab->cache->next_cache->prev_cache= 0;
|
|
|
|
join_tab->cache->free();
|
|
join_tab->cache= 0;
|
|
}
|
|
if (join_tab->use_join_cache)
|
|
{
|
|
join_tab->use_join_cache= FALSE;
|
|
join_tab->used_join_cache_level= 0;
|
|
/*
|
|
It could be only sub_select(). It could not be sub_seject_sjm because we
|
|
don't do join buffering for the first table in sjm nest.
|
|
*/
|
|
join_tab[-1].next_select= sub_select;
|
|
join_tab[-1].cached_pfs_batch_update= join_tab[-1].pfs_batch_update();
|
|
if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join())
|
|
{
|
|
join_tab->type= JT_ALL;
|
|
join_tab->ref.key_parts= 0;
|
|
}
|
|
join_tab->join->return_tab= join_tab;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
The default implementation of unlock-row method of READ_RECORD,
|
|
used in all access methods.
|
|
*/
|
|
|
|
void rr_unlock_row(st_join_table *tab)
{
  /* Forward to the handler of the table this join tab reads records from */
  tab->read_record.table->file->unlock_row();
}
|
|
|
|
|
|
/**
|
|
Pick the appropriate access method functions
|
|
|
|
Sets the functions for the selected table access method
|
|
|
|
@param tab Table reference to put access method
|
|
*/
|
|
|
|
static void
|
|
pick_table_access_method(JOIN_TAB *tab)
|
|
{
|
|
switch (tab->type)
|
|
{
|
|
case JT_REF:
|
|
tab->read_first_record= join_read_always_key;
|
|
tab->read_record.read_record_func= join_read_next_same;
|
|
break;
|
|
|
|
case JT_REF_OR_NULL:
|
|
tab->read_first_record= join_read_always_key_or_null;
|
|
tab->read_record.read_record_func= join_read_next_same_or_null;
|
|
break;
|
|
|
|
case JT_CONST:
|
|
tab->read_first_record= join_read_const;
|
|
tab->read_record.read_record_func= join_no_more_records;
|
|
break;
|
|
|
|
case JT_EQ_REF:
|
|
tab->read_first_record= join_read_key;
|
|
tab->read_record.read_record_func= join_no_more_records;
|
|
break;
|
|
|
|
case JT_FT:
|
|
tab->read_first_record= join_ft_read_first;
|
|
tab->read_record.read_record_func= join_ft_read_next;
|
|
break;
|
|
|
|
case JT_SYSTEM:
|
|
tab->read_first_record= join_read_system;
|
|
tab->read_record.read_record_func= join_no_more_records;
|
|
break;
|
|
|
|
/* keep gcc happy */
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Revise usage of join buffer for the specified table and the whole nest
|
|
|
|
SYNOPSIS
|
|
revise_cache_usage()
|
|
tab join table for which join buffer usage is to be revised
|
|
|
|
DESCRIPTION
|
|
The function revises the decision to use a join buffer for the table 'tab'.
|
|
If this table happened to be among the inner tables of a nested outer join/
|
|
semi-join, the function denies usage of join buffers for all of them.
|
|
|
|
RETURN
|
|
none
|
|
*/
|
|
|
|
static
|
|
void revise_cache_usage(JOIN_TAB *join_tab)
|
|
{
|
|
JOIN_TAB *tab;
|
|
JOIN_TAB *first_inner;
|
|
|
|
if (join_tab->first_inner)
|
|
{
|
|
JOIN_TAB *end_tab= join_tab;
|
|
for (first_inner= join_tab->first_inner;
|
|
first_inner;
|
|
first_inner= first_inner->first_upper)
|
|
{
|
|
for (tab= end_tab; tab >= first_inner; tab--)
|
|
set_join_cache_denial(tab);
|
|
end_tab= first_inner;
|
|
}
|
|
}
|
|
else if (join_tab->first_sj_inner_tab)
|
|
{
|
|
first_inner= join_tab->first_sj_inner_tab;
|
|
for (tab= join_tab; tab >= first_inner; tab--)
|
|
{
|
|
set_join_cache_denial(tab);
|
|
}
|
|
}
|
|
else set_join_cache_denial(join_tab);
|
|
}
|
|
|
|
|
|
/*
|
|
end_select-compatible function that writes the record into a sjm temptable
|
|
|
|
SYNOPSIS
|
|
end_sj_materialize()
|
|
join The join
|
|
join_tab Points to right after the last join_tab in materialization bush
|
|
end_of_records FALSE <=> This call is made to pass another record
|
|
combination
|
|
TRUE <=> EOF (no action)
|
|
|
|
DESCRIPTION
|
|
This function is used by semi-join materialization to capture subquery's
|
|
resultset and write it into the temptable (that is, materialize it).
|
|
|
|
NOTE
|
|
This function is used only for semi-join materialization. Non-semijoin
|
|
materialization uses different mechanism.
|
|
|
|
RETURN
|
|
NESTED_LOOP_OK
|
|
NESTED_LOOP_ERROR
|
|
*/
|
|
|
|
enum_nested_loop_state
end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
{
  THD *thd= join->thd;
  SJ_MATERIALIZATION_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info;
  DBUG_ENTER("end_sj_materialize");

  /* Nothing to do on EOF */
  if (end_of_records)
    DBUG_RETURN(NESTED_LOOP_OK);

  TABLE *table= sjm->table;

  /* A combination with a NULL in any column is not written to the table */
  List_iterator<Item> col_it(sjm->sjm_table_cols);
  for (Item *col= col_it++; col; col= col_it++)
  {
    if (col->is_null())
      DBUG_RETURN(NESTED_LOOP_OK);
  }

  fill_record(thd, table, table->field, sjm->sjm_table_cols, true, false,
              true);
  if (unlikely(thd->is_error()))
    DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */

  int error= table->file->ha_write_tmp_row(table->record[0]);
  if (unlikely(error))
  {
    /*
      Only an error that is_fatal_error() reports as fatal (with
      HA_CHECK_DUP) leads to the conversion attempt below; its failure
      ends the nested loop with an error.
    */
    if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
        create_internal_tmp_table_from_heap(thd, table,
                                            sjm->sjm_table_param.start_recinfo,
                                            &sjm->sjm_table_param.recinfo,
                                            error, 1, NULL))
      DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
  }
  DBUG_RETURN(NESTED_LOOP_OK);
}
|
|
|
|
|
|
/*
|
|
Check whether a join buffer can be used to join the specified table
|
|
|
|
SYNOPSIS
|
|
check_join_cache_usage()
|
|
tab joined table to check join buffer usage for
|
|
options options of the join
|
|
no_jbuf_after don't use join buffering after table with this number
|
|
prev_tab previous join table
|
|
|
|
DESCRIPTION
|
|
The function finds out whether the table 'tab' can be joined using a join
|
|
buffer. This check is performed after the best execution plan for 'join'
|
|
has been chosen. If the function decides that a join buffer can be employed
|
|
then it selects the most appropriate join cache object that contains this
|
|
join buffer.
|
|
The result of the check and the type of the join buffer to be used
|
|
depend on:
|
|
- the access method to access rows of the joined table
|
|
- whether the join table is an inner table of an outer join or semi-join
|
|
- whether the optimizer switches
|
|
outer_join_with_cache, semijoin_with_cache, join_cache_incremental,
|
|
join_cache_hashed, join_cache_bka,
|
|
are set on or off
|
|
- the join cache level set for the query
|
|
- the join 'options'.
|
|
|
|
In any case join buffer is not used if the number of the joined table is
|
|
greater than 'no_jbuf_after'. It's also never used if the value of
|
|
join_cache_level is equal to 0.
|
|
If the optimizer switch outer_join_with_cache is off no join buffer is
|
|
used for outer join operations.
|
|
If the optimizer switch semijoin_with_cache is off no join buffer is used
|
|
for semi-join operations.
|
|
If the optimizer switch join_cache_incremental is off no incremental join
|
|
buffers are used.
|
|
If the optimizer switch join_cache_hashed is off then the optimizer uses
|
|
neither BNLH algorithm, nor BKAH algorithm to perform join operations.
|
|
|
|
If the optimizer switch join_cache_bka is off then the optimizer uses
|
|
neither BKA algorithm, nor BKAH algorithm to perform join operation.
|
|
The valid settings for join_cache_level lay in the interval 0..8.
|
|
If it set to 0 no join buffers are used to perform join operations.
|
|
Currently we differentiate between join caches of 8 levels:
|
|
1 : non-incremental join cache used for BNL join algorithm
|
|
2 : incremental join cache used for BNL join algorithm
|
|
3 : non-incremental join cache used for BNLH join algorithm
|
|
4 : incremental join cache used for BNLH join algorithm
|
|
5 : non-incremental join cache used for BKA join algorithm
|
|
6 : incremental join cache used for BKA join algorithm
|
|
7 : non-incremental join cache used for BKAH join algorithm
|
|
8 : incremental join cache used for BKAH join algorithm
|
|
If the value of join_cache_level is set to n then no join caches of
|
|
levels higher than n can be employed.
|
|
|
|
If the optimizer switches outer_join_with_cache, semijoin_with_cache,
|
|
join_cache_incremental, join_cache_hashed, join_cache_bka are all on
|
|
the following rules are applied.
|
|
If join_cache_level==1|2 then join buffer is used for inner joins, outer
|
|
joins and semi-joins with 'JT_ALL' access method. In this case a
|
|
JOIN_CACHE_BNL object is employed.
|
|
If join_cache_level==3|4 and then join buffer is used for a join operation
|
|
(inner join, outer join, semi-join) with 'JT_REF'/'JT_EQREF' access method
|
|
then a JOIN_CACHE_BNLH object is employed.
|
|
If an index is used to access rows of the joined table and the value of
|
|
join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed.
|
|
If an index is used to access rows of the joined table and the value of
|
|
join_cache_level==7|8 then a JOIN_CACHE_BKAH object is employed.
|
|
If the value of join_cache_level is odd then creation of a non-linked
|
|
join cache is forced.
|
|
|
|
Currently for any join operation a join cache of the level of the
|
|
highest allowed and applicable level is used.
|
|
For example, if join_cache_level is set to 6 and the optimizer switch
|
|
join_cache_bka is off, while the optimizer switch join_cache_hashed is
|
|
on then for any inner join operation with JT_REF/JT_EQREF access method
|
|
to the joined table the BNLH join algorithm will be used, while for
|
|
the table accessed by the JT_ALL methods the BNL algorithm will be used.
|
|
|
|
If the function decides that a join buffer can be used to join the table
|
|
'tab' then it sets the value of tab->use_join_cache to TRUE and assigns
|
|
the selected join cache object to the field 'cache' of the previous
|
|
join table.
|
|
If the function creates a join cache object it tries to initialize it. The
|
|
failure to do this results in an invocation of the function that destructs
|
|
the created object.
|
|
If the function decides that for some reason no join buffer can be used
|
|
for a table it calls the function revise_cache_usage that checks
|
|
whether join cache should be denied for some previous tables. In this case
|
|
a pointer to the first table for which join cache usage has been denied
|
|
is passed in join->return_tab (see the function set_join_cache_denial).
|
|
|
|
The function changes the values of the fields tab->icp_other_tables_ok and
|
|
tab->idx_cond_fact_out to FALSE if the chosen join cache algorithm
|
|
requires it.
|
|
|
|
NOTES
|
|
An inner table of a nested outer join or a nested semi-join can be currently
|
|
joined only when a linked cache object is employed. In these cases setting
|
|
join_cache_incremental to 'off' results in denial of usage of any join
|
|
buffer when joining the table.
|
|
For a nested outer join/semi-join, currently, we either use join buffers for
|
|
all inner tables or for none of them.
|
|
Some engines (e.g. Falcon) currently allow to use only a join cache
|
|
of the type JOIN_CACHE_BKAH when the joined table is accessed through
|
|
an index. For these engines setting the value of join_cache_level to 5 or 6
|
|
results in that no join buffer is used to join the table.
|
|
|
|
RETURN VALUE
|
|
cache level if cache is used, otherwise returns 0
|
|
|
|
TODO
|
|
Support BKA inside SJ-Materialization nests. When doing this, we'll need
|
|
to only store sj-inner tables in the join buffer.
|
|
#if 0
|
|
JOIN_TAB *first_tab= join->join_tab+join->const_tables;
|
|
uint n_tables= i-join->const_tables;
|
|
/ *
|
|
We normally put all preceding tables into the join buffer, except
|
|
for the constant tables.
|
|
If we're inside a semi-join materialization nest, e.g.
|
|
|
|
outer_tbl1 outer_tbl2 ( inner_tbl1, inner_tbl2 ) ...
|
|
^-- we're here
|
|
|
|
then we need to put into the join buffer only the tables from
|
|
within the nest.
|
|
* /
|
|
if (i >= first_sjm_table && i < last_sjm_table)
|
|
{
|
|
n_tables= i - first_sjm_table; // will be >0 if we got here
|
|
first_tab= join->join_tab + first_sjm_table;
|
|
}
|
|
#endif
|
|
*/
|
|
|
|
static
|
|
uint check_join_cache_usage(JOIN_TAB *tab,
|
|
ulonglong options,
|
|
uint no_jbuf_after,
|
|
uint table_index,
|
|
JOIN_TAB *prev_tab)
|
|
{
|
|
uint flags= 0;
|
|
ha_rows rows= 0;
|
|
uint bufsz= 4096;
|
|
JOIN_CACHE *prev_cache=0;
|
|
JOIN *join= tab->join;
|
|
MEM_ROOT *root= join->thd->mem_root;
|
|
uint cache_level= tab->used_join_cache_level;
|
|
bool force_unlinked_cache=
|
|
!(join->allowed_join_cache_types & JOIN_CACHE_INCREMENTAL_BIT);
|
|
bool no_hashed_cache=
|
|
!(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT);
|
|
bool no_bka_cache=
|
|
!(join->allowed_join_cache_types & JOIN_CACHE_BKA_BIT);
|
|
|
|
join->return_tab= 0;
|
|
|
|
if (tab->no_forced_join_cache)
|
|
goto no_join_cache;
|
|
|
|
/*
|
|
Don't use join cache if @@join_cache_level==0 or this table is the first
|
|
one join suborder (either at top level or inside a bush)
|
|
*/
|
|
if (cache_level == 0 || !prev_tab)
|
|
return 0;
|
|
|
|
if (force_unlinked_cache && (cache_level%2 == 0))
|
|
cache_level--;
|
|
|
|
if (options & SELECT_NO_JOIN_CACHE)
|
|
goto no_join_cache;
|
|
|
|
if (tab->use_quick == 2)
|
|
goto no_join_cache;
|
|
|
|
if (tab->table->map & join->complex_firstmatch_tables)
|
|
goto no_join_cache;
|
|
|
|
/*
|
|
Don't use join cache if we're inside a join tab range covered by LooseScan
|
|
strategy (TODO: LooseScan is very similar to FirstMatch so theoretically it
|
|
should be possible to use join buffering in the same way we're using it for
|
|
multi-table firstmatch ranges).
|
|
*/
|
|
if (tab->inside_loosescan_range)
|
|
goto no_join_cache;
|
|
|
|
if (tab->is_inner_table_of_semijoin() &&
|
|
!join->allowed_semijoin_with_cache)
|
|
goto no_join_cache;
|
|
if (tab->is_inner_table_of_outer_join() &&
|
|
!join->allowed_outer_join_with_cache)
|
|
goto no_join_cache;
|
|
|
|
if (tab->table->pos_in_table_list->table_function &&
|
|
!tab->table->pos_in_table_list->table_function->join_cache_allowed())
|
|
goto no_join_cache;
|
|
|
|
/*
|
|
Non-linked join buffers can't guarantee one match
|
|
*/
|
|
if (tab->is_nested_inner())
|
|
{
|
|
if (force_unlinked_cache || cache_level == 1)
|
|
goto no_join_cache;
|
|
if (cache_level & 1)
|
|
cache_level--;
|
|
}
|
|
|
|
/*
|
|
Don't use BKA for materialized tables. We could actually have a
|
|
meaningful use of BKA when linked join buffers are used.
|
|
|
|
The problem is, the temp.table is not filled (actually not even opened
|
|
properly) yet, and this doesn't let us call
|
|
handler->multi_range_read_info(). It is possible to come up with
|
|
estimates, etc. without acessing the table, but it seems not to worth the
|
|
effort now.
|
|
*/
|
|
if (tab->table->pos_in_table_list->is_materialized_derived())
|
|
{
|
|
no_bka_cache= true;
|
|
/*
|
|
Don't use hash join algorithm if the temporary table for the rows
|
|
of the derived table will be created with an equi-join key.
|
|
*/
|
|
if (tab->table->s->keys)
|
|
no_hashed_cache= true;
|
|
}
|
|
|
|
/*
|
|
Don't use join buffering if we're dictated not to by no_jbuf_after
|
|
(This is not meaningfully used currently)
|
|
*/
|
|
if (table_index > no_jbuf_after)
|
|
goto no_join_cache;
|
|
|
|
/*
|
|
TODO: BNL join buffer should be perfectly ok with tab->bush_children.
|
|
*/
|
|
if (tab->loosescan_match_tab || tab->bush_children)
|
|
goto no_join_cache;
|
|
|
|
for (JOIN_TAB *first_inner= tab->first_inner; first_inner;
|
|
first_inner= first_inner->first_upper)
|
|
{
|
|
if (first_inner != tab &&
|
|
(!first_inner->use_join_cache || !(tab-1)->use_join_cache))
|
|
goto no_join_cache;
|
|
}
|
|
if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab &&
|
|
(!tab->first_sj_inner_tab->use_join_cache || !(tab-1)->use_join_cache))
|
|
goto no_join_cache;
|
|
if (!prev_tab->use_join_cache)
|
|
{
|
|
/*
|
|
Check whether table tab and the previous one belong to the same nest of
|
|
inner tables and if so do not use join buffer when joining table tab.
|
|
*/
|
|
if (tab->first_inner && tab != tab->first_inner)
|
|
{
|
|
for (JOIN_TAB *first_inner= tab[-1].first_inner;
|
|
first_inner;
|
|
first_inner= first_inner->first_upper)
|
|
{
|
|
if (first_inner == tab->first_inner)
|
|
goto no_join_cache;
|
|
}
|
|
}
|
|
else if (tab->first_sj_inner_tab && tab != tab->first_sj_inner_tab &&
|
|
tab->first_sj_inner_tab == tab[-1].first_sj_inner_tab)
|
|
goto no_join_cache;
|
|
}
|
|
|
|
prev_cache= prev_tab->cache;
|
|
|
|
switch (tab->type) {
|
|
case JT_NEXT:
|
|
case JT_ALL:
|
|
case JT_RANGE:
|
|
if (cache_level == 1)
|
|
prev_cache= 0;
|
|
if ((tab->cache= new (root) JOIN_CACHE_BNL(join, tab, prev_cache)))
|
|
{
|
|
tab->icp_other_tables_ok= FALSE;
|
|
/* If make_join_select() hasn't called make_scan_filter(), do it now */
|
|
if (!tab->cache_select && tab->make_scan_filter())
|
|
goto no_join_cache;
|
|
return (2 - MY_TEST(!prev_cache));
|
|
}
|
|
goto no_join_cache;
|
|
case JT_SYSTEM:
|
|
case JT_CONST:
|
|
case JT_REF:
|
|
case JT_EQ_REF:
|
|
if (cache_level <=2 || (no_hashed_cache && no_bka_cache))
|
|
goto no_join_cache;
|
|
if (tab->ref.is_access_triggered())
|
|
goto no_join_cache;
|
|
|
|
if (!tab->is_ref_for_hash_join() && !no_bka_cache)
|
|
{
|
|
Cost_estimate cost;
|
|
cost.reset();
|
|
flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
|
|
if (tab->table->covering_keys.is_set(tab->ref.key))
|
|
flags|= HA_MRR_INDEX_ONLY;
|
|
rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
|
|
tab->ref.key_parts,
|
|
&bufsz, &flags, &cost);
|
|
}
|
|
|
|
if ((cache_level <=4 && !no_hashed_cache) || no_bka_cache ||
|
|
tab->is_ref_for_hash_join() ||
|
|
((flags & HA_MRR_NO_ASSOCIATION) && cache_level <=6))
|
|
{
|
|
if (!tab->hash_join_is_possible() ||
|
|
tab->make_scan_filter())
|
|
goto no_join_cache;
|
|
if (cache_level == 3)
|
|
prev_cache= 0;
|
|
if ((tab->cache= new (root) JOIN_CACHE_BNLH(join, tab, prev_cache)))
|
|
{
|
|
tab->icp_other_tables_ok= FALSE;
|
|
return (4 - MY_TEST(!prev_cache));
|
|
}
|
|
goto no_join_cache;
|
|
}
|
|
if (cache_level > 4 && no_bka_cache)
|
|
goto no_join_cache;
|
|
|
|
if ((flags & HA_MRR_NO_ASSOCIATION) &&
|
|
(cache_level <= 6 || no_hashed_cache))
|
|
goto no_join_cache;
|
|
|
|
if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL))
|
|
{
|
|
if (cache_level <= 6 || no_hashed_cache)
|
|
{
|
|
if (cache_level == 5)
|
|
prev_cache= 0;
|
|
if ((tab->cache= new (root) JOIN_CACHE_BKA(join, tab, flags, prev_cache)))
|
|
return (6 - MY_TEST(!prev_cache));
|
|
goto no_join_cache;
|
|
}
|
|
else
|
|
{
|
|
if (cache_level == 7)
|
|
prev_cache= 0;
|
|
if ((tab->cache= new (root) JOIN_CACHE_BKAH(join, tab, flags, prev_cache)))
|
|
{
|
|
tab->idx_cond_fact_out= FALSE;
|
|
return (8 - MY_TEST(!prev_cache));
|
|
}
|
|
goto no_join_cache;
|
|
}
|
|
}
|
|
goto no_join_cache;
|
|
default : ;
|
|
}
|
|
|
|
no_join_cache:
|
|
if (tab->type != JT_ALL && tab->type != JT_RANGE && tab->is_ref_for_hash_join())
|
|
{
|
|
tab->type= JT_ALL;
|
|
tab->ref.key_parts= 0;
|
|
}
|
|
revise_cache_usage(tab);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
Check whether join buffers can be used to join tables of a join
|
|
|
|
SYNOPSIS
|
|
check_join_cache_usage_for_tables()
|
|
join join whose tables are to be checked
|
|
options options of the join
|
|
no_jbuf_after don't use join buffering after table with this number
|
|
(The tables are assumed to be numbered in
|
|
first_linear_tab(join, WITHOUT_CONST_TABLES),
|
|
next_linear_tab(join, WITH_CONST_TABLES) order).
|
|
|
|
DESCRIPTION
|
|
For each table after the first non-constant table the function checks
|
|
whether the table can be joined using a join buffer. If the function decides
|
|
that a join buffer can be employed then it selects the most appropriate join
|
|
cache object that contains this join buffer whose level is not greater
|
|
than join_cache_level set for the join. To make this check the function
|
|
calls the function check_join_cache_usage for every non-constant table.
|
|
|
|
NOTES
|
|
In some situations (e.g. for nested outer joins, for nested semi-joins) only
|
|
incremental buffers can be used. If it turns out that for some inner table
|
|
no join buffer can be used then any inner table of an outer/semi-join nest
|
|
cannot use join buffer. In the case when already chosen buffer must be
|
|
denied for a table the function recalls check_join_cache_usage()
|
|
starting from this table. The pointer to the table from which the check
|
|
has to be restarted is returned in join->return_tab (see the description
|
|
of check_join_cache_usage).
|
|
*/
|
|
|
|
void check_join_cache_usage_for_tables(JOIN *join, ulonglong options,
|
|
uint no_jbuf_after)
|
|
{
|
|
JOIN_TAB *tab;
|
|
JOIN_TAB *prev_tab;
|
|
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
tab->used_join_cache_level= join->max_allowed_join_cache_level;
|
|
}
|
|
|
|
uint idx= join->const_tables;
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
restart:
|
|
tab->icp_other_tables_ok= TRUE;
|
|
tab->idx_cond_fact_out= TRUE;
|
|
|
|
/*
|
|
Check if we have a preceding join_tab, as something that will feed us
|
|
records that we could buffer. We don't have it, if
|
|
- this is the first non-const table in the join order,
|
|
- this is the first table inside an SJM nest.
|
|
*/
|
|
prev_tab= tab - 1;
|
|
if (tab == join->join_tab + join->const_tables ||
|
|
(tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab))
|
|
prev_tab= NULL;
|
|
|
|
switch (tab->type) {
|
|
case JT_SYSTEM:
|
|
case JT_CONST:
|
|
case JT_EQ_REF:
|
|
case JT_REF:
|
|
case JT_REF_OR_NULL:
|
|
case JT_NEXT:
|
|
case JT_ALL:
|
|
case JT_RANGE:
|
|
tab->used_join_cache_level= check_join_cache_usage(tab, options,
|
|
no_jbuf_after,
|
|
idx,
|
|
prev_tab);
|
|
tab->use_join_cache= MY_TEST(tab->used_join_cache_level);
|
|
/*
|
|
psergey-merge: todo: raise the question that this is really stupid that
|
|
we can first allocate a join buffer, then decide not to use it and free
|
|
it.
|
|
*/
|
|
if (join->return_tab)
|
|
{
|
|
tab= join->return_tab;
|
|
goto restart;
|
|
}
|
|
break;
|
|
default:
|
|
tab->used_join_cache_level= 0;
|
|
}
|
|
if (!tab->bush_children)
|
|
idx++;
|
|
}
|
|
}
|
|
|
|
/**
|
|
Remove pushdown conditions that are already checked by the scan phase
|
|
of BNL/BNLH joins.
|
|
|
|
@note
|
|
If the single-table condition for this table will be used by a
|
|
blocked join to pre-filter this table's rows, there is no need
|
|
to re-check the same single-table condition for each joined record.
|
|
|
|
This method removes from JOIN_TAB::select_cond and JOIN_TAB::select::cond
|
|
all top-level conjuncts that also appear in in JOIN_TAB::cache_select::cond.
|
|
*/
|
|
|
|
void JOIN_TAB::remove_redundant_bnl_scan_conds()
|
|
{
|
|
if (!(select_cond && cache_select && cache &&
|
|
(cache->get_join_alg() == JOIN_CACHE::BNL_JOIN_ALG ||
|
|
cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)))
|
|
return;
|
|
|
|
/*
|
|
select->cond is not processed separately. This method assumes it is always
|
|
the same as select_cond.
|
|
*/
|
|
if (select && select->cond != select_cond)
|
|
return;
|
|
|
|
if (is_cond_and(select_cond))
|
|
{
|
|
List_iterator<Item> pushed_cond_li(*((Item_cond*) select_cond)->argument_list());
|
|
Item *pushed_item;
|
|
Item_cond_and *reduced_select_cond= new (join->thd->mem_root)
|
|
Item_cond_and(join->thd);
|
|
|
|
if (is_cond_and(cache_select->cond))
|
|
{
|
|
List_iterator<Item> scan_cond_li(*((Item_cond*) cache_select->cond)->argument_list());
|
|
Item *scan_item;
|
|
while ((pushed_item= pushed_cond_li++))
|
|
{
|
|
bool found_cond= false;
|
|
scan_cond_li.rewind();
|
|
while ((scan_item= scan_cond_li++))
|
|
{
|
|
if (pushed_item->eq(scan_item, 0))
|
|
{
|
|
found_cond= true;
|
|
break;
|
|
}
|
|
}
|
|
if (!found_cond)
|
|
reduced_select_cond->add(pushed_item, join->thd->mem_root);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while ((pushed_item= pushed_cond_li++))
|
|
{
|
|
if (!pushed_item->eq(cache_select->cond, 0))
|
|
reduced_select_cond->add(pushed_item, join->thd->mem_root);
|
|
}
|
|
}
|
|
|
|
/*
|
|
JOIN_CACHE::check_match uses JOIN_TAB::select->cond instead of
|
|
JOIN_TAB::select_cond. set_cond() sets both pointers.
|
|
*/
|
|
if (reduced_select_cond->argument_list()->is_empty())
|
|
set_cond(NULL);
|
|
else if (reduced_select_cond->argument_list()->elements == 1)
|
|
set_cond(reduced_select_cond->argument_list()->head());
|
|
else
|
|
{
|
|
reduced_select_cond->quick_fix_field();
|
|
set_cond(reduced_select_cond);
|
|
}
|
|
}
|
|
else if (select_cond->eq(cache_select->cond, 0))
|
|
set_cond(NULL);
|
|
}
|
|
|
|
|
|
/*
|
|
Plan refinement stage: do various setup things for the executor
|
|
|
|
SYNOPSIS
|
|
make_join_readinfo()
|
|
join Join being processed
|
|
options Join's options (checking for SELECT_DESCRIBE,
|
|
SELECT_NO_JOIN_CACHE)
|
|
no_jbuf_after Don't use join buffering after table with this number.
|
|
|
|
DESCRIPTION
|
|
Plan refinement stage: do various set ups for the executioner
|
|
- set up use of join buffering
|
|
- push index conditions
|
|
- increment relevant counters
|
|
- etc
|
|
|
|
RETURN
|
|
FALSE - OK
|
|
TRUE - Out of memory
|
|
*/
|
|
|
|
static bool
|
|
make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
|
|
{
|
|
JOIN_TAB *tab;
|
|
uint i;
|
|
DBUG_ENTER("make_join_readinfo");
|
|
|
|
Json_writer_object trace_wrapper(join->thd);
|
|
Json_writer_array trace_arr(join->thd, "make_join_readinfo");
|
|
|
|
bool statistics= MY_TEST(!(join->select_options & SELECT_DESCRIBE));
|
|
bool sorted= 1;
|
|
|
|
join->complex_firstmatch_tables= table_map(0);
|
|
|
|
if (!join->select_lex->sj_nests.is_empty() &&
|
|
setup_semijoin_dups_elimination(join, options, no_jbuf_after))
|
|
DBUG_RETURN(TRUE); /* purecov: inspected */
|
|
|
|
/* For const tables, set partial_join_cardinality to 1. */
|
|
for (tab= join->join_tab; tab != join->join_tab + join->const_tables; tab++)
|
|
tab->partial_join_cardinality= 1;
|
|
|
|
JOIN_TAB *prev_tab= NULL;
|
|
i= join->const_tables;
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
prev_tab=tab, tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
/*
|
|
The approximation below for partial join cardinality is not good because
|
|
- it does not take into account some pushdown predicates
|
|
- it does not differentiate between inner joins, outer joins and
|
|
semi-joins.
|
|
Later it should be improved.
|
|
*/
|
|
|
|
if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)
|
|
prev_tab= NULL;
|
|
DBUG_ASSERT(tab->bush_children ||
|
|
tab->table == join->best_positions[i].table->table);
|
|
|
|
tab->partial_join_cardinality= join->best_positions[i].records_read *
|
|
(prev_tab ?
|
|
prev_tab->partial_join_cardinality : 1);
|
|
if (!tab->bush_children)
|
|
i++;
|
|
}
|
|
|
|
check_join_cache_usage_for_tables(join, options, no_jbuf_after);
|
|
|
|
JOIN_TAB *first_tab;
|
|
for (tab= first_tab= first_linear_tab(join,
|
|
WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (tab->bush_children)
|
|
{
|
|
if (setup_sj_materialization_part2(tab))
|
|
return TRUE;
|
|
}
|
|
|
|
TABLE *table=tab->table;
|
|
uint jcl= tab->used_join_cache_level;
|
|
tab->read_record.table= table;
|
|
tab->read_record.unlock_row= rr_unlock_row;
|
|
tab->read_record.print_error= true;
|
|
tab->sorted= sorted;
|
|
sorted= 0; // only first must be sorted
|
|
|
|
|
|
/*
|
|
We should not set tab->next_select for the last table in the
|
|
SMJ-nest, as setup_sj_materialization() has already set it to
|
|
end_sj_materialize.
|
|
*/
|
|
if (!(tab->bush_root_tab &&
|
|
tab->bush_root_tab->bush_children->end == tab + 1))
|
|
tab->next_select= sub_select; /* normal select */
|
|
|
|
if (tab->loosescan_match_tab)
|
|
{
|
|
if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab->
|
|
loosescan_key_len)))
|
|
return TRUE; /* purecov: inspected */
|
|
tab->sorted= TRUE;
|
|
}
|
|
table->status=STATUS_NO_RECORD;
|
|
pick_table_access_method (tab);
|
|
|
|
if (jcl)
|
|
tab[-1].next_select=sub_select_cache;
|
|
|
|
if (tab->cache && tab->cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)
|
|
tab->type= JT_HASH;
|
|
|
|
switch (tab->type) {
|
|
case JT_SYSTEM: // Only happens with left join
|
|
case JT_CONST: // Only happens with left join
|
|
/* Only happens with outer joins */
|
|
tab->read_first_record= tab->type == JT_SYSTEM ? join_read_system
|
|
: join_read_const;
|
|
tab->read_record.unlock_row= join_const_unlock_row;
|
|
if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) &&
|
|
(!jcl || jcl > 4) && !tab->ref.is_access_triggered())
|
|
push_index_cond(tab, tab->ref.key);
|
|
break;
|
|
case JT_EQ_REF:
|
|
tab->read_record.unlock_row= join_read_key_unlock_row;
|
|
/* fall through */
|
|
if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) &&
|
|
(!jcl || jcl > 4) && !tab->ref.is_access_triggered())
|
|
push_index_cond(tab, tab->ref.key);
|
|
break;
|
|
case JT_REF_OR_NULL:
|
|
case JT_REF:
|
|
if (tab->select)
|
|
{
|
|
delete tab->select->quick;
|
|
tab->select->quick=0;
|
|
}
|
|
delete tab->quick;
|
|
tab->quick=0;
|
|
if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) &&
|
|
(!jcl || jcl > 4) && !tab->ref.is_access_triggered())
|
|
push_index_cond(tab, tab->ref.key);
|
|
break;
|
|
case JT_NEXT: // Index scan
|
|
DBUG_ASSERT(!tab->quick);
|
|
if (tab->select)
|
|
{
|
|
/*
|
|
select->quick may be set if there was a possible range and
|
|
it had a higher cost than a table scan.
|
|
*/
|
|
delete tab->select->quick;
|
|
tab->select->quick=0;
|
|
}
|
|
if (tab->use_quick == 2)
|
|
{
|
|
join->thd->set_status_no_good_index_used();
|
|
tab->read_first_record= join_init_quick_read_record;
|
|
if (statistics)
|
|
join->thd->inc_status_select_range_check();
|
|
}
|
|
else
|
|
{
|
|
tab->read_first_record= join_read_first;
|
|
if (statistics)
|
|
{
|
|
join->thd->inc_status_select_scan();
|
|
join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
|
|
}
|
|
}
|
|
break;
|
|
case JT_ALL:
|
|
case JT_RANGE:
|
|
case JT_HASH:
|
|
{
|
|
bool have_quick_select= tab->select && tab->select->quick;
|
|
/*
|
|
If previous table use cache
|
|
If the incoming data set is already sorted don't use cache.
|
|
Also don't use cache if this is the first table in semi-join
|
|
materialization nest.
|
|
*/
|
|
/* These init changes read_record */
|
|
if (tab->use_quick == 2)
|
|
{
|
|
join->thd->set_status_no_good_index_used();
|
|
tab->read_first_record= join_init_quick_read_record;
|
|
if (statistics)
|
|
join->thd->inc_status_select_range_check();
|
|
}
|
|
else
|
|
{
|
|
if (!tab->bush_children)
|
|
tab->read_first_record= join_init_read_record;
|
|
if (tab == first_tab)
|
|
{
|
|
if (tab->select && tab->select->quick)
|
|
{
|
|
if (statistics)
|
|
join->thd->inc_status_select_range();
|
|
}
|
|
else
|
|
{
|
|
join->thd->set_status_no_index_used();
|
|
if (statistics)
|
|
{
|
|
join->thd->inc_status_select_scan();
|
|
join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (have_quick_select)
|
|
{
|
|
if (statistics)
|
|
join->thd->inc_status_select_full_range_join();
|
|
}
|
|
else
|
|
{
|
|
join->thd->set_status_no_index_used();
|
|
if (statistics)
|
|
{
|
|
join->thd->inc_status_select_full_join();
|
|
join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
|
|
}
|
|
}
|
|
}
|
|
if (!table->no_keyread)
|
|
{
|
|
if (!(have_quick_select &&
|
|
tab->select->quick->index != MAX_KEY && //not index_merge
|
|
table->covering_keys.is_set(tab->select->quick->index)) &&
|
|
(!table->covering_keys.is_clear_all() && ! have_quick_select))
|
|
{ // Only read index tree
|
|
if (tab->loosescan_match_tab)
|
|
tab->index= tab->loosescan_key;
|
|
else
|
|
tab->index= tab->cached_covering_key;
|
|
tab->read_first_record= join_read_first;
|
|
/* Read with index_first / index_next */
|
|
tab->type= tab->type == JT_ALL ? JT_NEXT : JT_HASH_NEXT;
|
|
}
|
|
}
|
|
if (have_quick_select &&
|
|
tab->select->quick->index != MAX_KEY &&
|
|
!tab->table->covering_keys.is_set(tab->select->quick->index))
|
|
push_index_cond(tab, tab->select->quick->index);
|
|
}
|
|
break;
|
|
}
|
|
case JT_FT:
|
|
break;
|
|
/* purecov: begin deadcode */
|
|
default:
|
|
DBUG_PRINT("error",("Table type %d found",tab->type));
|
|
break;
|
|
case JT_UNKNOWN:
|
|
case JT_MAYBE_REF:
|
|
abort();
|
|
/* purecov: end */
|
|
}
|
|
tab->cached_pfs_batch_update= tab->pfs_batch_update();
|
|
|
|
DBUG_EXECUTE("where",
|
|
char buff[256];
|
|
String str(buff,sizeof(buff),system_charset_info);
|
|
str.length(0);
|
|
if (tab->table)
|
|
str.append(tab->table->alias);
|
|
else
|
|
str.append(STRING_WITH_LEN("<no_table_name>"));
|
|
str.append(STRING_WITH_LEN(" final_pushdown_cond"));
|
|
print_where(tab->select_cond, str.c_ptr_safe(), QT_ORDINARY););
|
|
}
|
|
uint n_top_tables= (uint)(join->join_tab_ranges.head()->end -
|
|
join->join_tab_ranges.head()->start);
|
|
|
|
join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */
|
|
|
|
/*
|
|
If a join buffer is used to join a table the ordering by an index
|
|
for the first non-constant table cannot be employed anymore.
|
|
*/
|
|
for (tab= join->join_tab + join->const_tables ;
|
|
tab != join->join_tab + n_top_tables ; tab++)
|
|
{
|
|
if (tab->use_join_cache)
|
|
{
|
|
JOIN_TAB *sort_by_tab= join->group && join->simple_group &&
|
|
join->group_list ?
|
|
join->join_tab+join->const_tables :
|
|
join->get_sort_by_join_tab();
|
|
/*
|
|
It could be that sort_by_tab==NULL, and the plan is to use filesort()
|
|
on the first table.
|
|
*/
|
|
if (join->order)
|
|
{
|
|
join->simple_order= 0;
|
|
join->need_tmp= 1;
|
|
}
|
|
|
|
if (join->group && !join->group_optimized_away)
|
|
{
|
|
join->need_tmp= 1;
|
|
join->simple_group= 0;
|
|
}
|
|
|
|
if (sort_by_tab)
|
|
{
|
|
join->need_tmp= 1;
|
|
join->simple_order= join->simple_group= 0;
|
|
if (sort_by_tab->type == JT_NEXT &&
|
|
!sort_by_tab->table->covering_keys.is_set(sort_by_tab->index))
|
|
{
|
|
sort_by_tab->type= JT_ALL;
|
|
sort_by_tab->read_first_record= join_init_read_record;
|
|
}
|
|
else if (sort_by_tab->type == JT_HASH_NEXT &&
|
|
!sort_by_tab->table->covering_keys.is_set(sort_by_tab->index))
|
|
{
|
|
sort_by_tab->type= JT_HASH;
|
|
sort_by_tab->read_first_record= join_init_read_record;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/**
|
|
Give error if we some tables are done with a full join.
|
|
|
|
This is used by multi_table_update and multi_table_delete when running
|
|
in safe mode.
|
|
|
|
@param join Join condition
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 Error (full join used)
|
|
*/
|
|
|
|
bool error_if_full_join(JOIN *join)
|
|
{
|
|
for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab;
|
|
tab= next_top_level_tab(join, tab))
|
|
{
|
|
if ((tab->type == JT_ALL || tab->type == JT_NEXT))
|
|
{
|
|
my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE,
|
|
ER_THD(join->thd,
|
|
ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0));
|
|
return(1);
|
|
}
|
|
}
|
|
return(0);
|
|
}
|
|
|
|
|
|
/**
|
|
build_range_rowid_filter()
|
|
|
|
Build range rowid filter. This function should only be called if
|
|
need_to_build_rowid_filter is true
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 Error, transaction should be rolled back
|
|
*/
|
|
|
|
bool JOIN_TAB::build_range_rowid_filter()
|
|
{
|
|
|
|
DBUG_ASSERT(need_to_build_rowid_filter && rowid_filter);
|
|
|
|
/**
|
|
The same handler object (table->file) is used to build a filter
|
|
and to perfom a primary table access (by the main query).
|
|
|
|
To estimate the time for filter building tracker should be changed
|
|
and after building of the filter has been finished it should be
|
|
switched back to the previos tracker.
|
|
*/
|
|
|
|
Exec_time_tracker *table_tracker= table->file->get_time_tracker();
|
|
Rowid_filter_tracker *rowid_tracker= rowid_filter->get_tracker();
|
|
table->file->set_time_tracker(rowid_tracker->get_time_tracker());
|
|
rowid_tracker->start_tracking(join->thd);
|
|
Rowid_filter::build_return_code build_rc= rowid_filter->build();
|
|
if (build_rc != Rowid_filter::SUCCESS)
|
|
{
|
|
/* Failed building rowid filter */
|
|
clear_range_rowid_filter();
|
|
}
|
|
need_to_build_rowid_filter= false;
|
|
rowid_tracker->stop_tracking(join->thd);
|
|
table->file->set_time_tracker(table_tracker);
|
|
return (build_rc == Rowid_filter::FATAL_ERROR);
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
Clear used rowid filter
|
|
|
|
Note that rowid_filter is allocated on mem_root and not really freed!
|
|
Only the rowid data is freed.
|
|
*/
|
|
|
|
void JOIN_TAB::clear_range_rowid_filter()
|
|
{
|
|
delete rowid_filter;
|
|
rowid_filter= 0;
|
|
need_to_build_rowid_filter= false;
|
|
range_rowid_filter_info= 0;
|
|
}
|
|
|
|
/**
|
|
cleanup JOIN_TAB.
|
|
|
|
DESCRIPTION
|
|
This is invoked when we've finished all join executions.
|
|
*/
|
|
|
|
void JOIN_TAB::cleanup()
|
|
{
|
|
DBUG_ENTER("JOIN_TAB::cleanup");
|
|
|
|
DBUG_PRINT("enter", ("tab: %p table %s.%s",
|
|
this,
|
|
(table ? table->s->db.str : "?"),
|
|
(table ? table->s->table_name.str : "?")));
|
|
delete select;
|
|
select= 0;
|
|
delete quick;
|
|
quick= 0;
|
|
if (rowid_filter)
|
|
clear_range_rowid_filter();
|
|
if (cache)
|
|
{
|
|
cache->free();
|
|
cache= 0;
|
|
}
|
|
limit= 0;
|
|
// Free select that was created for filesort outside of create_sort_index
|
|
if (filesort && filesort->select && !filesort->own_select)
|
|
delete filesort->select;
|
|
delete filesort;
|
|
filesort= NULL;
|
|
if (table)
|
|
{
|
|
table->file->ha_end_keyread();
|
|
if (type == JT_FT)
|
|
table->file->ha_ft_end();
|
|
else
|
|
table->file->ha_index_or_rnd_end();
|
|
preread_init_done= FALSE;
|
|
if (table->pos_in_table_list && table->pos_in_table_list->jtbm_subselect)
|
|
{
|
|
if (table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
|
|
{
|
|
/*
|
|
Set this to NULL so that cleanup_empty_jtbm_semi_joins() doesn't
|
|
attempt to make another free_tmp_table call.
|
|
*/
|
|
table->pos_in_table_list->table= NULL;
|
|
free_tmp_table(join->thd, table);
|
|
}
|
|
else
|
|
{
|
|
TABLE_LIST *tmp= table->pos_in_table_list;
|
|
end_read_record(&read_record);
|
|
tmp->jtbm_subselect->cleanup();
|
|
/*
|
|
The above call freed the materialized temptable. Set it to NULL so
|
|
that we don't attempt to touch it if JOIN_TAB::cleanup() is invoked
|
|
multiple times (it may be)
|
|
*/
|
|
tmp->table= NULL;
|
|
}
|
|
table= NULL;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
/*if (table->pos_in_table_list && table->pos_in_table_list->derived)
|
|
{
|
|
delete table->pos_in_table_list->derived->derived->dt_handler;
|
|
}*/
|
|
|
|
/*
|
|
We need to reset this for next select
|
|
(Tested in part_of_refkey)
|
|
*/
|
|
table->reginfo.join_tab= 0;
|
|
}
|
|
end_read_record(&read_record);
|
|
explain_plan= NULL;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
Estimate the time to get rows of the joined table
|
|
|
|
Updates found_records, records, cached_covering_key, read_time and
|
|
cache_scan_and_compare_time
|
|
*/
|
|
|
|
void JOIN_TAB::estimate_scan_time()
|
|
{
|
|
THD *thd= join->thd;
|
|
handler *file= table->file;
|
|
double row_copy_cost, copy_cost;
|
|
ALL_READ_COST * const cost= &cached_scan_and_compare_cost;
|
|
cost->reset();
|
|
|
|
cached_covering_key= MAX_KEY;
|
|
if (table->is_created())
|
|
{
|
|
if (table->is_filled_at_execution())
|
|
{
|
|
get_delayed_table_estimates(table, &records, &read_time,
|
|
&startup_cost);
|
|
table->opt_range_condition_rows= records;
|
|
table->used_stat_records= records;
|
|
cost->row_cost.cpu= read_time;
|
|
row_copy_cost= file->ROW_COPY_COST;
|
|
}
|
|
else
|
|
{
|
|
records= table->stat_records();
|
|
/*
|
|
table->opt_range_condition_rows has already been set to
|
|
table->file->stats.records
|
|
*/
|
|
DBUG_ASSERT(table->opt_range_condition_rows == records);
|
|
|
|
if (!table->covering_keys.is_clear_all() && ! table->no_keyread)
|
|
{
|
|
cached_covering_key= find_shortest_key(table, &table->covering_keys);
|
|
cost->index_cost= file->ha_key_scan_time(cached_covering_key, records);
|
|
read_time= file->cost(cost->index_cost);
|
|
row_copy_cost= 0; // Included in ha_key_scan_time
|
|
}
|
|
else
|
|
{
|
|
cost->row_cost= file->ha_scan_time(records);
|
|
read_time= file->cost(cost->row_cost);
|
|
row_copy_cost= 0; // Included in ha_scan_time
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
The following is same as calling
|
|
TABLE_SHARE::update_optimizer_costs, but without locks
|
|
*/
|
|
if (table->s->db_type() == heap_hton)
|
|
memcpy(&table->s->optimizer_costs, &heap_optimizer_costs,
|
|
sizeof(heap_optimizer_costs));
|
|
else
|
|
memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs,
|
|
sizeof(tmp_table_optimizer_costs));
|
|
file->set_optimizer_costs(thd);
|
|
table->s->optimizer_costs_inited=1;
|
|
|
|
records= table->stat_records();
|
|
DBUG_ASSERT(table->opt_range_condition_rows == records);
|
|
cost->row_cost= table->file->ha_scan_time(MY_MAX(records, 1000));
|
|
read_time= file->cost(cost->row_cost);
|
|
row_copy_cost= table->s->optimizer_costs.row_copy_cost;
|
|
}
|
|
|
|
found_records= records;
|
|
copy_cost= (records * (row_copy_cost + WHERE_COST_THD(thd)));
|
|
cached_scan_and_compare_time= read_time + copy_cost;
|
|
cost->copy_cost+= copy_cost;
|
|
|
|
/*
|
|
Assume we only need to do physical IO once even if we scan the file
|
|
multiple times.
|
|
*/
|
|
cost->max_index_blocks= (longlong) ceil(cost->index_cost.io);
|
|
cost->max_row_blocks= (longlong) ceil(cost->row_cost.io);
|
|
DBUG_ASSERT(compare_cost(cached_scan_and_compare_time,
|
|
file->cost(cost)));
|
|
}
|
|
|
|
|
|
/**
|
|
Estimate the number of rows that an access method will read from a table.
|
|
|
|
@todo: why not use JOIN_TAB::found_records or JOIN_TAB::records_read
|
|
*/
|
|
|
|
double JOIN_TAB::get_examined_rows()
|
|
{
|
|
double examined_rows;
|
|
const SQL_SELECT *sel= get_sql_select();
|
|
|
|
if (sel && sel->quick && use_quick != 2)
|
|
{
|
|
examined_rows= (double) sel->quick->records;
|
|
DBUG_ASSERT(examined_rows == sel->quick->records);
|
|
}
|
|
else if (type == JT_NEXT || type == JT_ALL || type == JT_RANGE ||
|
|
type == JT_HASH || type == JT_HASH_NEXT)
|
|
{
|
|
if (limit)
|
|
{
|
|
/*
|
|
@todo This estimate is wrong, a LIMIT query may examine much more rows
|
|
than the LIMIT itself.
|
|
*/
|
|
examined_rows= (double)limit;
|
|
}
|
|
else
|
|
{
|
|
if (table->is_filled_at_execution())
|
|
examined_rows= (double)records;
|
|
else
|
|
{
|
|
/*
|
|
handler->info(HA_STATUS_VARIABLE) has been called in
|
|
make_join_statistics()
|
|
*/
|
|
examined_rows= (double)table->stat_records();
|
|
}
|
|
}
|
|
}
|
|
else
|
|
examined_rows= records_init;
|
|
|
|
if (examined_rows >= (double) HA_ROWS_MAX)
|
|
return (double) HA_ROWS_MAX;
|
|
return examined_rows;
|
|
}
|
|
|
|
|
|
/**
|
|
Initialize the join_tab before reading.
|
|
Currently only derived table/view materialization is done here.
|
|
|
|
TODO: consider moving this together with join_tab_execution_startup
|
|
*/
|
|
|
|
bool JOIN_TAB::preread_init()
|
|
{
|
|
TABLE_LIST *derived= table->pos_in_table_list;
|
|
DBUG_ENTER("JOIN_TAB::preread_init");
|
|
|
|
if (!derived || !derived->is_materialized_derived())
|
|
{
|
|
preread_init_done= TRUE;
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
/* Materialize derived table/view. */
|
|
if ((!derived->get_unit()->executed ||
|
|
derived->is_recursive_with_table() ||
|
|
derived->get_unit()->uncacheable) &&
|
|
mysql_handle_single_derived(join->thd->lex,
|
|
derived, DT_CREATE | DT_FILL))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
if (!(derived->get_unit()->uncacheable & UNCACHEABLE_DEPENDENT) ||
|
|
derived->is_nonrecursive_derived_with_rec_ref() ||
|
|
is_split_derived)
|
|
preread_init_done= TRUE;
|
|
if (select && select->quick)
|
|
select->quick->replace_handler(table->file);
|
|
|
|
DBUG_EXECUTE_IF("show_explain_probe_join_tab_preread",
|
|
if (dbug_user_var_equals_int(join->thd,
|
|
"show_explain_probe_select_id",
|
|
join->select_lex->select_number))
|
|
dbug_serve_apcs(join->thd, 1);
|
|
);
|
|
|
|
/* init ftfuns for just initialized derived table */
|
|
if (table->fulltext_searched)
|
|
if (init_ftfuncs(join->thd, join->select_lex, MY_TEST(join->order)))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/**
|
|
pfs_batch_update()
|
|
|
|
Check if the used table will do a lot of read calls in a row without
|
|
any intervening read calls to any other tables.
|
|
|
|
@return 0 No
|
|
@return 1 Yes
|
|
|
|
If yes, then the handler will be informed about this with the
|
|
start_psi_batch_mode() / end_psi_batch_mode() calls
|
|
|
|
This is currently used only to speed up performance schema code for
|
|
multiple reads.
|
|
|
|
In the future we may also inform the engine about this. The engine
|
|
could use this information to cache the used pages, keep blocks
|
|
locked in the page cache and similar things to speed up repeated
|
|
reads.
|
|
|
|
The return value of this function is cached in
|
|
JOIN_TAB::cached_pfs_batch_update
|
|
*/
|
|
|
|
bool JOIN_TAB::pfs_batch_update()
|
|
{
|
|
/*
|
|
Use PFS batch mode if
|
|
1. tab is an inner-most table, or
|
|
2. will read more than one row (not eq_ref or const access type)
|
|
3. no subqueries
|
|
*/
|
|
|
|
return join->join_tab + join->table_count - 1 == this && // 1
|
|
type != JT_EQ_REF && type != JT_CONST && type != JT_SYSTEM && // 2
|
|
(!select_cond || !select_cond->with_subquery()); // 3
|
|
}
|
|
|
|
|
|
/**
|
|
Build a TABLE_REF structure for index lookup in the temporary table
|
|
|
|
@param thd Thread handle
|
|
@param tmp_key The temporary table key
|
|
@param it The iterator of items for lookup in the key
|
|
@param skip Number of fields from the beginning to skip
|
|
|
|
@details
|
|
Build TABLE_REF object for lookup in the key 'tmp_key' using items
|
|
accessible via item iterator 'it'.
|
|
|
|
@retval TRUE Error
|
|
@retval FALSE OK
|
|
*/
|
|
|
|
bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
|
|
KEY *tmp_key,
|
|
Item_iterator &it,
|
|
bool value,
|
|
uint skip)
|
|
{
|
|
uint tmp_key_parts= tmp_key->user_defined_key_parts;
|
|
uint i;
|
|
DBUG_ENTER("TABLE_REF::tmp_table_index_lookup_init");
|
|
|
|
key= 0; /* The only temp table index. */
|
|
key_length= tmp_key->key_length;
|
|
if (!(key_buff=
|
|
(uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
|
|
!(key_copy=
|
|
(store_key**) thd->alloc((sizeof(store_key*) *
|
|
(tmp_key_parts + 1)))) ||
|
|
!(items=
|
|
(Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
key_buff2= key_buff + ALIGN_SIZE(tmp_key->key_length);
|
|
|
|
KEY_PART_INFO *cur_key_part= tmp_key->key_part;
|
|
store_key **ref_key= key_copy;
|
|
uchar *cur_ref_buff= key_buff;
|
|
|
|
it.open();
|
|
for (i= 0; i < skip; i++) it.next();
|
|
for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
|
|
{
|
|
Item *item= it.next();
|
|
DBUG_ASSERT(item);
|
|
items[i]= item;
|
|
int null_count= MY_TEST(cur_key_part->field->real_maybe_null());
|
|
*ref_key= new store_key_item(thd, cur_key_part->field,
|
|
/* TIMOUR:
|
|
the NULL byte is taken into account in
|
|
cur_key_part->store_length, so instead of
|
|
cur_ref_buff + MY_TEST(maybe_null), we could
|
|
use that information instead.
|
|
*/
|
|
cur_ref_buff + null_count,
|
|
null_count ? cur_ref_buff : 0,
|
|
cur_key_part->length, items[i], value);
|
|
cur_ref_buff+= cur_key_part->store_length;
|
|
}
|
|
*ref_key= NULL; /* End marker. */
|
|
key_err= 1;
|
|
key_parts= tmp_key_parts;
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
Check if ref access uses "Full scan on NULL key" (i.e. it actually alternates
|
|
between ref access and full table scan)
|
|
*/
|
|
|
|
bool TABLE_REF::is_access_triggered()
|
|
{
|
|
for (uint i = 0; i < key_parts; i++)
|
|
{
|
|
if (cond_guards[i])
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Partially cleanup JOIN after it has executed: close index or rnd read
|
|
(table cursors), free quick selects.
|
|
|
|
This function is called in the end of execution of a JOIN, before the used
|
|
tables are unlocked and closed.
|
|
|
|
For a join that is resolved using a temporary table, the first sweep is
|
|
performed against actual tables and an intermediate result is inserted
|
|
into the temprorary table.
|
|
The last sweep is performed against the temporary table. Therefore,
|
|
the base tables and associated buffers used to fill the temporary table
|
|
are no longer needed, and this function is called to free them.
|
|
|
|
For a join that is performed without a temporary table, this function
|
|
is called after all rows are sent, but before EOF packet is sent.
|
|
|
|
For a simple SELECT with no subqueries this function performs a full
|
|
cleanup of the JOIN and calls mysql_unlock_read_tables to free used base
|
|
tables.
|
|
|
|
If a JOIN is executed for a subquery or if it has a subquery, we can't
|
|
do the full cleanup and need to do a partial cleanup only.
|
|
- If a JOIN is not the top level join, we must not unlock the tables
|
|
because the outer select may not have been evaluated yet, and we
|
|
can't unlock only selected tables of a query.
|
|
- Additionally, if this JOIN corresponds to a correlated subquery, we
|
|
should not free quick selects and join buffers because they will be
|
|
needed for the next execution of the correlated subquery.
|
|
- However, if this is a JOIN for a [sub]select, which is not
|
|
a correlated subquery itself, but has subqueries, we can free it
|
|
fully and also free JOINs of all its subqueries. The exception
|
|
is a subquery in SELECT list, e.g: @n
|
|
SELECT a, (select MY_MAX(b) from t1) group by c @n
|
|
This subquery will not be evaluated at first sweep and its value will
|
|
not be inserted into the temporary table. Instead, it's evaluated
|
|
when selecting from the temporary table. Therefore, it can't be freed
|
|
here even though it's not correlated.
|
|
|
|
@todo
|
|
Unlock tables even if the join isn't top level select in the tree
|
|
*/
|
|
|
|
void JOIN::join_free()
|
|
{
|
|
SELECT_LEX_UNIT *tmp_unit;
|
|
SELECT_LEX *sl;
|
|
/*
|
|
Optimization: if not EXPLAIN and we are done with the JOIN,
|
|
free all tables.
|
|
*/
|
|
bool full= !(select_lex->uncacheable) && !(thd->lex->describe);
|
|
bool can_unlock= full;
|
|
DBUG_ENTER("JOIN::join_free");
|
|
|
|
cleanup(full);
|
|
|
|
for (tmp_unit= select_lex->first_inner_unit();
|
|
tmp_unit;
|
|
tmp_unit= tmp_unit->next_unit())
|
|
{
|
|
if (tmp_unit->with_element && tmp_unit->with_element->is_recursive)
|
|
continue;
|
|
for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
|
|
{
|
|
Item_subselect *subselect= sl->master_unit()->item;
|
|
bool full_local= full && (!subselect || subselect->is_evaluated());
|
|
/*
|
|
If this join is evaluated, we can fully clean it up and clean up all
|
|
its underlying joins even if they are correlated -- they will not be
|
|
used any more anyway.
|
|
If this join is not yet evaluated, we still must clean it up to
|
|
close its table cursors -- it may never get evaluated, as in case of
|
|
... HAVING FALSE OR a IN (SELECT ...))
|
|
but all table cursors must be closed before the unlock.
|
|
*/
|
|
sl->cleanup_all_joins(full_local);
|
|
/* Can't unlock if at least one JOIN is still needed */
|
|
can_unlock= can_unlock && full_local;
|
|
}
|
|
}
|
|
/*
|
|
We are not using tables anymore
|
|
Unlock all tables. We may be in an INSERT .... SELECT statement.
|
|
*/
|
|
if (can_unlock && lock && thd->lock && ! thd->locked_tables_mode &&
|
|
!(select_options & SELECT_NO_UNLOCK) &&
|
|
!select_lex->subquery_in_having &&
|
|
(select_lex == (thd->lex->unit.fake_select_lex ?
|
|
thd->lex->unit.fake_select_lex :
|
|
thd->lex->first_select_lex())))
|
|
{
|
|
/*
|
|
TODO: unlock tables even if the join isn't top level select in the
|
|
tree.
|
|
*/
|
|
mysql_unlock_read_tables(thd, lock); // Don't free join->lock
|
|
lock= 0;
|
|
}
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
Free resources of given join.
|
|
|
|
@param full true if we should free all resources, call with full==1
|
|
should be last, before it this function can be called with
|
|
full==0
|
|
|
|
@note
|
|
With subquery this function definitely will be called several times,
|
|
but even for simple query it can be called several times.
|
|
*/
|
|
|
|
void JOIN::cleanup(bool full)
|
|
{
|
|
DBUG_ENTER("JOIN::cleanup");
|
|
DBUG_PRINT("enter", ("select: %d (%p) join: %p full: %u",
|
|
select_lex->select_number, select_lex, this,
|
|
(uint) full));
|
|
|
|
if (full)
|
|
have_query_plan= QEP_DELETED;
|
|
|
|
if (original_join_tab)
|
|
{
|
|
/* Free the original optimized join created for the group_by_handler */
|
|
join_tab= original_join_tab;
|
|
original_join_tab= 0;
|
|
}
|
|
|
|
if (join_tab)
|
|
{
|
|
JOIN_TAB *tab;
|
|
|
|
if (full)
|
|
{
|
|
/*
|
|
Call cleanup() on join tabs used by the join optimization
|
|
(join->join_tab may now be pointing to result of make_simple_join
|
|
reading from the temporary table)
|
|
|
|
We also need to check table_count to handle various degenerate joins
|
|
w/o tables: they don't have some members initialized and
|
|
WALK_OPTIMIZATION_TABS may not work correctly for them.
|
|
*/
|
|
if (top_join_tab_count && tables_list)
|
|
{
|
|
for (tab= first_breadth_first_tab(); tab;
|
|
tab= next_breadth_first_tab(first_breadth_first_tab(),
|
|
top_join_tab_count, tab))
|
|
{
|
|
tab->cleanup();
|
|
delete tab->filesort_result;
|
|
tab->filesort_result= NULL;
|
|
}
|
|
}
|
|
cleaned= true;
|
|
//psergey2: added (Q: why not in the above loop?)
|
|
{
|
|
JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
|
|
for (uint i= 0; i < aggr_tables; i++, curr_tab++)
|
|
{
|
|
if (curr_tab->aggr)
|
|
{
|
|
free_tmp_table(thd, curr_tab->table);
|
|
curr_tab->table= NULL;
|
|
delete curr_tab->tmp_table_param;
|
|
curr_tab->tmp_table_param= NULL;
|
|
curr_tab->aggr= NULL;
|
|
|
|
delete curr_tab->filesort_result;
|
|
curr_tab->filesort_result= NULL;
|
|
}
|
|
}
|
|
aggr_tables= 0; // psergey3
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
tab->partial_cleanup();
|
|
}
|
|
}
|
|
}
|
|
if (full)
|
|
{
|
|
cleanup_empty_jtbm_semi_joins(this, join_list);
|
|
|
|
// Run Cached_item DTORs!
|
|
group_fields.delete_elements();
|
|
order_fields.delete_elements();
|
|
|
|
/*
|
|
We can't call delete_elements() on copy_funcs as this will cause
|
|
problems in free_elements() as some of the elements are then deleted.
|
|
*/
|
|
tmp_table_param.copy_funcs.empty();
|
|
/*
|
|
If we have tmp_join and 'this' JOIN is not tmp_join and
|
|
tmp_table_param.copy_field's of them are equal then we have to remove
|
|
pointer to tmp_table_param.copy_field from tmp_join, because it will
|
|
be removed in tmp_table_param.cleanup().
|
|
*/
|
|
tmp_table_param.cleanup();
|
|
|
|
delete pushdown_query;
|
|
pushdown_query= 0;
|
|
|
|
if (!join_tab)
|
|
{
|
|
List_iterator<TABLE_LIST> li(*join_list);
|
|
TABLE_LIST *table_ref;
|
|
while ((table_ref= li++))
|
|
{
|
|
if (table_ref->table &&
|
|
table_ref->jtbm_subselect &&
|
|
table_ref->jtbm_subselect->is_jtbm_const_tab)
|
|
{
|
|
free_tmp_table(thd, table_ref->table);
|
|
table_ref->table= NULL;
|
|
}
|
|
}
|
|
}
|
|
free_pushdown_handlers(*join_list);
|
|
}
|
|
/* Restore ref array to original state */
|
|
if (current_ref_ptrs != items0)
|
|
{
|
|
set_items_ref_array(items0);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/**
|
|
Clean up all derived pushdown handlers in this join.
|
|
|
|
@detail
|
|
Note that dt_handler is picked at the prepare stage (as opposed
|
|
to optimization stage where one could expect this).
|
|
Because of that, we have to do cleanups in this function that is called
|
|
from JOIN::cleanup() and not in JOIN_TAB::cleanup.
|
|
*/
|
|
void JOIN::free_pushdown_handlers(List<TABLE_LIST>& join_list)
|
|
{
|
|
List_iterator<TABLE_LIST> li(join_list);
|
|
TABLE_LIST *table_ref;
|
|
while ((table_ref= li++))
|
|
{
|
|
if (table_ref->nested_join)
|
|
free_pushdown_handlers(table_ref->nested_join->join_list);
|
|
if (table_ref->pushdown_derived)
|
|
{
|
|
delete table_ref->pushdown_derived;
|
|
table_ref->pushdown_derived= NULL;
|
|
}
|
|
delete table_ref->dt_handler;
|
|
table_ref->dt_handler= NULL;
|
|
}
|
|
}
|
|
|
|
/**
|
|
Remove the following expressions from ORDER BY and GROUP BY:
|
|
Constant expressions @n
|
|
Expression that only uses tables that are of type EQ_REF and the reference
|
|
is in the ORDER list or if all referred tables are of the above type.
|
|
|
|
In the following, the X field can be removed:
|
|
@code
|
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
|
|
SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
|
|
@endcode
|
|
|
|
These can't be optimized:
|
|
@code
|
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
|
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
|
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
|
|
@endcode
|
|
|
|
TODO: this function checks ORDER::used, which can only have a value of 0.
|
|
*/
|
|
|
|
static bool
|
|
eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab)
|
|
{
|
|
if (tab->cached_eq_ref_table) // If cached
|
|
return tab->eq_ref_table;
|
|
tab->cached_eq_ref_table=1;
|
|
/* We can skip const tables only if not an outer table */
|
|
if (tab->type == JT_CONST && !tab->first_inner)
|
|
return (tab->eq_ref_table=1); /* purecov: inspected */
|
|
if (tab->type != JT_EQ_REF || tab->table->maybe_null)
|
|
return (tab->eq_ref_table=0); // We must use this
|
|
Item **ref_item=tab->ref.items;
|
|
Item **end=ref_item+tab->ref.key_parts;
|
|
uint found=0;
|
|
table_map map=tab->table->map;
|
|
|
|
for (; ref_item != end ; ref_item++)
|
|
{
|
|
if (! (*ref_item)->const_item())
|
|
{ // Not a const ref
|
|
ORDER *order;
|
|
for (order=start_order ; order ; order=order->next)
|
|
{
|
|
if ((*ref_item)->eq(order->item[0],0))
|
|
break;
|
|
}
|
|
if (order)
|
|
{
|
|
if (!(order->used & map))
|
|
{
|
|
found++;
|
|
order->used|= map;
|
|
}
|
|
continue; // Used in ORDER BY
|
|
}
|
|
if (!only_eq_ref_tables(join,start_order, (*ref_item)->used_tables()))
|
|
return (tab->eq_ref_table=0);
|
|
}
|
|
}
|
|
/* Check that there was no reference to table before sort order */
|
|
for (; found && start_order ; start_order=start_order->next)
|
|
{
|
|
if (start_order->used & map)
|
|
{
|
|
found--;
|
|
continue;
|
|
}
|
|
if (start_order->depend_map & map)
|
|
return (tab->eq_ref_table=0);
|
|
}
|
|
return tab->eq_ref_table=1;
|
|
}
|
|
|
|
|
|
static bool
|
|
only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
|
|
{
|
|
tables&= ~PSEUDO_TABLE_BITS;
|
|
for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
|
|
{
|
|
if (tables & 1 && !eq_ref_table(join, order, *tab))
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
/** Update the dependency map for the tables. */
|
|
|
|
static void update_depend_map(JOIN *join)
|
|
{
|
|
JOIN_TAB *join_tab;
|
|
for (join_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITH_CONST_TABLES);
|
|
join_tab;
|
|
join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS))
|
|
{
|
|
TABLE_REF *ref= &join_tab->ref;
|
|
table_map depend_map=0;
|
|
Item **item=ref->items;
|
|
uint i;
|
|
for (i=0 ; i < ref->key_parts ; i++,item++)
|
|
depend_map|=(*item)->used_tables();
|
|
depend_map&= ~OUTER_REF_TABLE_BIT;
|
|
ref->depend_map= depend_map;
|
|
for (JOIN_TAB **tab=join->map2table;
|
|
depend_map ;
|
|
tab++,depend_map>>=1 )
|
|
{
|
|
if (depend_map & 1)
|
|
ref->depend_map|=(*tab)->ref.depend_map;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/** Update the dependency map for the sort order. */
|
|
|
|
static void update_depend_map_for_order(JOIN *join, ORDER *order)
|
|
{
|
|
for (; order ; order=order->next)
|
|
{
|
|
table_map depend_map;
|
|
order->item[0]->update_used_tables();
|
|
order->depend_map=depend_map=order->item[0]->used_tables();
|
|
order->used= 0;
|
|
// Not item_sum(), RAND() and no reference to table outside of sub select
|
|
if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT))
|
|
&& !order->item[0]->with_sum_func() &&
|
|
join->join_tab)
|
|
{
|
|
for (JOIN_TAB **tab=join->map2table;
|
|
depend_map ;
|
|
tab++, depend_map>>=1)
|
|
{
|
|
if (depend_map & 1)
|
|
order->depend_map|=(*tab)->ref.depend_map;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Remove all constants from ORDER and check if ORDER only contains simple
|
|
expressions.
|
|
|
|
We also remove all duplicate expressions, keeping only the first one.
|
|
|
|
simple_order is set to 1 if sort_order only uses fields from head table
|
|
and the head table is not a LEFT JOIN table.
|
|
|
|
@param join Join handler
|
|
@param first_order List of SORT or GROUP order
|
|
@param cond WHERE statement
|
|
@param change_list Set to 1 if we should remove things from list.
|
|
If this is not set, then only simple_order is
|
|
calculated. This is not set when we
|
|
are using ROLLUP
|
|
@param simple_order Set to 1 if we are only using simple
|
|
expressions.
|
|
|
|
@return
|
|
Returns new sort order
|
|
*/
|
|
|
|
static ORDER *
|
|
remove_const(JOIN *join,ORDER *first_order, COND *cond,
|
|
bool change_list, bool *simple_order)
|
|
{
|
|
/*
|
|
We can't do ORDER BY using filesort if the select list contains a non
|
|
deterministic value like RAND() or ROWNUM().
|
|
For example:
|
|
SELECT a,ROWNUM() FROM t1 ORDER BY a;
|
|
|
|
If we would first sort the table 't1', the ROWNUM() column would be
|
|
generated during end_send() and the order would be wrong.
|
|
|
|
Previously we had here also a test of ROLLUP:
|
|
'join->rollup.state == ROLLUP::STATE_NONE'
|
|
|
|
I deleted this because the ROLLUP was never enforced because of a
|
|
bug where the inital value of simple_order was ignored. Having
|
|
ROLLUP tested now when the code is fixed, causes many test failure
|
|
and some wrong results so better to leave the code as it was
|
|
related to ROLLUP.
|
|
*/
|
|
*simple_order= !join->select_lex->rownum_in_field_list;
|
|
if (join->only_const_tables())
|
|
return change_list ? 0 : first_order; // No need to sort
|
|
|
|
ORDER *order,**prev_ptr, *tmp_order;
|
|
table_map UNINIT_VAR(first_table); /* protected by first_is_base_table */
|
|
table_map not_const_tables= ~join->const_table_map;
|
|
table_map ref;
|
|
bool first_is_base_table= FALSE;
|
|
DBUG_ENTER("remove_const");
|
|
|
|
/*
|
|
Join tab is set after make_join_statistics() has been called.
|
|
In case of one table with GROUP BY this function is called before
|
|
join_tab is set for the GROUP_BY expression
|
|
*/
|
|
if (join->join_tab)
|
|
{
|
|
if (join->join_tab[join->const_tables].table)
|
|
{
|
|
first_table= join->join_tab[join->const_tables].table->map;
|
|
first_is_base_table= TRUE;
|
|
}
|
|
|
|
/*
|
|
Cleanup to avoid interference of calls of this function for
|
|
ORDER BY and GROUP BY
|
|
*/
|
|
for (JOIN_TAB *tab= join->join_tab + join->const_tables;
|
|
tab < join->join_tab + join->top_join_tab_count;
|
|
tab++)
|
|
tab->cached_eq_ref_table= FALSE;
|
|
|
|
JOIN_TAB *head= join->join_tab + join->const_tables;
|
|
*simple_order&= head->on_expr_ref[0] == NULL;
|
|
if (*simple_order && head->table->file->ha_table_flags() & HA_SLOW_RND_POS)
|
|
{
|
|
uint u1, u2, u3, u4;
|
|
/*
|
|
normally the condition is (see filesort_use_addons())
|
|
|
|
length + sortlength <= max_length_for_sort_data
|
|
|
|
but for HA_SLOW_RND_POS tables we relax it a bit, as the alternative
|
|
is to use a temporary table, which is rather expensive.
|
|
|
|
TODO proper cost estimations
|
|
*/
|
|
*simple_order= filesort_use_addons(head->table, 0, &u1, &u2, &u3, &u4);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
first_is_base_table= FALSE;
|
|
first_table= 0; // Not used, for gcc
|
|
}
|
|
|
|
prev_ptr= &first_order;
|
|
|
|
/* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */
|
|
|
|
update_depend_map_for_order(join, first_order);
|
|
for (order=first_order; order ; order=order->next)
|
|
{
|
|
table_map order_tables=order->item[0]->used_tables();
|
|
if (order->item[0]->with_sum_func() ||
|
|
order->item[0]->with_window_func() ||
|
|
/*
|
|
If the outer table of an outer join is const (either by itself or
|
|
after applying WHERE condition), grouping on a field from such a
|
|
table will be optimized away and filesort without temporary table
|
|
will be used unless we prevent that now. Filesort is not fit to
|
|
handle joins and the join condition is not applied. We can't detect
|
|
the case without an expensive test, however, so we force temporary
|
|
table for all queries containing more than one table, ROLLUP, and an
|
|
outer join.
|
|
*/
|
|
(join->table_count > 1 && join->rollup.state == ROLLUP::STATE_INITED &&
|
|
join->outer_join))
|
|
*simple_order=0; // Must do a temp table to sort
|
|
else if (!(order_tables & not_const_tables))
|
|
{
|
|
if (order->item[0]->with_subquery())
|
|
{
|
|
/*
|
|
Delay the evaluation of constant ORDER and/or GROUP expressions that
|
|
contain subqueries until the execution phase.
|
|
*/
|
|
join->exec_const_order_group_cond.push_back(order->item[0],
|
|
join->thd->mem_root);
|
|
}
|
|
DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))
|
|
*simple_order=0;
|
|
else
|
|
{
|
|
if (cond && const_expression_in_where(cond,order->item[0]))
|
|
{
|
|
DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
|
|
continue;
|
|
}
|
|
if (first_is_base_table &&
|
|
(ref=order_tables & (not_const_tables ^ first_table)))
|
|
{
|
|
if (!(order_tables & first_table) &&
|
|
only_eq_ref_tables(join,first_order, ref))
|
|
{
|
|
DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
|
|
continue;
|
|
}
|
|
/*
|
|
UseMultipleEqualitiesToRemoveTempTable:
|
|
Can use multiple-equalities here to check that ORDER BY columns
|
|
can be used without tmp. table.
|
|
*/
|
|
bool can_subst_to_first_table= false;
|
|
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
|
|
first_is_base_table &&
|
|
order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
|
|
join->cond_equal)
|
|
{
|
|
table_map first_table_bit=
|
|
join->join_tab[join->const_tables].table->map;
|
|
|
|
Item *item= order->item[0];
|
|
|
|
/*
|
|
TODO: equality substitution in the context of ORDER BY is
|
|
sometimes allowed when it is not allowed in the general case.
|
|
|
|
We make the below call for its side effect: it will locate the
|
|
multiple equality the item belongs to and set item->item_equal
|
|
accordingly.
|
|
*/
|
|
Item *res= item->propagate_equal_fields(join->thd,
|
|
Value_source::
|
|
Context_identity(),
|
|
join->cond_equal);
|
|
Item_equal *item_eq;
|
|
if ((item_eq= res->get_item_equal()))
|
|
{
|
|
Item *first= item_eq->get_first(NO_PARTICULAR_TAB, NULL);
|
|
if (first->const_item() || first->used_tables() ==
|
|
first_table_bit)
|
|
{
|
|
can_subst_to_first_table= true;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!can_subst_to_first_table)
|
|
{
|
|
*simple_order=0; // Must do a temp table to sort
|
|
}
|
|
}
|
|
}
|
|
}
|
|
/* Remove ORDER BY entries that we have seen before */
|
|
for (tmp_order= first_order;
|
|
tmp_order != order;
|
|
tmp_order= tmp_order->next)
|
|
{
|
|
if (tmp_order->item[0]->eq(order->item[0],1))
|
|
break;
|
|
}
|
|
if (tmp_order != order)
|
|
continue; // Duplicate order by. Remove
|
|
|
|
if (change_list)
|
|
*prev_ptr= order; // use this entry
|
|
prev_ptr= &order->next;
|
|
}
|
|
if (change_list)
|
|
*prev_ptr=0;
|
|
if (prev_ptr == &first_order) // Nothing to sort/group
|
|
*simple_order=1;
|
|
#ifndef DBUG_OFF
|
|
if (unlikely(join->thd->is_error()))
|
|
DBUG_PRINT("error",("Error from remove_const"));
|
|
#endif
|
|
DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order));
|
|
DBUG_RETURN(first_order);
|
|
}
|
|
|
|
|
|
/**
|
|
Filter out ORDER items those are equal to constants in WHERE
|
|
|
|
This function is a limited version of remove_const() for use
|
|
with non-JOIN statements (i.e. single-table UPDATE and DELETE).
|
|
|
|
|
|
@param order Linked list of ORDER BY arguments
|
|
@param cond WHERE expression
|
|
|
|
@return pointer to new filtered ORDER list or NULL if whole list eliminated
|
|
|
|
@note
|
|
This function overwrites input order list.
|
|
*/
|
|
|
|
ORDER *simple_remove_const(ORDER *order, COND *where)
|
|
{
|
|
if (!order || !where)
|
|
return order;
|
|
|
|
ORDER *first= NULL, *prev= NULL;
|
|
for (; order; order= order->next)
|
|
{
|
|
DBUG_ASSERT(!order->item[0]->with_sum_func()); // should never happen
|
|
if (!const_expression_in_where(where, order->item[0]))
|
|
{
|
|
if (!first)
|
|
first= order;
|
|
if (prev)
|
|
prev->next= order;
|
|
prev= order;
|
|
}
|
|
}
|
|
if (prev)
|
|
prev->next= NULL;
|
|
return first;
|
|
}
|
|
|
|
|
|
/*
|
|
Set all fields in the table to have a null value
|
|
|
|
@param tables Table list
|
|
*/
|
|
|
|
static void make_tables_null_complemented(List<TABLE_LIST> *tables)
|
|
{
|
|
List_iterator<TABLE_LIST> ti(*tables);
|
|
TABLE_LIST *table;
|
|
while ((table= ti++))
|
|
{
|
|
/*
|
|
Don't touch semi-join materialization tables, as the a join_free()
|
|
call may have freed them (and HAVING clause can't have references to
|
|
them anyway).
|
|
*/
|
|
if (!table->is_jtbm())
|
|
{
|
|
TABLE *tbl= table->table;
|
|
mark_as_null_row(tbl); // Set fields to NULL
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static int
|
|
return_zero_rows(JOIN *join, select_result *result, List<TABLE_LIST> *tables,
|
|
List<Item> *fields, bool send_row, ulonglong select_options,
|
|
const char *info, Item *having, List<Item> *all_fields)
|
|
{
|
|
DBUG_ENTER("return_zero_rows");
|
|
|
|
if (select_options & SELECT_DESCRIBE)
|
|
{
|
|
select_describe(join, FALSE, FALSE, FALSE, info);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
if (send_row)
|
|
{
|
|
/*
|
|
Set all tables to have NULL row. This is needed as we will be evaluating
|
|
HAVING condition.
|
|
*/
|
|
make_tables_null_complemented(tables);
|
|
|
|
List_iterator_fast<Item> it(*all_fields);
|
|
Item *item;
|
|
/*
|
|
Inform all items (especially aggregating) to calculate HAVING correctly,
|
|
also we will need it for sending results.
|
|
*/
|
|
join->no_rows_in_result_called= 1;
|
|
while ((item= it++))
|
|
item->no_rows_in_result();
|
|
if (having && having->val_int() == 0)
|
|
send_row=0;
|
|
}
|
|
|
|
/* Update results for FOUND_ROWS */
|
|
if (!join->send_row_on_empty_set())
|
|
{
|
|
join->thd->limit_found_rows= 0;
|
|
}
|
|
|
|
if (!(result->send_result_set_metadata(*fields,
|
|
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)))
|
|
{
|
|
bool send_error= FALSE;
|
|
if (send_row)
|
|
send_error= result->send_data_with_check(*fields, join->unit, 0) > 0;
|
|
if (likely(!send_error))
|
|
result->send_eof(); // Should be safe
|
|
}
|
|
/*
|
|
JOIN::join_free() must be called after the virtual method
|
|
select::send_result_set_metadata() returned control since
|
|
implementation of this method could use data strutcures
|
|
that are released by the method JOIN::join_free().
|
|
*/
|
|
join->join_free();
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/**
|
|
Reset table rows to contain a null-complement row (all fields are null)
|
|
|
|
Used only in JOIN::clear() and in do_select() if there where no matching rows.
|
|
|
|
@param join JOIN
|
|
@param cleared_tables Used to mark all cleared tables in the map. Needed for
|
|
unclear_tables() to know which tables to restore to
|
|
their original state.
|
|
*/
|
|
|
|
static void clear_tables(JOIN *join, table_map *cleared_tables)
|
|
{
|
|
DBUG_ASSERT(cleared_tables);
|
|
for (uint i= 0 ; i < join->table_count ; i++)
|
|
{
|
|
TABLE *table= join->table[i];
|
|
|
|
if (table->null_row)
|
|
continue; // Nothing more to do
|
|
(*cleared_tables)|= (((table_map) 1) << i);
|
|
if (table->s->null_bytes)
|
|
{
|
|
/*
|
|
Remember null bits for the record so that we can restore the
|
|
original const record in unclear_tables()
|
|
*/
|
|
memcpy(table->record[1], table->null_flags, table->s->null_bytes);
|
|
}
|
|
mark_as_null_row(table); // All fields are NULL
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Reverse null marking for tables and restore null bits.
|
|
This return the tables to the state of before clear_tables().
|
|
|
|
We have to do this because the tables may be re-used in a sub query
|
|
and the subquery will assume that the const tables contains the original
|
|
data before clear_tables().
|
|
*/
|
|
|
|
static void unclear_tables(JOIN *join, table_map *cleared_tables)
|
|
{
|
|
for (uint i= 0 ; i < join->table_count ; i++)
|
|
{
|
|
if ((*cleared_tables) & (((table_map) 1) << i))
|
|
{
|
|
TABLE *table= join->table[i];
|
|
if (table->s->null_bytes)
|
|
memcpy(table->null_flags, table->record[1], table->s->null_bytes);
|
|
unmark_as_null_row(table);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Make some simple condition optimizations:
|
|
If there is a test 'field = const' change all refs to 'field' to 'const'
|
|
Remove all dummy tests 'item = item', 'const op const'.
|
|
Remove all 'item is NULL', when item can never be null!
|
|
item->marker should be 0 for all items on entry
|
|
Return in cond_value FALSE if condition is impossible (1 = 2)
|
|
*****************************************************************************/
|
|
|
|
class COND_CMP :public ilink {
|
|
public:
|
|
static void *operator new(size_t size, MEM_ROOT *mem_root)
|
|
{
|
|
return alloc_root(mem_root, size);
|
|
}
|
|
static void operator delete(void *ptr __attribute__((unused)),
|
|
size_t size __attribute__((unused)))
|
|
{ TRASH_FREE(ptr, size); }
|
|
|
|
static void operator delete(void *, MEM_ROOT*) {}
|
|
|
|
Item *and_level;
|
|
Item_bool_func2 *cmp_func;
|
|
COND_CMP(Item *a,Item_bool_func2 *b) :and_level(a),cmp_func(b) {}
|
|
};
|
|
|
|
/**
|
|
Find the multiple equality predicate containing a field.
|
|
|
|
The function retrieves the multiple equalities accessed through
|
|
the con_equal structure from current level and up looking for
|
|
an equality containing field. It stops retrieval as soon as the equality
|
|
is found and set up inherited_fl to TRUE if it's found on upper levels.
|
|
|
|
@param cond_equal multiple equalities to search in
|
|
@param field field to look for
|
|
@param[out] inherited_fl set up to TRUE if multiple equality is found
|
|
on upper levels (not on current level of
|
|
cond_equal)
|
|
|
|
@return
|
|
- Item_equal for the found multiple equality predicate if a success;
|
|
- NULL otherwise.
|
|
*/
|
|
|
|
Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
|
|
bool *inherited_fl)
|
|
{
|
|
Item_equal *item= 0;
|
|
bool in_upper_level= FALSE;
|
|
while (cond_equal)
|
|
{
|
|
List_iterator_fast<Item_equal> li(cond_equal->current_level);
|
|
while ((item= li++))
|
|
{
|
|
if (item->contains(field))
|
|
goto finish;
|
|
}
|
|
in_upper_level= TRUE;
|
|
cond_equal= cond_equal->upper_levels;
|
|
}
|
|
in_upper_level= FALSE;
|
|
finish:
|
|
*inherited_fl= in_upper_level;
|
|
return item;
|
|
}
|
|
|
|
|
|
/**
|
|
Check whether an equality can be used to build multiple equalities.
|
|
|
|
This function first checks whether the equality (left_item=right_item)
|
|
is a simple equality i.e. the one that equates a field with another field
|
|
or a constant (field=field_item or field=const_item).
|
|
If this is the case the function looks for a multiple equality
|
|
in the lists referenced directly or indirectly by cond_equal inferring
|
|
the given simple equality. If it doesn't find any, it builds a multiple
|
|
equality that covers the predicate, i.e. the predicate can be inferred
|
|
from this multiple equality.
|
|
The built multiple equality could be obtained in such a way:
|
|
create a binary multiple equality equivalent to the predicate, then
|
|
merge it, if possible, with one of old multiple equalities.
|
|
This guarantees that the set of multiple equalities covering equality
|
|
predicates will be minimal.
|
|
|
|
EXAMPLE:
|
|
For the where condition
|
|
@code
|
|
WHERE a=b AND b=c AND
|
|
(b=2 OR f=e)
|
|
@endcode
|
|
the check_equality will be called for the following equality
|
|
predicates a=b, b=c, b=2 and f=e.
|
|
- For a=b it will be called with *cond_equal=(0,[]) and will transform
|
|
*cond_equal into (0,[Item_equal(a,b)]).
|
|
- For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
|
|
and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
|
|
- For b=2 it will be called with *cond_equal=(ptr(CE),[])
|
|
and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
|
|
- For f=e it will be called with *cond_equal=(ptr(CE), [])
|
|
and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).
|
|
|
|
@note
|
|
Now only fields that have the same type definitions (verified by
|
|
the Field::eq_def method) are placed to the same multiple equalities.
|
|
Because of this some equality predicates are not eliminated and
|
|
can be used in the constant propagation procedure.
|
|
We could weeken the equlity test as soon as at least one of the
|
|
equal fields is to be equal to a constant. It would require a
|
|
more complicated implementation: we would have to store, in
|
|
general case, its own constant for each fields from the multiple
|
|
equality. But at the same time it would allow us to get rid
|
|
of constant propagation completely: it would be done by the call
|
|
to cond->build_equal_items().
|
|
|
|
|
|
The implementation does not follow exactly the above rules to
|
|
build a new multiple equality for the equality predicate.
|
|
If it processes the equality of the form field1=field2, it
|
|
looks for multiple equalities me1 containig field1 and me2 containing
|
|
field2. If only one of them is found the fuction expands it with
|
|
the lacking field. If multiple equalities for both fields are
|
|
found they are merged. If both searches fail a new multiple equality
|
|
containing just field1 and field2 is added to the existing
|
|
multiple equalities.
|
|
If the function processes the predicate of the form field1=const,
|
|
it looks for a multiple equality containing field1. If found, the
|
|
function checks the constant of the multiple equality. If the value
|
|
is unknown, it is setup to const. Otherwise the value is compared with
|
|
const and the evaluation of the equality predicate is performed.
|
|
When expanding/merging equality predicates from the upper levels
|
|
the function first copies them for the current level. It looks
|
|
acceptable, as this happens rarely. The implementation without
|
|
copying would be much more complicated.
|
|
|
|
For description of how equality propagation works with SJM nests, grep
|
|
for EqualityPropagationAndSjmNests.
|
|
|
|
@param left_item left term of the quality to be checked
|
|
@param right_item right term of the equality to be checked
|
|
@param item equality item if the equality originates from a condition
|
|
predicate, 0 if the equality is the result of row
|
|
elimination
|
|
@param cond_equal multiple equalities that must hold together with the
|
|
equality
|
|
|
|
@retval
|
|
TRUE if the predicate is a simple equality predicate to be used
|
|
for building multiple equalities
|
|
@retval
|
|
FALSE otherwise
|
|
*/
|
|
|
|
bool check_simple_equality(THD *thd, const Item::Context &ctx,
|
|
Item *left_item, Item *right_item,
|
|
COND_EQUAL *cond_equal)
|
|
{
|
|
Item *orig_left_item= left_item;
|
|
Item *orig_right_item= right_item;
|
|
if (left_item->type() == Item::REF_ITEM)
|
|
{
|
|
Item_ref::Ref_Type left_ref= ((Item_ref*)left_item)->ref_type();
|
|
|
|
if (left_ref == Item_ref::VIEW_REF ||
|
|
left_ref == Item_ref::REF)
|
|
{
|
|
if (((Item_ref*)left_item)->get_depended_from())
|
|
return FALSE;
|
|
if (left_ref == Item_ref::VIEW_REF &&
|
|
((Item_direct_view_ref*)left_item)->get_null_ref_table() !=
|
|
NO_NULL_TABLE &&
|
|
!left_item->real_item()->used_tables())
|
|
return FALSE;
|
|
left_item= left_item->real_item();
|
|
}
|
|
}
|
|
if (right_item->type() == Item::REF_ITEM)
|
|
{
|
|
Item_ref::Ref_Type right_ref= ((Item_ref*)right_item)->ref_type();
|
|
if (right_ref == Item_ref::VIEW_REF ||
|
|
(right_ref == Item_ref::REF))
|
|
{
|
|
if (((Item_ref*)right_item)->get_depended_from())
|
|
return FALSE;
|
|
if (right_ref == Item_ref::VIEW_REF &&
|
|
((Item_direct_view_ref*)right_item)->get_null_ref_table() !=
|
|
NO_NULL_TABLE &&
|
|
!right_item->real_item()->used_tables())
|
|
return FALSE;
|
|
right_item= right_item->real_item();
|
|
}
|
|
}
|
|
if (left_item->type() == Item::FIELD_ITEM &&
|
|
right_item->type() == Item::FIELD_ITEM &&
|
|
!((Item_field*)left_item)->get_depended_from() &&
|
|
!((Item_field*)right_item)->get_depended_from())
|
|
{
|
|
/* The predicate the form field1=field2 is processed */
|
|
|
|
Field *left_field= ((Item_field*) left_item)->field;
|
|
Field *right_field= ((Item_field*) right_item)->field;
|
|
|
|
if (!left_field->eq_def(right_field) &&
|
|
!fields_equal_using_narrowing(thd, left_field, right_field))
|
|
return FALSE;
|
|
|
|
/* Search for multiple equalities containing field1 and/or field2 */
|
|
bool left_copyfl, right_copyfl;
|
|
Item_equal *left_item_equal=
|
|
find_item_equal(cond_equal, left_field, &left_copyfl);
|
|
Item_equal *right_item_equal=
|
|
find_item_equal(cond_equal, right_field, &right_copyfl);
|
|
|
|
/* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
|
|
if (left_field->eq(right_field)) /* f = f */
|
|
return (!(left_field->maybe_null() && !left_item_equal));
|
|
|
|
if (left_item_equal && left_item_equal == right_item_equal)
|
|
{
|
|
/*
|
|
The equality predicate is inference of one of the existing
|
|
multiple equalities, i.e the condition is already covered
|
|
by upper level equalities
|
|
*/
|
|
return TRUE;
|
|
}
|
|
|
|
/* Copy the found multiple equalities at the current level if needed */
|
|
if (left_copyfl)
|
|
{
|
|
/* left_item_equal of an upper level contains left_item */
|
|
left_item_equal= new (thd->mem_root) Item_equal(thd, left_item_equal);
|
|
left_item_equal->set_context_field(((Item_field*) left_item));
|
|
cond_equal->current_level.push_back(left_item_equal, thd->mem_root);
|
|
}
|
|
if (right_copyfl)
|
|
{
|
|
/* right_item_equal of an upper level contains right_item */
|
|
right_item_equal= new (thd->mem_root) Item_equal(thd, right_item_equal);
|
|
right_item_equal->set_context_field(((Item_field*) right_item));
|
|
cond_equal->current_level.push_back(right_item_equal, thd->mem_root);
|
|
}
|
|
|
|
if (left_item_equal)
|
|
{
|
|
/* left item was found in the current or one of the upper levels */
|
|
if (! right_item_equal)
|
|
left_item_equal->add(orig_right_item, thd->mem_root);
|
|
else
|
|
{
|
|
/* Merge two multiple equalities forming a new one */
|
|
left_item_equal->merge(thd, right_item_equal);
|
|
/* Remove the merged multiple equality from the list */
|
|
List_iterator<Item_equal> li(cond_equal->current_level);
|
|
while ((li++) != right_item_equal) ;
|
|
li.remove();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* left item was not found neither the current nor in upper levels */
|
|
if (right_item_equal)
|
|
right_item_equal->add(orig_left_item, thd->mem_root);
|
|
else
|
|
{
|
|
/* None of the fields was found in multiple equalities */
|
|
Type_handler_hybrid_field_type
|
|
tmp(orig_left_item->type_handler_for_comparison());
|
|
if (tmp.aggregate_for_comparison(orig_right_item->
|
|
type_handler_for_comparison()))
|
|
return false;
|
|
Item_equal *item_equal=
|
|
new (thd->mem_root) Item_equal(thd, tmp.type_handler(),
|
|
orig_left_item, orig_right_item,
|
|
false);
|
|
item_equal->set_context_field((Item_field*)left_item);
|
|
cond_equal->current_level.push_back(item_equal, thd->mem_root);
|
|
}
|
|
}
|
|
return TRUE;
|
|
}
|
|
|
|
{
|
|
/* The predicate of the form field=const/const=field is processed */
|
|
Item *const_item= 0;
|
|
Item_field *field_item= 0;
|
|
Item *orig_field_item= 0;
|
|
if (left_item->type() == Item::FIELD_ITEM &&
|
|
!((Item_field*)left_item)->get_depended_from() &&
|
|
right_item->can_eval_in_optimize())
|
|
{
|
|
orig_field_item= orig_left_item;
|
|
field_item= (Item_field *) left_item;
|
|
const_item= right_item;
|
|
}
|
|
else if (right_item->type() == Item::FIELD_ITEM &&
|
|
!((Item_field*)right_item)->get_depended_from() &&
|
|
left_item->can_eval_in_optimize())
|
|
{
|
|
orig_field_item= orig_right_item;
|
|
field_item= (Item_field *) right_item;
|
|
const_item= left_item;
|
|
}
|
|
|
|
if (const_item &&
|
|
field_item->field->test_if_equality_guarantees_uniqueness(const_item))
|
|
{
|
|
/*
|
|
field_item and const_item are arguments of a scalar or a row
|
|
comparison function:
|
|
WHERE column=constant
|
|
WHERE (column, ...) = (constant, ...)
|
|
|
|
The owner comparison function has previously called fix_fields(),
|
|
so field_item and const_item should be directly comparable items,
|
|
field_item->cmp_context and const_item->cmp_context should be set.
|
|
In case of string comparison, charsets and collations of
|
|
field_item and const_item should have already be aggregated
|
|
for comparison, all necessary character set converters installed
|
|
and fixed.
|
|
|
|
In case of string comparison, const_item can be either:
|
|
- a weaker constant that does not need to be converted to field_item:
|
|
WHERE latin1_field = 'latin1_const'
|
|
WHERE varbinary_field = 'latin1_const'
|
|
WHERE latin1_bin_field = 'latin1_general_ci_const'
|
|
- a stronger constant that does not need to be converted to field_item:
|
|
WHERE latin1_field = binary 0xDF
|
|
WHERE latin1_field = 'a' COLLATE latin1_bin
|
|
- a result of conversion (e.g. from the session character set)
|
|
to the character set of field_item:
|
|
WHERE latin1_field = 'utf8_string_with_latin1_repertoire'
|
|
*/
|
|
bool copyfl;
|
|
|
|
Item_equal *item_equal = find_item_equal(cond_equal,
|
|
field_item->field, ©fl);
|
|
if (copyfl)
|
|
{
|
|
item_equal= new (thd->mem_root) Item_equal(thd, item_equal);
|
|
cond_equal->current_level.push_back(item_equal, thd->mem_root);
|
|
item_equal->set_context_field(field_item);
|
|
}
|
|
Item *const_item2= field_item->field->get_equal_const_item(thd, ctx,
|
|
const_item);
|
|
if (!const_item2)
|
|
return false;
|
|
|
|
if (item_equal)
|
|
{
|
|
/*
|
|
The flag cond_false will be set to 1 after this, if item_equal
|
|
already contains a constant and its value is not equal to
|
|
the value of const_item.
|
|
*/
|
|
item_equal->add_const(thd, const_item2);
|
|
}
|
|
else
|
|
{
|
|
Type_handler_hybrid_field_type
|
|
tmp(orig_left_item->type_handler_for_comparison());
|
|
if (tmp.aggregate_for_comparison(orig_right_item->
|
|
type_handler_for_comparison()))
|
|
return false;
|
|
item_equal= new (thd->mem_root) Item_equal(thd, tmp.type_handler(),
|
|
const_item2,
|
|
orig_field_item, true);
|
|
item_equal->set_context_field(field_item);
|
|
cond_equal->current_level.push_back(item_equal, thd->mem_root);
|
|
}
|
|
return TRUE;
|
|
}
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Convert row equalities into a conjunction of regular equalities.
|
|
|
|
The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
|
|
into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
|
|
Ei=E'i the function checks whether it is a simple equality or a row
|
|
equality. If it is a simple equality it is used to expand multiple
|
|
equalities of cond_equal. If it is a row equality it converted to a
|
|
sequence of equalities between row elements. If Ei=E'i is neither a
|
|
simple equality nor a row equality the item for this predicate is added
|
|
to eq_list.
|
|
|
|
@param thd thread handle
|
|
@param left_row left term of the row equality to be processed
|
|
@param right_row right term of the row equality to be processed
|
|
@param cond_equal multiple equalities that must hold together with the
|
|
predicate
|
|
@param eq_list results of conversions of row equalities that are not
|
|
simple enough to form multiple equalities
|
|
|
|
@retval
|
|
TRUE if conversion has succeeded (no fatal error)
|
|
@retval
|
|
FALSE otherwise
|
|
*/
|
|
|
|
static bool check_row_equality(THD *thd, const Arg_comparator *comparators,
                               Item *left_row, Item_row *right_row,
                               COND_EQUAL *cond_equal, List<Item>* eq_list)
{
  /*
    Walk both rows in parallel. comparators[i] is the comparator that is
    used for the i-th pair of row elements.
  */
  uint n= left_row->cols();
  for (uint i= 0 ; i < n; i++)
  {
    bool is_converted;
    Item *left_item= left_row->element_index(i);
    Item *right_item= right_row->element_index(i);
    if (left_item->type() == Item::ROW_ITEM &&
        right_item->type() == Item::ROW_ITEM)
    {
      /*
        Item_splocal for ROW SP variables return Item::ROW_ITEM.
        Here we know that left_item and right_item are not Item_splocal,
        because ROW SP variables with nested ROWs are not supported yet.
        It's safe to cast left_item and right_item to Item_row.
      */
      DBUG_ASSERT(!left_item->get_item_splocal());
      DBUG_ASSERT(!right_item->get_item_splocal());
      /* Nested row: recurse using the sub-comparators of this element */
      is_converted= check_row_equality(thd,
                                       comparators[i].subcomparators(),
                                       (Item_row *) left_item,
                                       (Item_row *) right_item,
                                       cond_equal, eq_list);
    }
    else
    {
      /*
        Scalar pair: try to merge it into the multiple equalities of
        cond_equal, using the comparison type and collation of the
        comparator of this element.
      */
      const Arg_comparator *tmp= &comparators[i];
      is_converted= check_simple_equality(thd,
                                          Item::Context(Item::ANY_SUBST,
                                                  tmp->compare_type_handler(),
                                                  tmp->compare_collation()),
                                          left_item, right_item,
                                          cond_equal);
    }

    if (!is_converted)
    {
      /*
        The pair could not be turned into (a part of) a multiple equality.
        Keep it as an ordinary equality predicate in eq_list.
      */
      Item_func_eq *eq_item;
      if (!(eq_item= new (thd->mem_root) Item_func_eq(thd, left_item, right_item)) ||
          eq_item->set_cmp_func(thd))
        return FALSE;
      eq_item->quick_fix_field();
      /*
        A non-zero result of push_back() means the element was not added
        (it is checked the same way in eliminate_item_equal()). Report the
        failure instead of silently losing the predicate.
      */
      if (eq_list->push_back(eq_item, thd->mem_root))
        return FALSE;
    }
  }
  return TRUE;
}
|
|
|
|
|
|
/**
|
|
Eliminate row equalities and form multiple equalities predicates.
|
|
|
|
This function checks whether the item is a simple equality
|
|
i.e. the one that equates a field with another field or a constant
|
|
(field=field_item or field=constant_item), or, a row equality.
|
|
For a simple equality the function looks for a multiple equality
|
|
in the lists referenced directly or indirectly by cond_equal inferring
|
|
the given simple equality. If it doesn't find any, it builds/expands
|
|
multiple equality that covers the predicate.
|
|
Row equalities are eliminated by substituting them with conjunctive regular
|
|
equalities which are treated in the same way as original equality
|
|
predicates.
|
|
|
|
@param thd thread handle
|
|
@param item predicate to process
|
|
@param cond_equal multiple equalities that must hold together with the
|
|
predicate
|
|
@param eq_list results of conversions of row equalities that are not
|
|
simple enough to form multiple equalities
|
|
|
|
@retval
|
|
TRUE if re-writing rules have been applied
|
|
@retval
|
|
FALSE otherwise, i.e.
|
|
if the predicate is not an equality,
|
|
or, if the equality is neither a simple one nor a row equality,
|
|
or, if the procedure fails by a fatal error.
|
|
*/
|
|
|
|
bool Item_func_eq::check_equality(THD *thd, COND_EQUAL *cond_equal,
                                  List<Item> *eq_list)
{
  Item *lhs= arguments()[0];
  Item *rhs= arguments()[1];
  const bool both_are_rows= lhs->type() == Item::ROW_ITEM &&
                            rhs->type() == Item::ROW_ITEM;

  /* The common case: a scalar equality */
  if (!both_are_rows)
  {
    return check_simple_equality(thd,
                                 Context(ANY_SUBST,
                                         compare_type_handler(),
                                         compare_collation()),
                                 lhs, rhs, cond_equal);
  }

  /*
    Item_splocal::type() for ROW variables returns Item::ROW_ITEM, so a
    ROW-type SP variable has to be told apart from a real Item_row before
    the arguments are cast to Item_row below.
    Example query:
      SELECT 1 FROM DUAL WHERE row_sp_variable=ROW(100,200);
  */
  if (lhs->get_item_splocal() || rhs->get_item_splocal())
    return false;

  /* A genuine row equality: process it element by element */
  return check_row_equality(thd,
                            cmp.subcomparators(),
                            (Item_row *) lhs,
                            (Item_row *) rhs,
                            cond_equal, eq_list);
}
|
|
|
|
|
|
/**
|
|
Item_xxx::build_equal_items()
|
|
|
|
Replace all equality predicates in a condition referenced by "this"
|
|
by multiple equality items.
|
|
|
|
At each 'and' level the function detects items for equality predicates
|
|
and replaced them by a set of multiple equality items of class Item_equal,
|
|
taking into account inherited equalities from upper levels.
|
|
If an equality predicate is used not in a conjunction it's just
|
|
replaced by a multiple equality predicate.
|
|
For each 'and' level the function set a pointer to the inherited
|
|
multiple equalities in the cond_equal field of the associated
|
|
object of the type Item_cond_and.
|
|
The function also traverses the cond tree and for each field reference
|
|
sets a pointer to the multiple equality item containing the field, if there
|
|
is any. If this multiple equality equates fields to a constant the
|
|
function replaces the field reference by the constant in the cases
|
|
when the field is not of a string type or when the field reference is
|
|
just an argument of a comparison predicate.
|
|
The function also determines the maximum number of members in
|
|
equality lists of each Item_cond_and object assigning it to
|
|
thd->lex->current_select->max_equal_elems.
|
|
|
|
@note
|
|
Multiple equality predicate =(f1,..fn) is equivalent to the conjunction of
|
|
f1=f2, .., fn-1=fn. It substitutes any inference from these
|
|
equality predicates that is equivalent to the conjunction.
|
|
Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
|
|
it is equivalent to ((a1=a2) AND (a2=a3)).
|
|
The function always makes a substitution of all equality predicates occurred
|
|
in a conjunction for a minimal set of multiple equality predicates.
|
|
This set can be considered as a canonical representation of the
|
|
sub-conjunction of the equality predicates.
|
|
E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
|
|
(=(t1.a,t2.b,t3.c) AND t2.b>5), not by
|
|
(=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
|
|
while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
|
|
(=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
|
|
but if additionally =(t4.d,t2.b) is inherited, it
|
|
will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)
|
|
|
|
The function performs the substitution in a recursive descent by
|
|
the condition tree, passing to the next AND level a chain of multiple
|
|
equality predicates which have been built at the upper levels.
|
|
The Item_equal items built at the level are attached to other
|
|
non-equality conjuncts as a sublist. The pointer to the inherited
|
|
multiple equalities is saved in the and condition object (Item_cond_and).
|
|
This chain allows us for any field reference occurrence easily to find a
|
|
multiple equality that must be held for this occurrence.
|
|
For each AND level we do the following:
|
|
- scan it for all equality predicate (=) items
|
|
- join them into disjoint Item_equal() groups
|
|
- process the included OR conditions recursively to do the same for
|
|
lower AND levels.
|
|
|
|
We need to do things in this order as lower AND levels need to know about
|
|
all possible Item_equal objects in upper levels.
|
|
|
|
@param thd thread handle
|
|
@param inherited path to all inherited multiple equality items
|
|
|
|
@return
|
|
pointer to the transformed condition,
|
|
whose Used_tables_and_const_cache is up to date,
|
|
so no additional update_used_tables() is needed on the result.
|
|
*/
|
|
|
|
COND *Item_cond_and::build_equal_items(THD *thd,
                                       COND_EQUAL *inherited,
                                       bool link_item_fields,
                                       COND_EQUAL **cond_equal_ref)
{
  Item_equal *multi_eq;
  COND_EQUAL level_equalities;
  level_equalities.upper_levels= inherited;

  if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
    return this;                          // Fatal error flag is set!

  List<Item> leftover_eqs;
  List<Item> *conjuncts= argument_list();
  List_iterator<Item> conj_it(*conjuncts);
  Item *conj;

  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);

  /*
    First pass over the conjuncts of this level: every equality that
    check_equality() manages to fold into a multiple equality of
    level_equalities (or to split into leftover_eqs) is taken out of the
    conjunction.

    PS/SP note: it is safe to remove a node from the AND-OR structure here
    because the structure is restored before each re-execution of any
    prepared statement/stored procedure.
  */
  for (conj= conj_it++; conj; conj= conj_it++)
  {
    if (conj->check_equality(thd, &level_equalities, &leftover_eqs))
      conj_it.remove();
  }

  /*
    Nothing may be left at all if every predicate of the level has been
    eliminated, e.g. (a=a AND b=b AND a=a).
  */
  const bool level_is_empty= conjuncts->elements == 0 &&
                             level_equalities.current_level.elements == 0 &&
                             leftover_eqs.elements == 0;
  if (level_is_empty)
    return (Item*) Item_true;

  /* Fix the multiple equalities built for this level */
  List_iterator_fast<Item_equal> eq_it(level_equalities.current_level);
  while ((multi_eq= eq_it++))
  {
    multi_eq->set_link_equal_fields(link_item_fields);
    multi_eq->fix_fields(thd, NULL);
    multi_eq->update_used_tables();
    set_if_bigger(thd->lex->current_select->max_equal_elems,
                  multi_eq->n_field_items());
  }

  /*
    Store the equalities of this level in the AND item itself; they are
    what the lower levels inherit from now on.
  */
  m_cond_equal.copy(level_equalities);
  level_equalities.current_level= m_cond_equal.current_level;

  /*
    Second pass: replace equality predicates in the remaining conjuncts,
    i.e. at the lower levels of the condition expression.
  */
  conj_it.rewind();
  while ((conj= conj_it++))
  {
    Item *rebuilt= conj->build_equal_items(thd, &m_cond_equal, false, NULL);
    if (rebuilt != conj)
    {
      /*
        This replacement happens only for standalone equalities.
        It is ok with PS/SP as the replacement is done for cond_args of
        an AND/OR item, which are restored for each execution of PS/SP.
      */
      conj_it.replace(rebuilt);
    }
  }

  /* Re-attach the leftover equalities and the multiple equalities */
  conjuncts->append(&leftover_eqs);
  conjuncts->append((List<Item> *)&level_equalities.current_level);
  update_used_tables();
  if (cond_equal_ref)
    *cond_equal_ref= &m_cond_equal;
  return this;
}
|
|
|
|
|
|
/*
  Generic AND/OR item: no multiple equalities are built for this level
  itself, the arguments are processed one by one with the inherited
  multiple equalities.

  link_item_fields is not used here. cond_equal_ref is never written to,
  it is only checked by the assertion.
*/
COND *Item_cond::build_equal_items(THD *thd,
                                   COND_EQUAL *inherited,
                                   bool link_item_fields,
                                   COND_EQUAL **cond_equal_ref)
{
  List<Item> *args_list= argument_list();
  List_iterator<Item> arg_it(*args_list);

  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);

  /*
    Replace equality predicates at the lower levels of the condition
    expression, recomputing used_tables_cache and const_item_cache of
    this item from the (possibly replaced) arguments on the way.
  */
  used_tables_and_const_cache_init();
  for (Item *arg= arg_it++; arg; arg= arg_it++)
  {
    Item *rebuilt= arg->build_equal_items(thd, inherited, false, NULL);
    if (rebuilt != arg)
    {
      /*
        This replacement happens only for standalone equalities.
        It is ok with PS/SP as the replacement is done for arguments of
        an AND/OR item, which are restored for each execution of PS/SP.
      */
      arg_it.replace(rebuilt);
    }
    used_tables_and_const_cache_join(rebuilt);
  }
  return this;
}
|
|
|
|
|
|
/*
  Process an equality predicate that is not a direct member of an AND
  level ("standalone equality").

  @param thd               thread handle
  @param inherited         multiple equalities inherited from upper levels
  @param link_item_fields  only forwarded to Item_func::build_equal_items()
                           when the predicate is not rewritten
  @param[out] cond_equal_ref  if not NULL, receives the multiple equalities
                           built here (when any are built)

  @return
    the item to use instead of this predicate (may be this item itself),
    or NULL on error
*/
COND *Item_func_eq::build_equal_items(THD *thd,
                                      COND_EQUAL *inherited,
                                      bool link_item_fields,
                                      COND_EQUAL **cond_equal_ref)
{
  COND_EQUAL cond_equal;
  cond_equal.upper_levels= inherited;
  List<Item> eq_list;

  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
  /*
    If an equality predicate forms the whole and level,
    we call it standalone equality and it's processed here.
    E.g. in the following where condition
    WHERE a=5 AND (b=5 or a=c)
    (b=5) and (a=c) are standalone equalities.
    In general we can't leave alone standalone equalities:
    for WHERE a=b AND c=d AND (b=c OR d=5)
    b=c is replaced by =(a,b,c,d).
  */
  if (Item_func_eq::check_equality(thd, &cond_equal, &eq_list))
  {
    Item_equal *item_equal;
    /* Number of predicates the equality has been rewritten into */
    int n= cond_equal.current_level.elements + eq_list.elements;
    if (n == 0)
      return (Item*) Item_true;
    else if (n == 1)
    {
      /* Exactly one result: either a multiple equality ... */
      if ((item_equal= cond_equal.current_level.pop()))
      {
        item_equal->fix_fields(thd, NULL);
        item_equal->update_used_tables();
        set_if_bigger(thd->lex->current_select->max_equal_elems,
                      item_equal->n_field_items());
        item_equal->upper_levels= inherited;
        if (cond_equal_ref)
          *cond_equal_ref= new (thd->mem_root) COND_EQUAL(item_equal,
                                                          thd->mem_root);
        return item_equal;
      }
      /* ... or a single ordinary equality taken from eq_list */
      Item *res= eq_list.pop();
      res->update_used_tables();
      DBUG_ASSERT(res->type() == FUNC_ITEM);
      return res;
    }
    else
    {
      /*
        Here a new AND level must be created. It can happen only
        when a row equality is processed as a standalone predicate.
      */
      Item_cond_and *and_cond= new (thd->mem_root) Item_cond_and(thd, eq_list);
      /*
        The allocation may fail; report it the same way as the other
        error exit of this branch instead of dereferencing NULL.
      */
      if (!and_cond)
        return NULL;
      and_cond->quick_fix_field();
      List<Item> *cond_args= and_cond->argument_list();
      List_iterator_fast<Item_equal> it(cond_equal.current_level);
      while ((item_equal= it++))
      {
        if (item_equal->fix_length_and_dec(thd))
          return NULL;
        item_equal->update_used_tables();
        set_if_bigger(thd->lex->current_select->max_equal_elems,
                      item_equal->n_field_items());
      }
      /* Keep the multiple equalities in the new AND item */
      and_cond->m_cond_equal.copy(cond_equal);
      cond_equal.current_level= and_cond->m_cond_equal.current_level;
      cond_args->append((List<Item> *)&cond_equal.current_level);
      and_cond->update_used_tables();
      if (cond_equal_ref)
        *cond_equal_ref= &and_cond->m_cond_equal;
      return and_cond;
    }
  }
  /* Not rewritten: handle the predicate as any other function item */
  return Item_func::build_equal_items(thd, inherited, link_item_fields,
                                      cond_equal_ref);
}
|
|
|
|
|
|
COND *Item_func::build_equal_items(THD *thd, COND_EQUAL *inherited,
                                   bool link_item_fields,
                                   COND_EQUAL **cond_equal_ref)
{
  /*
    Every field reference in this condition that does not come from an
    equal item predicate gets a pointer to the multiple equality it
    belongs to (if there is any), provided that the field is not of
    a string type or the field reference is an argument of a comparison
    predicate.
  */
  COND *propagated= propagate_equal_fields(thd, Context_boolean(), inherited);
  propagated->update_used_tables();

  /* The item itself is expected to be returned by the propagation */
  DBUG_ASSERT(propagated == this);
  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
  return propagated;
}
|
|
|
|
|
|
/*
  A multiple equality is processed like any other function item. When the
  caller asks for the built equalities, a new COND_EQUAL constructed from
  this item is returned through cond_equal_ref.
*/
COND *Item_equal::build_equal_items(THD *thd, COND_EQUAL *inherited,
                                    bool link_item_fields,
                                    COND_EQUAL **cond_equal_ref)
{
  COND *result= Item_func::build_equal_items(thd, inherited, link_item_fields,
                                             cond_equal_ref);
  if (!cond_equal_ref)
    return result;

  *cond_equal_ref= new (thd->mem_root) COND_EQUAL(this, thd->mem_root);
  return result;
}
|
|
|
|
|
|
/**
|
|
Build multiple equalities for a condition and all on expressions that
|
|
inherit these multiple equalities.
|
|
|
|
The function first applies the cond->build_equal_items() method
|
|
to build all multiple equalities for condition cond utilizing equalities
|
|
referred through the parameter inherited. The extended set of
|
|
equalities is returned in the structure referred by the cond_equal_ref
|
|
parameter. After this the function calls itself recursively for
|
|
all on expressions whose direct references can be found in join_list
|
|
and who inherit directly the multiple equalities just having built.
|
|
|
|
@note
|
|
The on expression used in an outer join operation inherits all equalities
|
|
from the on expression of the embedding join, if there is any, or
|
|
otherwise - from the where condition.
|
|
This fact is not obvious, but presumably can be proved.
|
|
Consider the following query:
|
|
@code
|
|
SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
|
|
WHERE t1.a=t2.a;
|
|
@endcode
|
|
If the on expression in the query inherits =(t1.a,t2.a), then we
|
|
can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
|
|
the equality t3.a=t4.a. Although the on expression
|
|
t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
|
|
in the query the latter can be replaced by the former: the new query
|
|
will return the same result set as the original one.
|
|
|
|
Interesting that multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us
|
|
to use t1.a=t3.a AND t3.a=t4.a under the on condition:
|
|
@code
|
|
SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
|
|
WHERE t1.a=t2.a
|
|
@endcode
|
|
This query equivalent to:
|
|
@code
|
|
SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
|
|
WHERE t1.a=t2.a
|
|
@endcode
|
|
Similarly the original query can be rewritten to the query:
|
|
@code
|
|
SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
|
|
WHERE t1.a=t2.a
|
|
@endcode
|
|
that is equivalent to:
|
|
@code
|
|
SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1
|
|
WHERE t1.a=t2.a
|
|
@endcode
|
|
Thus, applying equalities from the where condition we basically
|
|
can get more freedom in performing join operations.
|
|
Although we don't use this property now, it probably makes sense to use
|
|
it in the future.
|
|
@param thd Thread handler
|
|
@param cond condition to build the multiple equalities for
|
|
@param inherited path to all inherited multiple equality items
|
|
@param join_list list of join tables to which the condition
|
|
refers to
|
|
@param ignore_on_conds TRUE <-> do not build multiple equalities
|
|
for on expressions
|
|
@param[out] cond_equal_ref pointer to the structure to place built
|
|
equalities in
|
|
@param link_equal_fields equal fields are to be linked
|
|
|
|
@return
|
|
pointer to the transformed condition containing multiple equalities
|
|
*/
|
|
|
|
static COND *build_equal_items(JOIN *join, COND *cond,
                               COND_EQUAL *inherited,
                               List<TABLE_LIST> *join_list,
                               bool ignore_on_conds,
                               COND_EQUAL **cond_equal_ref,
                               bool link_equal_fields)
{
  THD *thd= join->thd;

  *cond_equal_ref= NULL;

  if (cond)
  {
    cond= cond->build_equal_items(thd, inherited, link_equal_fields,
                                  cond_equal_ref);
    COND_EQUAL *built= *cond_equal_ref;
    if (built)
    {
      /*
        Chain the equalities just built to the inherited ones; the ON
        expressions processed below inherit the extended set.
      */
      built->upper_levels= inherited;
      inherited= built;
    }
  }

  /* Nothing more to do unless ON expressions have to be processed */
  if (!join_list || ignore_on_conds)
    return cond;

  List_iterator<TABLE_LIST> tbl_it(*join_list);
  for (TABLE_LIST *tbl= tbl_it++; tbl; tbl= tbl_it++)
  {
    if (!tbl->on_expr)
      continue;

    /* For a nested join descend into its own join list */
    List<TABLE_LIST> *inner_join_list= NULL;
    if (tbl->nested_join)
      inner_join_list= &tbl->nested_join->join_list;

    /*
      We can modify tbl->on_expr because its old value will
      be restored before re-execution of PS/SP.
    */
    tbl->on_expr= build_equal_items(join, tbl->on_expr, inherited,
                                    inner_join_list, ignore_on_conds,
                                    &tbl->cond_equal);

    if (unlikely(thd->trace_started()))
    {
      /* A nested join is reported under the alias of its first table */
      const char *traced_name= tbl->nested_join ?
                               tbl->nested_join->join_list.head()->alias.str :
                               tbl->alias.str;
      trace_condition(join->thd, "ON expr", "build_equal_items",
                      tbl->on_expr, traced_name);
    }
  }

  return cond;
}
|
|
|
|
|
|
/**
|
|
Compare field items by table order in the execution plan.
|
|
|
|
If field1 and field2 belong to different tables then
|
|
field1 considered as better than field2 if the table containing
|
|
field1 is accessed earlier than the table containing field2.
|
|
The function finds out what of two fields is better according
|
|
this criteria.
|
|
If field1 and field2 belong to the same table then the result
|
|
of comparison depends on whether the fields are parts of
|
|
the key that are used to access this table.
|
|
|
|
@param field1 first field item to compare
|
|
@param field2 second field item to compare
|
|
@param table_join_idx index to tables determining table order
|
|
|
|
@retval
|
|
-1 if field1 is better than field2 (field1 is to be placed first)
|
|
@retval
|
|
1 if field2 is better than field1
|
|
@retval
|
|
0 otherwise
|
|
*/
|
|
|
|
static int compare_fields_by_table_order(Item *field1,
                                         Item *field2,
                                         void *table_join_idx)
{
  Item *real1= field1->real_item();
  Item *real2= field2->real_item();

  /* A constant item is ordered before anything else */
  if (field1->const_item() || real1->const_item())
    return -1;
  if (field2->const_item() || real2->const_item())
    return 1;

  Item_field *f1= (Item_field *) real1;
  Item_field *f2= (Item_field *) real2;

  /*
    If at least one of the fields is an outer reference the result is
    decided right here: -1 if only f1 is one, 1 if only f2 is one,
    0 if both are.
  */
  const bool f1_is_outer= (f1->used_tables() & OUTER_REF_TABLE_BIT) != 0;
  const bool f2_is_outer= (f2->used_tables() & OUTER_REF_TABLE_BIT) != 0;
  if (f1_is_outer || f2_is_outer)
    return (f2_is_outer ? 1 : 0) - (f1_is_outer ? 1 : 0);

  JOIN_TAB **tab_by_tablenr= (JOIN_TAB **) table_join_idx;
  JOIN_TAB *tab1= tab_by_tablenr[f1->field->table->tablenr];
  JOIN_TAB *tab2= tab_by_tablenr[f2->field->table->tablenr];

  /*
    If one of the tables is inside a merged SJM nest and the other one
    isn't, compare the SJM bush roots of the tables.
  */
  if (tab1->bush_root_tab != tab2->bush_root_tab)
  {
    tab1= tab1->bush_root_tab ? tab1->bush_root_tab : tab1;
    tab2= tab2->bush_root_tab ? tab2->bush_root_tab : tab2;
  }

  /* Different positions in the join order decide the comparison */
  int order= (int)(tab1 - tab2);
  if (order)
    return order < 0 ? -1 : 1;

  /* Fields f1, f2 belong to the same table */
  JOIN_TAB *tab= tab_by_tablenr[f1->field->table->tablenr];

  /* The key of the ref access, or else the index of the quick select */
  uint keyno= MAX_KEY;
  if (tab->ref.key_parts)
    keyno= tab->ref.key;
  else if (tab->select && tab->select->quick)
    keyno= tab->select->quick->index;

  if (keyno != MAX_KEY)
  {
    /*
      -1 if only f1 is a component of the key (f1 is better),
       1 if only f2 is a component of the key (f2 is better),
       0 if both or none of f1, f2 are components of the key.
    */
    const bool f1_in_key= f1->field->part_of_key.is_set(keyno);
    const bool f2_in_key= f2->field->part_of_key.is_set(keyno);
    order= (f2_in_key ? 1 : 0) - (f1_in_key ? 1 : 0);

    if (!order)
    {
      /* Still a tie: the field met first among the key parts wins */
      KEY *key_info= tab->table->key_info + keyno;
      for (uint part= 0; part < key_info->user_defined_key_parts; part++)
      {
        Field *key_field= key_info->key_part[part].field;
        if (key_field->eq(f1->field))
        {
          order= -1;                            // f1 is better
          break;
        }
        if (key_field->eq(f2->field))
        {
          order= 1;                             // f2 is better
          break;
        }
      }
    }
  }

  /* Last resort: the order of the fields in the table */
  if (!order)
    order= f1->field->field_index-f2->field->field_index;

  return order < 0 ? -1 : (order ? 1 : 0);
}
|
|
|
|
|
|
/*
  Return the nest that directly embeds the table of the field behind
  'item' if this nest has sj_mat_info with the is_used flag set.
  Return NULL otherwise.

  The real item of 'item' is cast to Item_field without any check.
*/
static TABLE_LIST* embedding_sjm(Item *item)
{
  Item_field *fld_item= (Item_field *) (item->real_item());
  TABLE_LIST *outer_nest= fld_item->field->table->pos_in_table_list->embedding;

  if (!outer_nest || !outer_nest->sj_mat_info)
    return NULL;
  return outer_nest->sj_mat_info->is_used ? outer_nest : NULL;
}
|
|
|
|
/**
|
|
Generate minimal set of simple equalities equivalent to a multiple equality.
|
|
|
|
The function retrieves the fields of the multiple equality item
|
|
item_equal and for each field f:
|
|
- if item_equal contains const it generates the equality f=const_item;
|
|
- otherwise, if f is not the first field, generates the equality
|
|
f=item_equal->get_first().
|
|
All generated equality are added to the cond conjunction.
|
|
|
|
@param cond condition to add the generated equality to
|
|
@param upper_levels structure to access multiple equality of upper levels
|
|
@param item_equal multiple equality to generate simple equality from
|
|
|
|
@note
|
|
Before generating an equality function checks that it has not
|
|
been generated for multiple equalities of the upper levels.
|
|
E.g. for the following where condition
|
|
WHERE a=5 AND ((a=b AND b=c) OR c>4)
|
|
the upper level AND condition will contain =(5,a),
|
|
while the lower level AND condition will contain =(5,a,b,c).
|
|
When splitting =(5,a,b,c) into a separate equality predicates
|
|
we should omit 5=a, as we have it already in the upper level.
|
|
The following where condition gives us a more complicated case:
|
|
WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
|
|
Given the tables are accessed in the order t1->t2->t3->t4 for
|
|
the selected query execution plan the lower level multiple
|
|
equality =(t1.a,t2.b,t3.c,t4.d) formally should be converted to
|
|
t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.b will be
|
|
generated for the upper level. Also t3.c=t4.d will be generated there.
|
|
So only t1.a=t3.c should be left in the lower level.
|
|
If cond is equal to 0, then not more then one equality is generated
|
|
and a pointer to it is returned as the result of the function.
|
|
|
|
Equality substitution and semi-join materialization nests:
|
|
|
|
In case join order looks like this:
|
|
|
|
outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3
|
|
|
|
We must not construct equalities like
|
|
|
|
outer_tbl1.col = inner_tbl1.col
|
|
|
|
because they would get attached to inner_tbl1 and will get evaluated
|
|
during materialization phase, when we don't have current value of
|
|
outer_tbl1.col.
|
|
|
|
Item_equal::get_first() also takes similar measures for dealing with
|
|
equality substitution in presence of SJM nests.
|
|
|
|
Grep for EqualityPropagationAndSjmNests for a more verbose description.
|
|
|
|
@return
|
|
- The condition with generated simple equalities or
|
|
a pointer to the simple generated equality, if success.
|
|
- 0, otherwise.
|
|
*/
|
|
|
|
Item *eliminate_item_equal(THD *thd, COND *cond, COND_EQUAL *upper_levels,
                           Item_equal *item_equal)
{
  List<Item> eq_list;
  /*
    The most recently generated equality. It is appended to eq_list only
    when the next one is generated, or after the loop.
  */
  Item_func_eq *eq_item= 0;
  /* A constant multiple equality that evaluates to false */
  if (((Item *) item_equal)->const_item() && !item_equal->val_int())
    return (Item*) Item_false;
  Item *item_const= item_equal->get_const();
  Item_equal_fields_iterator it(*item_equal);
  Item *head;
  TABLE_LIST *current_sjm= NULL;
  Item *current_sjm_head= NULL;

  DBUG_ASSERT(!cond ||
              cond->is_bool_literal() ||
              (cond->type() == Item::FUNC_ITEM &&
               ((Item_func *) cond)->functype() == Item_func::EQ_FUNC) ||
              (cond->type() == Item::COND_ITEM  &&
               ((Item_func *) cond)->functype() == Item_func::COND_AND_FUNC));

  /*
    Pick the "head" item: the constant one or the first in the join order
    (if the first in the join order happens to be inside an SJM nest, that's
    ok, because this is where the value will be unpacked after
    materialization).
  */
  if (item_const)
    head= item_const;
  else
  {
    TABLE_LIST *emb_nest;
    head= item_equal->get_first(NO_PARTICULAR_TAB, NULL);
    /* Step over the first field of the iterator */
    it++;
    if ((emb_nest= embedding_sjm(head)))
    {
      current_sjm= emb_nest;
      current_sjm_head= head;
    }
  }

  Item *field_item;
  /*
    For each other item, generate "item=head" equality (except the tables that
    are within SJ-Materialization nests, for those "head" is defined
    differently)
  */
  while ((field_item= it++))
  {
    Item_equal *upper= field_item->find_item_equal(upper_levels);
    /*
      'item' stays equal to field_item as long as the equality for
      field_item still has to be produced.
    */
    Item *item= field_item;
    TABLE_LIST *field_sjm= embedding_sjm(field_item);
    if (!field_sjm)
    {
      current_sjm= NULL;
      current_sjm_head= NULL;
    }

    /*
      Check if "field_item=head" equality is already guaranteed to be true
      on upper AND-levels.
    */
    if (upper)
    {
      TABLE_LIST *native_sjm= embedding_sjm(item_equal->context_field);
      Item *upper_const= upper->get_const();
      if (item_const && upper_const)
      {
        /*
          Upper item also has "field_item=const".
          Don't produce equality if const is equal to item_const.
        */
        Item_func_eq *func= new (thd->mem_root) Item_func_eq(thd, item_const, upper_const);
        /*
          Handle a failed allocation or comparator setup the same way as
          it is done for eq_item below.
        */
        if (!func || func->set_cmp_func(thd))
          return 0;
        func->quick_fix_field();
        if (func->val_int())
          item= 0;
      }
      else
      {
        /*
          Look for an item preceding field_item that is in the same SJM
          nest (or also in none) and in the same upper level multiple
          equality. If there is one, the loop leaves 'item' different
          from field_item.
        */
        Item_equal_fields_iterator li(*item_equal);
        while ((item= li++) != field_item)
        {
          if (embedding_sjm(item) == field_sjm &&
              item->find_item_equal(upper_levels) == upper)
            break;
        }
      }
      if (embedding_sjm(field_item) != native_sjm)
        item= NULL; /* Don't produce equality */
    }

    bool produce_equality= MY_TEST(item == field_item);
    if (!item_const && field_sjm && field_sjm != current_sjm)
    {
      /* Entering an SJM nest */
      current_sjm_head= field_item;
      if (!field_sjm->sj_mat_info->is_sj_scan)
        produce_equality= FALSE;
    }

    if (produce_equality)
    {
      /* Flush the previously generated equality into the list */
      if (eq_item && eq_list.push_back(eq_item, thd->mem_root))
        return 0;

      /*
        If we're inside an SJM-nest (current_sjm!=NULL), and the multi-equality
        doesn't include a constant, we should produce equality with the first
        of the equal items in this SJM (except for the first element inside the
        SJM. For that, we produce the equality with the "head" item).

        In other cases, get the "head" item, which is either first of the
        equals on top level, or the constant.
      */
      Item *head_item= (!item_const && current_sjm &&
                        current_sjm_head != field_item) ? current_sjm_head: head;
      eq_item= new (thd->mem_root) Item_func_eq(thd,
                                         field_item->remove_item_direct_ref(),
                                         head_item->remove_item_direct_ref());

      if (!eq_item || eq_item->set_cmp_func(thd))
        return 0;
      eq_item->eval_not_null_tables(0);
      eq_item->quick_fix_field();
    }
    current_sjm= field_sjm;
  }

  /*
    We have produced zero, one, or more pair-wise equalities eq_i. We want to
    return an expression in form:

      cond AND eq_1 AND eq_2 AND eq_3 AND ...

    'cond' is a parameter for this function, which may be NULL, an Item_bool(1),
    or an Item_func_eq or an Item_cond_and.

    We want to return a well-formed condition: no nested Item_cond_and objects,
    or Item_cond_and with a single child:
    - if 'cond' is an Item_cond_and, we add eq_i as its tail
    - if 'cond' is Item_bool(1), we return eq_i
    - otherwise, we create our own Item_cond_and and put 'cond' at the front of
      it.
    - if we have only one condition to return, we don't create an Item_cond_and
  */

  /* Append the last generated equality, which is still pending */
  if (eq_item && eq_list.push_back(eq_item, thd->mem_root))
    return 0;
  COND *res= 0;
  switch (eq_list.elements)
  {
  case 0:
    res= cond ? cond : (Item*) Item_true;
    break;
  case 1:
    if (!cond || cond->is_bool_literal())
      res= eq_item;
    break;
  default:
    break;
  }
  if (!res)
  {
    if (cond)
    {
      if (cond->type() == Item::COND_ITEM)
      {
        res= cond;
        ((Item_cond *) res)->add_at_end(&eq_list);
      }
      else if (eq_list.push_front(cond, thd->mem_root))
        return 0;
    }
  }
  if (!res)
    res= new (thd->mem_root) Item_cond_and(thd, eq_list);
  if (res)
  {
    res->quick_fix_field();
    res->update_used_tables();
    res->eval_not_null_tables(0);
  }

  return res;
}
|
|
|
|
|
|
/**
|
|
Substitute every field reference in a condition by the best equal field
|
|
and eliminate all multiple equality predicates.
|
|
|
|
The function retrieves the cond condition and for each encountered
|
|
multiple equality predicate it sorts the field references in it
|
|
according to the order of tables specified by the table_join_idx
|
|
parameter. Then it eliminates the multiple equality predicate it
|
|
replacing it by the conjunction of simple equality predicates
|
|
equating every field from the multiple equality to the first
|
|
field in it, or to the constant, if there is any.
|
|
After this the function retrieves all other conjuncted
|
|
predicates substitute every field reference by the field reference
|
|
to the first equal field or equal constant if there are any.
|
|
|
|
@param context_tab Join tab that 'cond' will be attached to, or
|
|
NO_PARTICULAR_TAB. See notes above.
|
|
@param cond condition to process
|
|
@param cond_equal multiple equalities to take into consideration
|
|
@param table_join_idx index to tables determining field preference
|
|
@param do_substitution if false: do not do any field substitution
|
|
|
|
@note
|
|
At the first glance full sort of fields in multiple equality
|
|
seems to be an overkill. Yet it's not the case due to possible
|
|
new fields in multiple equality item of lower levels. We want
|
|
the order in them to comply with the order of upper levels.
|
|
|
|
context_tab may be used to specify which join tab `cond` will be
|
|
attached to. There are two possible cases:
|
|
|
|
1. context_tab != NO_PARTICULAR_TAB
|
|
We're doing substitution for an Item which will be evaluated in the
|
|
context of a particular item. For example, if the optimizer does a
|
|
ref access on "tbl1.key= expr" then
|
|
= equality substitution will be performed on 'expr'
|
|
= it is known in advance that 'expr' will be evaluated when
|
|
table t1 is accessed.
|
|
Note that in this kind of substitution we never have to replace Item_equal
|
|
objects. For example, for
|
|
|
|
t.key= func(col1=col2 AND col2=const)
|
|
|
|
we will not build Item_equal or do equality substitution (if we decide to,
|
|
this function will need to be fixed to handle it)
|
|
|
|
2. context_tab == NO_PARTICULAR_TAB
|
|
We're doing substitution in WHERE/ON condition, which is not yet
|
|
attached to any particular join_tab. We will use information about the
|
|
chosen join order to make "optimal" substitutions, i.e. those that allow
|
|
to apply filtering as soon as possible. See eliminate_item_equal() and
|
|
Item_equal::get_first() for details.
|
|
|
|
@return
|
|
The transformed condition, or NULL in case of error
|
|
*/
|
|
|
|
static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab,
                                             COND *cond,
                                             COND_EQUAL *cond_equal,
                                             void *table_join_idx,
                                             bool do_substitution)
{
  Item_equal *mult_eq;

  if (cond->type() != Item::COND_ITEM)
  {
    if (cond->type() == Item::FUNC_ITEM &&
        ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
    {
      /*
        A stand-alone multiple equality: order its fields, then expand it
        through eliminate_item_equal(). If the expansion fails, the
        untouched input condition is handed back.
      */
      COND *const original= cond;
      mult_eq= (Item_equal *) cond;
      mult_eq->sort(&compare_fields_by_table_order, table_join_idx);

      COND_EQUAL *upper= mult_eq->upper_levels;
      if (upper && upper->current_level.head() == mult_eq)
        upper= upper->upper_levels;

      /* An equality marked for deletion yields no condition at all */
      if (mult_eq->get_extraction_flag() == MARKER_DELETION)
        return 0;

      COND *expanded= eliminate_item_equal(thd, 0, upper, mult_eq);
      return expanded ? expanded : original;
    }

    if (!do_substitution)
      return cond;

    /*
      Any other predicate: apply Item::replace_equal_field for every
      multiple equality of this level and of all the upper levels.
    */
    for (COND_EQUAL *level= cond_equal; level; level= level->upper_levels)
    {
      List_iterator_fast<Item_equal> eq_it(level->current_level);
      while ((mult_eq= eq_it++))
      {
        REPLACE_EQUAL_FIELD_ARG arg= {mult_eq, context_tab};
        cond= cond->transform(thd, &Item::replace_equal_field,
                              (uchar *) &arg);
        if (!cond)
          return 0;
      }
    }
    return cond;
  }

  /* From here on 'cond' is an AND/OR item */
  Item_cond *const cond_item= (Item_cond *) cond;
  List<Item> *const args= cond_item->argument_list();
  const bool is_and= cond_item->functype() == Item_func::COND_AND_FUNC;

  if (is_and)
  {
    cond_equal= &((Item_cond_and *) cond)->m_cond_equal;
    /* Detach the Item_equal objects from the argument list of the AND */
    args->disjoin((List<Item> *) &cond_equal->current_level);

    List_iterator_fast<Item_equal> sort_it(cond_equal->current_level);
    while ((mult_eq= sort_it++))
      mult_eq->sort(&compare_fields_by_table_order, table_join_idx);
  }

  /*
    Process every remaining argument recursively. Replacing arguments of
    AND/OR items in place is OK with PS/SP re-execution.
  */
  {
    List_iterator<Item> arg_it(*args);
    Item *arg;
    while ((arg= arg_it++))
    {
      Item *replacement=
        substitute_for_best_equal_field(thd, context_tab, arg, cond_equal,
                                        table_join_idx, do_substitution);
      if (replacement && replacement != arg)
        arg_it.replace(replacement);
    }
  }

  if (!is_and)
    return cond;

  /*
    Turn the multiple equalities of this AND level into a conjunction of
    simple equalities, accumulated in eq_cond.
  */
  COND *eq_cond= NULL;
  bool always_false= false;
  bool all_deleted= true;
  List_iterator_fast<Item_equal> eq_it(cond_equal->current_level);
  while ((mult_eq= eq_it++))
  {
    if (mult_eq->get_extraction_flag() == MARKER_DELETION)
      continue;
    all_deleted= false;
    eq_cond= eliminate_item_equal(thd, eq_cond, cond_equal->upper_levels,
                                  mult_eq);
    if (!eq_cond)
      break;                                    // failure, handled below
    if (eq_cond->is_bool_literal() && !eq_cond->val_bool())
    {
      /*
        eliminate_item_equal() reported an always-false condition:
        the whole AND is false, so return that literal.
      */
      cond= eq_cond;
      always_false= true;
      break;
    }
  }

  if (eq_cond && !always_false)
  {
    /* Put the generated equalities in front of all other conditions */
    if (eq_cond->type() == Item::COND_ITEM)
      cond_item->add_at_head(((Item_cond *) eq_cond)->argument_list());
    else if (args->is_empty())
      cond= eq_cond;
    else if (!eq_cond->is_bool_literal() &&     // skip an always-true literal
             args->push_front(eq_cond, thd->mem_root))
      eq_cond= NULL;                            // push_front() failed
  }

  if (!eq_cond && !all_deleted)
  {
    /*
      The transformation failed (out of memory). Re-attach the Item_equal
      objects so that the condition we had at the start is returned.
    */
    args->append((List<Item> *) &cond_equal->current_level);
  }
  return cond;
}
|
|
|
|
|
|
/**
|
|
Check appearance of new constant items in multiple equalities
|
|
of a condition after reading a constant table.
|
|
|
|
The function retrieves the cond condition and for each encountered
|
|
multiple equality checks whether new constants have appeared after
|
|
reading the constant (single row) table tab. If so it adjusts
|
|
the multiple equality appropriately.
|
|
|
|
@param cond condition whose multiple equalities are to be checked
|
|
@param tab constant table (its join tab) that has been read
|
|
@param const_key mark key parts as constant
|
|
*/
|
|
|
|
static void update_const_equal_items(THD *thd, COND *cond, JOIN_TAB *tab,
                                     bool const_key)
{
  /* Nothing to do for conditions that do not refer to the table */
  if (!(cond->used_tables() & tab->table->map))
    return;

  if (cond->type() == Item::COND_ITEM)
  {
    Item_cond *cond_item= (Item_cond*) cond;
    /*
      The flag passed to the arguments is recomputed here: it is set only
      below a top-level AND.
    */
    const bool args_const_key= cond->is_top_level_item() &&
                               cond_item->functype() ==
                               Item_func::COND_AND_FUNC;
    List_iterator_fast<Item> arg_it(*cond_item->argument_list());
    Item *arg;
    while ((arg= arg_it++))
      update_const_equal_items(thd, arg, tab, args_const_key);
    return;
  }

  if (cond->type() != Item::FUNC_ITEM ||
      ((Item_func*) cond)->functype() != Item_func::MULT_EQUAL_FUNC)
    return;

  Item_equal *mult_eq= (Item_equal *) cond;
  const bool had_const= mult_eq->get_const() != NULL;
  mult_eq->update_const(thd);
  /* Continue only if the multiple equality has just acquired a constant */
  if (had_const || !mult_eq->get_const())
    return;

  /* Update keys for range analysis */
  Item_equal_fields_iterator fld_it(*mult_eq);
  while (fld_it++)
  {
    Field *field= fld_it.get_curr_field();
    TABLE *field_tab= field->table;
    JOIN_TAB *stat= field_tab->reginfo.join_tab;

    key_map possible_keys= field->key_start;
    possible_keys.intersect(field_tab->keys_in_use_for_query);
    stat->const_keys.merge(possible_keys);

    /*
      The field is now known to be constant: find the key parts built on
      it and set them in const_key_parts. This is done only when
      const_key is set.
    */
    if (!const_key || possible_keys.is_clear_all())
      continue;

    for (KEYUSE *use= stat->keyuse; use && use->table == field_tab; use++)
    {
      if (use->is_for_hash_join() || !possible_keys.is_set(use->key))
        continue;
      if (field_tab->key_info[use->key].key_part[use->keypart].field == field)
        field_tab->const_key_parts[use->key]|= use->keypart_map;
    }
  }
}
|
|
|
|
|
|
/**
|
|
Check if
|
|
WHERE expr=value AND expr=const
|
|
can be rewritten as:
|
|
WHERE const=value AND expr=const
|
|
|
|
@param target - the target operator whose "expr" argument will be
|
|
replaced to "const".
|
|
@param target_expr - the target's "expr" which will be replaced to "const".
|
|
@param target_value - the target's second argument, it will remain unchanged.
|
|
@param source - the equality expression ("=" or "<=>") that
|
|
can be used to rewrite the "target" part
|
|
(under certain conditions, see the code).
|
|
@param source_expr - the source's "expr". It should be exactly equal to
|
|
the target's "expr" to make condition rewrite possible.
|
|
@param source_const - the source's "const" argument, it will be inserted
|
|
into "target" instead of "expr".
|
|
*/
|
|
static bool
|
|
can_change_cond_ref_to_const(Item_bool_func2 *target,
|
|
Item *target_expr, Item *target_value,
|
|
Item_bool_func2 *source,
|
|
Item *source_expr, Item *source_const)
|
|
{
|
|
return target_expr->eq(source_expr,0) &&
|
|
target_value != source_const &&
|
|
target->compare_type_handler()->
|
|
can_change_cond_ref_to_const(target, target_expr, target_value,
|
|
source, source_expr, source_const);
|
|
}
|
|
|
|
|
|
/*
|
|
change field = field to field = const for each found field = const in the
|
|
and_level
|
|
*/
|
|
|
|
static void
|
|
change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
|
|
Item *and_father, Item *cond,
|
|
Item_bool_func2 *field_value_owner,
|
|
Item *field, Item *value)
|
|
{
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
bool and_level= ((Item_cond*) cond)->functype() ==
|
|
Item_func::COND_AND_FUNC;
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item=li++))
|
|
change_cond_ref_to_const(thd, save_list,and_level ? cond : item, item,
|
|
field_value_owner, field, value);
|
|
return;
|
|
}
|
|
if (cond->eq_cmp_result() == Item::COND_OK)
|
|
return; // Not a boolean function
|
|
|
|
Item_bool_func2 *func= (Item_bool_func2*) cond;
|
|
Item **args= func->arguments();
|
|
Item *left_item= args[0];
|
|
Item *right_item= args[1];
|
|
Item_func::Functype functype= func->functype();
|
|
|
|
if (can_change_cond_ref_to_const(func, right_item, left_item,
|
|
field_value_owner, field, value))
|
|
{
|
|
Item *tmp=value->clone_item(thd);
|
|
if (tmp)
|
|
{
|
|
tmp->collation.set(right_item->collation);
|
|
thd->change_item_tree(args + 1, tmp);
|
|
func->update_used_tables();
|
|
if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
|
|
&& and_father != cond && !left_item->const_item())
|
|
{
|
|
cond->marker= MARKER_CHANGE_COND;
|
|
COND_CMP *tmp2;
|
|
/* Will work, even if malloc would fail */
|
|
if ((tmp2= new (thd->mem_root) COND_CMP(and_father, func)))
|
|
save_list->push_back(tmp2);
|
|
}
|
|
/*
|
|
LIKE can be optimized for BINARY/VARBINARY/BLOB columns, e.g.:
|
|
|
|
from: WHERE CONCAT(c1)='const1' AND CONCAT(c1) LIKE 'const2'
|
|
to: WHERE CONCAT(c1)='const1' AND 'const1' LIKE 'const2'
|
|
|
|
So make sure to use set_cmp_func() only for non-LIKE operators.
|
|
*/
|
|
if (functype != Item_func::LIKE_FUNC)
|
|
((Item_bool_rowready_func2*) func)->set_cmp_func(thd);
|
|
}
|
|
}
|
|
else if (can_change_cond_ref_to_const(func, left_item, right_item,
|
|
field_value_owner, field, value))
|
|
{
|
|
Item *tmp= value->clone_item(thd);
|
|
if (tmp)
|
|
{
|
|
tmp->collation.set(left_item->collation);
|
|
thd->change_item_tree(args, tmp);
|
|
value= tmp;
|
|
func->update_used_tables();
|
|
if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
|
|
&& and_father != cond && !right_item->const_item())
|
|
{
|
|
args[0]= args[1]; // For easy check
|
|
thd->change_item_tree(args + 1, value);
|
|
cond->marker= MARKER_CHANGE_COND;
|
|
COND_CMP *tmp2;
|
|
/* Will work, even if malloc would fail */
|
|
if ((tmp2=new (thd->mem_root) COND_CMP(and_father, func)))
|
|
save_list->push_back(tmp2);
|
|
}
|
|
if (functype != Item_func::LIKE_FUNC)
|
|
((Item_bool_rowready_func2*) func)->set_cmp_func(thd);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
|
|
COND *and_father, COND *cond)
|
|
{
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
bool and_level= ((Item_cond*) cond)->functype() ==
|
|
Item_func::COND_AND_FUNC;
|
|
List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
I_List<COND_CMP> save;
|
|
while ((item=li++))
|
|
{
|
|
propagate_cond_constants(thd, &save,and_level ? cond : item, item);
|
|
}
|
|
if (and_level)
|
|
{ // Handle other found items
|
|
I_List_iterator<COND_CMP> cond_itr(save);
|
|
COND_CMP *cond_cmp;
|
|
while ((cond_cmp=cond_itr++))
|
|
{
|
|
Item **args= cond_cmp->cmp_func->arguments();
|
|
if (!args[0]->const_item())
|
|
change_cond_ref_to_const(thd, &save,cond_cmp->and_level,
|
|
cond_cmp->and_level,
|
|
cond_cmp->cmp_func, args[0], args[1]);
|
|
}
|
|
}
|
|
}
|
|
else if (and_father != cond && cond->marker == MARKER_UNUSED) // In a AND group
|
|
{
|
|
if (cond->type() == Item::FUNC_ITEM &&
|
|
(((Item_func*) cond)->functype() == Item_func::EQ_FUNC ||
|
|
((Item_func*) cond)->functype() == Item_func::EQUAL_FUNC))
|
|
{
|
|
Item_bool_func2 *func= dynamic_cast<Item_bool_func2*>(cond);
|
|
Item **args= func->arguments();
|
|
bool left_const= args[0]->can_eval_in_optimize();
|
|
bool right_const= args[1]->can_eval_in_optimize();
|
|
if (!(left_const && right_const) &&
|
|
args[0]->cmp_type() == args[1]->cmp_type())
|
|
{
|
|
if (right_const)
|
|
{
|
|
resolve_const_item(thd, &args[1], args[0]);
|
|
func->update_used_tables();
|
|
change_cond_ref_to_const(thd, save_list, and_father, and_father,
|
|
func, args[0], args[1]);
|
|
}
|
|
else if (left_const)
|
|
{
|
|
resolve_const_item(thd, &args[0], args[1]);
|
|
func->update_used_tables();
|
|
change_cond_ref_to_const(thd, save_list, and_father, and_father,
|
|
func, args[1], args[0]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
Simplify joins replacing outer joins by inner joins whenever it's
|
|
possible.
|
|
|
|
The function, during a retrieval of join_list, eliminates those
|
|
outer joins that can be converted into inner join, possibly nested.
|
|
It also moves the on expressions for the converted outer joins
|
|
and from inner joins to conds.
|
|
The function also calculates some attributes for nested joins:
|
|
- used_tables
|
|
- not_null_tables
|
|
- dep_tables.
|
|
- on_expr_dep_tables
|
|
The first two attributes are used to test whether an outer join can
|
|
be substituted for an inner join. The third attribute represents the
|
|
relation 'to be dependent on' for tables. If table t2 is dependent
|
|
on table t1, then in any evaluated execution plan table access to
|
|
table t1 must precede access to table t2. This relation is used also
|
|
to check whether the query contains invalid cross-references.
|
|
The fourth attribute is an auxiliary one and is used to calculate
|
|
dep_tables.
|
|
As the attribute dep_tables qualifies possible orders of tables in the
|
|
execution plan, the dependencies required by the straight join
|
|
modifiers are reflected in this attribute as well.
|
|
The function also removes all braces that can be removed from the join
|
|
expression without changing its meaning.
|
|
|
|
@note
|
|
An outer join can be replaced by an inner join if the where condition
|
|
or the on expression for an embedding nested join contains a conjunctive
|
|
predicate rejecting null values for some attribute of the inner tables.
|
|
|
|
E.g. in the query:
|
|
@code
|
|
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
|
|
@endcode
|
|
the predicate t2.b < 5 rejects nulls.
|
|
The query is converted first to:
|
|
@code
|
|
SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
|
|
@endcode
|
|
then to the equivalent form:
|
|
@code
|
|
SELECT * FROM t1, t2 WHERE t2.b < 5 AND t2.a=t1.a
|
|
@endcode
|
|
|
|
|
|
Similarly the following query:
|
|
@code
|
|
SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a AND t3.b=t1.b
|
|
WHERE t2.c < 5
|
|
@endcode
|
|
is converted to:
|
|
@code
|
|
SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
|
|
|
|
@endcode
|
|
|
|
One conversion might trigger another:
|
|
@code
|
|
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a
|
|
LEFT JOIN t3 ON t3.b=t2.b
|
|
WHERE t3 IS NOT NULL =>
|
|
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3
|
|
WHERE t3 IS NOT NULL AND t3.b=t2.b =>
|
|
SELECT * FROM t1, t2, t3
|
|
WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a
|
|
@endcode
|
|
|
|
The function removes all unnecessary braces from the expression
|
|
produced by the conversions.
|
|
E.g.
|
|
@code
|
|
SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
|
|
@endcode
|
|
finally is converted to:
|
|
@code
|
|
SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
|
|
|
|
@endcode
|
|
|
|
|
|
It also will remove braces from the following queries:
|
|
@code
|
|
SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b
|
|
SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b.
|
|
@endcode
|
|
|
|
The benefit of this simplification procedure is that it might return
|
|
a query for which the optimizer can evaluate execution plan with more
|
|
join orders. With a left join operation the optimizer does not
|
|
consider any plan where one of the inner tables is before some of outer
|
|
tables.
|
|
|
|
IMPLEMENTATION
|
|
The function is implemented by a recursive procedure. On the recursive
|
|
ascent all attributes are calculated, all outer joins that can be
|
|
converted are replaced and then all unnecessary braces are removed.
|
|
As join list contains join tables in the reverse order sequential
|
|
elimination of outer joins does not require extra recursive calls.
|
|
|
|
SEMI-JOIN NOTES
|
|
Remove all semi-joins that are within another semi-join (i.e. have
|
|
an "ancestor" semi-join nest)
|
|
|
|
EXAMPLES
|
|
Here is an example of a join query with invalid cross references:
|
|
@code
|
|
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b
|
|
@endcode
|
|
|
|
@param join reference to the query info
|
|
@param join_list list representation of the join to be converted
|
|
@param conds conditions to add on expressions for converted joins
|
|
@param top true <=> conds is the where condition
|
|
@param in_sj TRUE <=> processing semi-join nest's children
|
|
@return
|
|
- The new condition, if success
|
|
- 0, otherwise
|
|
*/
|
|
|
|
static COND *
simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top,
               bool in_sj)
{
  const bool straight_join= MY_TEST(join->select_options &
                                    SELECT_STRAIGHT_JOIN);
  List_iterator<TABLE_LIST> li(*join_list);
  TABLE_LIST *table;
  TABLE_LIST *prev_table= NULL;
  DBUG_ENTER("simplify_joins");

  /*
    Pass 1: try to simplify join operations from join_list.
    The most outer join operation is checked for conversion first.
  */
  while ((table= li++))
  {
    NESTED_JOIN *nested_join= table->nested_join;
    table_map used_tables;
    table_map not_null_tables= (table_map) 0;

    if (nested_join)
    {
      /* A nested join: apply the procedure to its join list first */
      if (table->on_expr)
      {
        /*
          Simplify the nested list against the ON expression of the nest,
          so that ON expressions of embedded outer joins converted to
          inner joins get added to it.
        */
        Item *new_on_expr= simplify_joins(join, &nested_join->join_list,
                                          table->on_expr, FALSE,
                                          in_sj || table->sj_on_expr);

        if (!table->prep_on_expr || new_on_expr != table->on_expr)
        {
          DBUG_ASSERT(new_on_expr);

          table->on_expr= new_on_expr;
          table->prep_on_expr= new_on_expr->copy_andor_structure(join->thd);
        }
      }
      nested_join->used_tables= (table_map) 0;
      nested_join->not_null_tables= (table_map) 0;
      conds= simplify_joins(join, &nested_join->join_list, conds, top,
                            in_sj || table->sj_on_expr);
      used_tables= nested_join->used_tables;
      not_null_tables= nested_join->not_null_tables;
      /* The following two might become unequal after table elimination: */
      nested_join->n_tables= nested_join->join_list.elements;
    }
    else
    {
      if (!table->prep_on_expr)
        table->prep_on_expr= table->on_expr;
      used_tables= table->get_map();
      if (conds)
        not_null_tables= conds->not_null_tables();
    }

    /* Accumulate the attributes in the embedding nested join */
    if (table->embedding)
    {
      NESTED_JOIN *outer_nest= table->embedding->nested_join;
      outer_nest->used_tables|= used_tables;
      outer_nest->not_null_tables|= not_null_tables;
    }

    const bool is_outer_join= table->outer_join &
                              (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT);
    if (!is_outer_join || (used_tables & not_null_tables))
    {
      /*
        Either this is not an outer join, or some conjunctive predicates
        reject nulls for its inner tables => treat it as an inner join.
      */
      if (table->outer_join && !table->embedding && table->table)
        table->table->maybe_null= FALSE;
      table->outer_join= 0;

      if (!(straight_join || table->straight))
      {
        table->dep_tables= 0;
        /*
          Find the closest embedding nest whose first list element is an
          outer join; inherit its dependencies unless it is a semi-join
          nest.
        */
        for (TABLE_LIST *emb= table->embedding; emb; emb= emb->embedding)
        {
          if (!emb->nested_join->join_list.head()->outer_join)
            continue;
          if (!emb->sj_subq_pred)
            table->dep_tables= emb->dep_tables;
          break;
        }
      }

      if (table->on_expr)
      {
        /* Add ON expression to the WHERE or upper-level ON condition. */
        if (!conds)
          conds= table->on_expr;
        else
        {
          conds= and_conds(join->thd, conds, table->on_expr);
          conds->top_level_item();
          /* conds is always a new item as both cond and on_expr existed */
          DBUG_ASSERT(!conds->fixed());
          conds->fix_fields(join->thd, &conds);
        }
        table->prep_on_expr= table->on_expr= 0;
      }
    }

    /*
      Only inner tables of non-convertible outer joins
      remain with on_expr.
    */
    if (table->on_expr)
    {
      const table_map on_expr_tables= table->on_expr->used_tables();
      table->dep_tables|= on_expr_tables;
      if (!table->embedding)
        table->dep_tables&= ~table->get_map();
      else
      {
        table->dep_tables&= ~table->embedding->nested_join->used_tables;
        /*
          Embedding table depends on tables used
          in embedded on expressions.
        */
        table->embedding->on_expr_dep_tables|= on_expr_tables;
      }
    }

    if (prev_table)
    {
      /* The order of tables is reverse: prev_table follows table */
      if (prev_table->straight || straight_join)
        prev_table->dep_tables|= used_tables;

      if (prev_table->on_expr)
      {
        prev_table->dep_tables|= table->on_expr_dep_tables;

        table_map prev_used_tables;
        if (prev_table->nested_join)
          prev_used_tables= prev_table->nested_join->used_tables;
        else
          prev_used_tables= prev_table->get_map();

        /*
          If on expression contains only references to inner tables
          we still make the inner tables dependent on the outer tables.
          It would be enough to set dependency only on one outer table
          for them. Yet this is really a rare case.
          Note:
          RAND_TABLE_BIT mask should not be counted as it
          prevents update of inner table dependencies.
          For example it might happen if RAND() function
          is used in JOIN ON clause.
        */
        const table_map prev_on_expr_tables=
          prev_table->on_expr->used_tables() &
          ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
        if (!(prev_on_expr_tables & ~prev_used_tables))
          prev_table->dep_tables|= used_tables;
      }
    }
    prev_table= table;
  }

  /*
    Pass 2: flatten nested joins that can be flattened.
    no ON expression and not a semi-join => can be flattened.
  */
  li.rewind();
  while ((table= li++))
  {
    NESTED_JOIN *nested_join= table->nested_join;

    if (table->sj_on_expr && !in_sj)
    {
      /*
        If this is a semi-join that is not contained within another
        semi-join leave it intact (otherwise it is flattened).

        Make sure that any semi-join appears in
        the join->select_lex->sj_nests list only once.
      */
      bool already_listed= false;
      List_iterator_fast<TABLE_LIST> sj_it(join->select_lex->sj_nests);
      TABLE_LIST *sj_nest;
      while ((sj_nest= sj_it++))
      {
        if (sj_nest == table)
        {
          already_listed= true;
          break;
        }
      }
      if (already_listed)
        continue;
      join->select_lex->sj_nests.push_back(table, join->thd->mem_root);

      /*
        Also, walk through semi-join children and mark those that are now
        top-level
      */
      List_iterator<TABLE_LIST> child_it(nested_join->join_list);
      TABLE_LIST *child;
      while ((child= child_it++))
      {
        if (!child->on_expr && child->table)
          child->table->maybe_null= FALSE;
      }
    }
    else if (nested_join && !table->on_expr)
    {
      /* Replace the nest in join_list by the list of its children */
      List<TABLE_LIST> repl_list;
      List_iterator<TABLE_LIST> child_it(nested_join->join_list);
      TABLE_LIST *child;
      while ((child= child_it++))
      {
        child->embedding= table->embedding;
        if (!child->embedding && !child->on_expr && child->table)
          child->table->maybe_null= FALSE;
        child->join_list= table->join_list;
        repl_list.push_back(child, join->thd->mem_root);
        child->dep_tables|= table->dep_tables;
      }
      li.replace(repl_list);
    }
  }
  DBUG_RETURN(conds);
}
|
|
|
|
|
|
/**
|
|
Assign each nested join structure a bit in nested_join_map.
|
|
|
|
Assign each nested join structure (except ones that embed only one element
|
|
and so are redundant) a bit in nested_join_map.
|
|
|
|
@param join Join being processed
|
|
@param join_list List of tables
|
|
@param first_unused Number of first unused bit in nested_join_map before the
|
|
call
|
|
|
|
@note
|
|
This function is called after simplify_joins(), when there are no
|
|
redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so
|
|
we will not run out of bits in nested_join_map.
|
|
|
|
@return
|
|
First unused bit in nested_join_map after the call.
|
|
*/
|
|
|
|
static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
                                          uint first_unused)
{
  List_iterator<TABLE_LIST> tbl_it(*join_list);
  TABLE_LIST *tbl;
  DBUG_ENTER("build_bitmap_for_nested_joins");

  while ((tbl= tbl_it++))
  {
    NESTED_JOIN *nest= tbl->nested_join;
    if (!nest)
      continue;                                 // a base table

    /*
      It is guaranteed by simplify_joins() function that a nested join
      that has only one child represents a single table VIEW (and the child
      is an underlying table). We don't assign bits to such nested join
      structures because
      1. it is redundant (a "sequence" of one table cannot be interleaved
         with anything)
      2. we could run out of bits in nested_join_map otherwise.
    */
    if (nest->n_tables == 1)
      continue;

    /* Don't assign bits to sj-nests */
    if (tbl->on_expr)
    {
      nest->nj_map= (nested_join_map) 1 << first_unused;
      first_unused++;
    }
    first_unused= build_bitmap_for_nested_joins(&nest->join_list,
                                                first_unused);
  }
  DBUG_RETURN(first_unused);
}
|
|
|
|
|
|
/**
|
|
Set NESTED_JOIN::counter and n_tables in all nested joins in passed list.
|
|
|
|
For all nested joins contained in the passed join_list (including its
|
|
children), set:
|
|
- nested_join->counter=0
|
|
- nested_join->n_tables= {number of non-degenerate direct children}.
|
|
|
|
Non-degenerate means non-const base table or a join nest that has a
|
|
non-degenerate child.
|
|
|
|
@param join_list List of nested joins to process. It may also contain base
|
|
tables which will be ignored.
|
|
*/
|
|
|
|
static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list)
{
  List_iterator<TABLE_LIST> tbl_it(*join_list);
  TABLE_LIST *tbl;
  uint live_children= 0;
  DBUG_ENTER("reset_nj_counters");

  while ((tbl= tbl_it++))
  {
    NESTED_JOIN *nest= tbl->nested_join;
    if (nest)
    {
      /* A join nest counts only if it has a counted child of its own */
      nest->counter= 0;
      nest->n_tables= reset_nj_counters(join, &nest->join_list);
      if (nest->n_tables)
        live_children++;
    }
    else
    {
      /* A base table counts unless it is eliminated or constant */
      const table_map removed_tables= join->eliminated_tables |
                                      join->const_table_map;
      if (tbl->table->map & ~removed_tables)
        live_children++;
    }
  }
  DBUG_RETURN(live_children);
}
|
|
|
|
|
|
/**
|
|
Check interleaving with inner tables of an outer join for
|
|
extension table.
|
|
|
|
Check if table next_tab can be added to current partial join order, and
|
|
if yes, record that it has been added.
|
|
|
|
The function assumes that both current partial join order and its
|
|
extension with next_tab are valid wrt table dependencies.
|
|
|
|
@verbatim
|
|
IMPLEMENTATION
|
|
LIMITATIONS ON JOIN ORDER
|
|
The nested [outer] joins executioner algorithm imposes these
|
|
limitations on join order:
|
|
1. "Outer tables first" - any "outer" table must be before any
|
|
corresponding "inner" table.
|
|
2. "No interleaving" - tables inside a nested join must form a
|
|
continuous sequence in join order (i.e. the sequence must not be
|
|
interrupted by tables that are outside of this nested join).
|
|
|
|
#1 is checked elsewhere, this function checks #2 provided that #1 has
|
|
been already checked.
|
|
|
|
WHY NEED NON-INTERLEAVING
|
|
Consider an example:
|
|
|
|
select * from t0 join t1 left join (t2 join t3) on cond1
|
|
|
|
The join order "t1 t2 t0 t3" is invalid:
|
|
|
|
table t0 is outside of the nested join, so WHERE condition
|
|
for t0 is attached directly to t0 (without triggers, and it
|
|
may be used to access t0). Applying WHERE(t0) to (t2,t0,t3)
|
|
record is invalid as we may miss combinations of (t1, t2, t3)
|
|
that satisfy condition cond1, and produce a null-complemented
|
|
(t1, t2.NULLs, t3.NULLs) row, which should not have been
|
|
produced.
|
|
|
|
If table t0 is not between t2 and t3, the problem doesn't exist:
|
|
If t0 is located after (t2,t3), WHERE(t0) is applied after nested
|
|
join processing has finished.
|
|
If t0 is located before (t2,t3), predicates like WHERE_cond(t0, t2)
|
|
are wrapped into condition triggers, which takes care of correct
|
|
nested join processing.
|
|
|
|
HOW IT IS IMPLEMENTED
|
|
The limitations on join order can be rephrased as follows: for valid
|
|
join order one must be able to:
|
|
1. write down the used tables in the join order on one line.
|
|
2. for each nested join, put one '(' and one ')' on the said line
|
|
3. write "LEFT JOIN" and "ON (...)" where appropriate
|
|
4. get a query equivalent to the query we're trying to execute.
|
|
|
|
Calls to check_interleaving_with_nj() are equivalent to writing the
|
|
above described line from left to right.
|
|
|
|
A single check_interleaving_with_nj(A,B) call is equivalent
|
|
to writing table B and appropriate brackets on condition that
|
|
table A and appropriate brackets is the last what was
|
|
written. Graphically the transition is as follows:
|
|
|
|
+---- current position
|
|
|
|
|
... last_tab ))) | ( next_tab ) )..) | ...
|
|
X Y Z |
|
|
+- need to move to this
|
|
position.
|
|
|
|
Notes about the position:
|
|
The caller guarantees that there is no more than one X-bracket by
|
|
checking "!(remaining_tables & s->dependent)" before calling this
|
|
function. X-bracket may have a pair in Y-bracket.
|
|
|
|
When "writing" we store/update this auxiliary info about the current
|
|
position:
|
|
1. join->cur_embedding_map - bitmap of pairs of brackets (aka nested
|
|
joins) we've opened but didn't close.
|
|
2. {each NESTED_JOIN structure not simplified away}->counter - number
|
|
of this nested join's children that have already been added to
|
|
the partial join order.
|
|
@endverbatim
|
|
|
|
@param next_tab Table we're going to extend the current partial join with
|
|
|
|
@retval
|
|
FALSE Join order extended, nested joins info about current join
|
|
order (see NOTE section) updated.
|
|
@retval
|
|
TRUE Requested join order extension not allowed.
|
|
*/
|
|
|
|
static bool check_interleaving_with_nj(JOIN_TAB *next_tab)
{
  /*
    Try to append next_tab to the partial join order from the nested-join
    point of view. Returns TRUE when the extension is not allowed, FALSE
    after the nest counters and join->cur_embedding_map have been updated.
  */
  JOIN *const join= next_tab->join;

  /*
    Some currently open "pair of brackets" does not contain next_tab,
    so the table cannot be placed here.
  */
  if ((join->cur_embedding_map & ~next_tab->embedding_map) != 0)
    return TRUE;

  /*
    Walk up the chain of embedding nests (the X, Y, Z brackets of the
    picture in the function comment), stopping at join->emb_sjm_nest.
  */
  for (TABLE_LIST *nest= next_tab->table->pos_in_table_list->embedding;
       nest && nest != join->emb_sjm_nest;
       nest= nest->embedding)
  {
    /* Nests that have sj_on_expr set are skipped without any bookkeeping */
    if (nest->sj_on_expr)
      continue;

    NESTED_JOIN *const nj= nest->nested_join;

    /*
      First table of this nest: we have "entered" it (the X bracket).
      Do not stop here, the X bracket may have a matching Y bracket.
    */
    if (++nj->counter == 1)
      join->cur_embedding_map|= nj->nj_map;

    DBUG_ASSERT(nj->n_tables >= nj->counter);

    /* The nest still has tables missing: outer nests are not affected */
    if (nj->n_tables != nj->counter)
      break;

    /*
      All tables of the nest are in the join order (Y or Z bracket):
      mark the nest as left and continue with its parent.
    */
    join->cur_embedding_map&= ~nj->nj_map;
  }
  return FALSE;
}
|
|
|
|
|
|
/**
|
|
Nested joins perspective: Remove the last table from the join order.
|
|
|
|
The algorithm is the reciprocal of check_interleaving_with_nj(), hence
|
|
parent join nest nodes are updated only when the last table in its child
|
|
node is removed. The ASCII graphic below will clarify.
|
|
|
|
%A table nesting such as <tt> t1 x [ ( t2 x t3 ) x ( t4 x t5 ) ] </tt>is
|
|
represented by the below join nest tree.
|
|
|
|
@verbatim
|
|
NJ1
|
|
_/ / \
|
|
_/ / NJ2
|
|
_/ / / \
|
|
/ / / \
|
|
t1 x [ (t2 x t3) x (t4 x t5) ]
|
|
@endverbatim
|
|
|
|
At the point in time when check_interleaving_with_nj() adds the table t5 to
|
|
the query execution plan, QEP, it also directs the node named NJ2 to mark
|
|
the table as covered. NJ2 does so by incrementing its @c counter
|
|
member. Since all of NJ2's tables are now covered by the QEP, the algorithm
|
|
proceeds up the tree to NJ1, incrementing its counter as well. All join
|
|
nests are now completely covered by the QEP.
|
|
|
|
restore_prev_nj_state() does the above in reverse. As seen above, the node
|
|
NJ1 contains the nodes t2, t3, and NJ2. Its counter being equal to 3 means
|
|
that the plan covers t2, t3, and NJ2, @e and that the sub-plan (t4 x t5)
|
|
completely covers NJ2. The removal of t5 from the partial plan will first
|
|
decrement NJ2's counter to 1. It will then detect that NJ2 went from being
|
|
completely to partially covered, and hence the algorithm must continue
|
|
upwards to NJ1 and decrement its counter to 2. %A subsequent removal of t4
|
|
will however not influence NJ1 since it did not un-cover the last table in
|
|
NJ2.
|
|
|
|
SYNOPSIS
|
|
restore_prev_nj_state()
|
|
last join table to remove, it is assumed to be the last in current
|
|
partial join order.
|
|
|
|
DESCRIPTION
|
|
|
|
Remove the last table from the partial join order and update the nested
|
|
joins counters and join->cur_embedding_map. It is ok to call this
|
|
function for the first table in join order (for which
|
|
check_interleaving_with_nj has not been called)
|
|
|
|
@param last join table to remove, it is assumed to be the last in current
|
|
partial join order.
|
|
*/
|
|
|
|
static void restore_prev_nj_state(JOIN_TAB *last)
{
  /*
    Undo the nested-join bookkeeping for 'last', the final table of the
    current partial join order: decrement the counters of its embedding
    nests and recompute their bits in join->cur_embedding_map.
  */
  JOIN *const join= last->join;
  TABLE_LIST *emb= last->table->pos_in_table_list->embedding;

  while (emb && emb != join->emb_sjm_nest)
  {
    /* Nests that have sj_on_expr set carry no state to restore here */
    if (!emb->sj_on_expr)
    {
      NESTED_JOIN *const nj= emb->nested_join;
      DBUG_ASSERT(nj->counter > 0);

      /* Must be sampled before the counter is decremented */
      const bool covered_before= nj->is_fully_covered();

      nj->counter--;
      if (nj->counter == 0)
        join->cur_embedding_map&= ~nj->nj_map;   /* nest is not entered anymore */
      else
        join->cur_embedding_map|= nj->nj_map;    /* nest is open (again) */

      /*
        The parent nest counted this nest only if it was fully covered,
        so there is nothing to undo further up otherwise.
      */
      if (!covered_before)
        break;
    }
    emb= emb->embedding;
  }
}
|
|
|
|
|
|
/*
|
|
Compute allowed_top_level_tables - a bitmap of tables one can put into the
|
|
join order if the last table in the join prefix is not inside any outer
|
|
join nest.
|
|
|
|
NESTED_JOIN::direct_children_map - a bitmap of tables ... if the last
|
|
table in the join prefix is inside the join nest.
|
|
|
|
Note: it looks like a sensible way to do this is a top-down descent on
|
|
JOIN::join_list, but apparently that list is missing I_S tables.
|
|
e.g. for SHOW TABLES WHERE col IN (SELECT ...) it will just have a
|
|
semi-join nest.
|
|
*/
|
|
|
|
void JOIN::calc_allowed_top_level_tables(SELECT_LEX *lex)
{
  /*
    For every leaf table of 'lex', record its table bit either in
    allowed_top_level_tables or in the direct_children_map of the
    nest(s) that embed it.
  */
  List_iterator<TABLE_LIST> leaf_it(lex->leaf_tables);
  DBUG_ENTER("JOIN::calc_allowed_top_level_tables");
  DBUG_ASSERT(allowed_top_level_tables == 0); // Should only be called once

  for (TABLE_LIST *leaf= leaf_it++; leaf; leaf= leaf_it++)
  {
    /* Bit of this leaf: taken from the TABLE, or built from jtbm_table_no */
    table_map bit;
    if (leaf->table)
      bit= leaf->table->map;
    else
    {
      DBUG_ASSERT(leaf->jtbm_subselect);
      bit= table_map(1) << leaf->jtbm_table_no;
    }

    /*
      Walk out of the nests that have no ON expression (semi-join nest or
      an INSERT-INTO view...), registering the table in each of them.
      A leaf with no embedding at all falls straight through this loop.
    */
    TABLE_LIST *nest= leaf->embedding;
    for (; nest && !nest->on_expr; nest= nest->embedding)
      nest->nested_join->direct_children_map|= bit;

    if (!nest)
    {
      /* Not inside any nest with an ON expression */
      allowed_top_level_tables|= bit;
      continue;
    }

    /* 'nest' is the parent outer join nest */
    nest->nested_join->direct_children_map|= bit;

    /* Go to the grand-parent and again walk out of any semi-join nests */
    for (nest= nest->embedding; nest && !nest->on_expr; nest= nest->embedding)
    {
      DBUG_ASSERT(nest->sj_on_expr);
      nest->nested_join->direct_children_map|= bit;
    }

    if (!nest)
      allowed_top_level_tables|= bit;
    else
    {
      DBUG_ASSERT(nest->on_expr);               // Impossible, see above
      nest->nested_join->direct_children_map|= bit;
    }
  }
  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
/*
|
|
Get the tables that one is allowed to have as the next table in the
|
|
current plan
|
|
*/
|
|
|
|
table_map JOIN::get_allowed_nj_tables(uint idx)
{
  /*
    Return the bitmap of tables that may be put at position 'idx' of the
    join order, judging by the nest of the table at position idx-1.
  */
  if (idx > const_tables)
  {
    TABLE_LIST *nest=
      positions[idx - 1].table->table->pos_in_table_list->embedding;

    for (; nest && nest != emb_sjm_nest; nest= nest->embedding)
    {
      if (nest->sj_on_expr)
        continue;

      NESTED_JOIN *const nj= nest->nested_join;
      /* Innermost nest that is not complete yet: stay among its members */
      if (!nj->is_fully_covered())
        return nj->direct_children_map;
    }
  }

  /* No partially covered nest was found */
  return emb_sjm_nest ? emb_sjm_nest->nested_join->direct_children_map
                      : allowed_top_level_tables;
}
|
|
|
|
|
|
/*
|
|
Change access methods not to use join buffering and adjust costs accordingly
|
|
|
|
SYNOPSIS
|
|
optimize_wo_join_buffering()
|
|
join
|
|
first_tab The first tab to do re-optimization for
|
|
last_tab The last tab to do re-optimization for
|
|
last_remaining_tables Bitmap of tables that are not in the
|
|
[0...last_tab] join prefix
|
|
first_alt TRUE <=> Use the LooseScan plan for the first_tab
|
|
no_jbuf_before Don't allow to use join buffering before this
|
|
table
|
|
outer_rec_count OUT New output record count
|
|
reopt_cost OUT New join prefix cost
|
|
|
|
DESCRIPTION
|
|
Given a join prefix [0; ... first_tab], change the access to the tables
|
|
in the [first_tab; last_tab] not to use join buffering. This is needed
|
|
because some semi-join strategies cannot be used together with the join
|
|
buffering.
|
|
In general case the best table order in [first_tab; last_tab] range with
|
|
join buffering is different from the best order without join buffering but
|
|
we don't try finding a better join order. (TODO ask Igor why did we
|
|
choose not to do this in the end. That's actually the difference from the
|
|
forking approach)
|
|
*/
|
|
|
|
void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
                                table_map last_remaining_tables,
                                bool first_alt, uint no_jbuf_before,
                                double *outer_rec_count, double *reopt_cost)
{
  /*
    Re-cost the join order range [first_tab; last_tab] with join buffering
    disallowed. The new prefix cost goes to *reopt_cost, the new record
    count to *outer_rec_count. As a side effect
    join->positions[i].partial_join_cardinality is rewritten for every i in
    the range. The parameter no_jbuf_before is not referenced in this body.
  */
  THD *thd= join->thd;
  Json_writer_temp_disable trace_wo_join_buffering(thd);

  /* Cost and cardinality of the join prefix that precedes first_tab */
  double cost= 0.0;
  double rec_count= 1;
  if (first_tab > join->const_tables)
  {
    cost= join->positions[first_tab - 1].prefix_cost;
    rec_count= join->positions[first_tab - 1].prefix_record_count;
  }
  *outer_rec_count= rec_count;

  /* Put the tables of the range back into the "remaining" set */
  table_map remaining= last_remaining_tables;
  for (uint k= first_tab; k <= last_tab; k++)
    remaining|= join->positions[k].table->table->map;

  /*
    best_access_path() depends on join->cur_sj_inner_tables. We want a
    re-optimization that differs from the original one only in join
    buffering being disabled, so cur_sj_inner_tables needs to have the value
    it had during the original best_access_path() calls. This function is
    called to re-optimize a semi-join join order range, from which we
    conclude that the "original" value was 0.
  */
  const table_map saved_sj_inner_tables= join->cur_sj_inner_tables;
  join->cur_sj_inner_tables= 0;

  double inner_fanout= 1.0;

  for (uint idx= first_tab; idx <= last_tab; idx++)
  {
    JOIN_TAB *const tab= join->positions[idx].table;
    const bool take_loose_scan= (idx == first_tab && first_alt);
    POSITION pos, loose_scan_pos;

    if (!take_loose_scan && !join->positions[idx].use_join_buffer)
      pos= join->positions[idx];                // Original choice is reusable
    else
    {
      /* Find the best access method that would not use join buffering */
      best_access_path(join, tab, remaining,
                       join->positions, idx,
                       TRUE, rec_count,
                       &pos, &loose_scan_pos);
      if (take_loose_scan)
        pos= loose_scan_pos;
    }

    remaining&= ~tab->table->map;
    cost= COST_ADD(cost, pos.read_time);

    double records_out= pos.records_out;
    /*
      The (idx != last_tab) check mimics best_extension_by_limited_search():
      table_after_join_selectivity() is not called for the join_tab where
      the semi-join strategy is applied.
    */
    if (idx == last_tab ||
        thd->variables.optimizer_use_condition_selectivity <= 1)
    {
      join->positions[idx].partial_join_cardinality=
        COST_MULT(rec_count, records_out);
    }
    else
    {
      const table_map real_table_bit= tab->table->map;
      const double pushdown_cond_selectivity=
        table_after_join_selectivity(join, idx, tab,
                                     remaining & ~real_table_bit,
                                     &records_out);
      /*
        NOTE(review): unlike the other branch, this product does not use
        COST_MULT and does not multiply by records_out - confirm this is
        intended.
      */
      join->positions[idx].partial_join_cardinality=
        rec_count * pushdown_cond_selectivity;
    }

    rec_count= COST_MULT(rec_count, records_out);
    *outer_rec_count= COST_MULT(*outer_rec_count, records_out);

    /* Tables that have emb_sj_nest set contribute to the inner fanout */
    if (tab->emb_sj_nest)
      inner_fanout= COST_MULT(inner_fanout, records_out);
  }

  /* Discount the fanout produced by the subquery */
  if (inner_fanout > 1.0)
    *outer_rec_count/= inner_fanout;

  join->cur_sj_inner_tables= saved_sj_inner_tables;

  *reopt_cost= cost;

  /*
    The tables inside the subquery may produce a smaller fanout than the
    outer tables (an edge case); never report more than rec_count.
  */
  if (rec_count < *outer_rec_count)
    *outer_rec_count= rec_count;
}
|
|
|
|
|
|
/*
  Simplify the condition 'conds' of 'join'.

  The steps are: build multiple equalities (build_equal_items), propagate
  constants (propagate_cond_constants), then remove trivial conditions
  (remove_eq_conds). Each step is written to the optimizer trace and to the
  DBUG log.

  When conds is NULL, *cond_value is set to Item::COND_TRUE and
  build_equal_items() is called only if ignore_on_conds is false.

  Returns the resulting condition; the value is whatever remove_eq_conds()
  returned, so it may be NULL. *cond_equal is redirected to the
  m_cond_equal of the result when the result is an AND formula.
*/
static COND *
optimize_cond(JOIN *join, COND *conds,
              List<TABLE_LIST> *join_list, bool ignore_on_conds,
              Item::cond_result *cond_value, COND_EQUAL **cond_equal,
              int flags)
{
  THD *thd= join->thd;
  DBUG_ENTER("optimize_cond");

  if (conds)
  {
    Json_writer_object trace_wrapper(thd);
    Json_writer_object trace_cond(thd, "condition_processing");

    if (unlikely(trace_cond.trace_started()))
    {
      /* The condition is labelled WHERE only if it is join->conds itself */
      const char *clause_name= (join->conds == conds) ? "WHERE" : "HAVING";
      trace_cond.add("condition", clause_name);
      trace_cond.add("original_condition", conds);
    }

    Json_writer_array trace_steps(thd, "steps");
    DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY););

    /*
      Build all multiple equality predicates and eliminate equality
      predicates that can be inferred from them. Every field reference
      that belongs to a multiple equality gets a pointer to it, and a
      field is substituted for a constant if the multiple equality
      contains one.
    */
    const bool link_equal_fields= MY_TEST(flags & OPT_LINK_EQUAL_FIELDS);
    conds= build_equal_items(join, conds, NULL, join_list,
                             ignore_on_conds, cond_equal,
                             link_equal_fields);
    DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY););

    if (unlikely(thd->trace_started()))
    {
      Json_writer_object equal_prop_wrapper(thd);
      equal_prop_wrapper.add("transformation", "equality_propagation");
      equal_prop_wrapper.add("resulting_condition", conds);
    }

    /* change field = field to field = const for each found field = const */
    propagate_cond_constants(thd, (I_List<COND_CMP> *) NULL, conds, conds);
    DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY););

    if (unlikely(thd->trace_started()))
    {
      Json_writer_object const_prop_wrapper(thd);
      const_prop_wrapper.add("transformation", "constant_propagation");
      const_prop_wrapper.add("resulting_condition", conds);
    }

    /*
      Remove all instances of item == item
      Remove all and-levels where CONST item != CONST item
    */
    conds= conds->remove_eq_conds(thd, cond_value, true);

    const bool result_is_and=
      conds &&
      conds->type() == Item::COND_ITEM &&
      ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC;
    if (result_is_and)
      *cond_equal= &((Item_cond_and*) conds)->m_cond_equal;

    if (unlikely(thd->trace_started()))
    {
      Json_writer_object cond_removal_wrapper(thd);
      cond_removal_wrapper.add("transformation", "trivial_condition_removal");
      cond_removal_wrapper.add("resulting_condition", conds);
    }
    DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY););
  }
  else
  {
    /* No condition at all: it is trivially TRUE */
    *cond_value= Item::COND_TRUE;
    if (!ignore_on_conds)
      build_equal_items(join, NULL, NULL, join_list, ignore_on_conds,
                        cond_equal);
  }
  DBUG_RETURN(conds);
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Propagate multiple equalities to the sub-expressions of a condition
|
|
|
|
@param thd thread handle
|
|
@param cond the condition where equalities are to be propagated
|
|
@param *new_equalities the multiple equalities to be propagated
|
|
@param inherited path to all inherited multiple equality items
|
|
@param[out] is_simplifiable_cond 'cond' may be simplified after the
|
|
propagation of the equalities
|
|
|
|
@details
|
|
The function recursively traverses the tree of the condition 'cond' and
|
|
for each its AND sub-level of any depth the function merges the multiple
|
|
equalities from the list 'new_equalities' into the multiple equalities
|
|
attached to the AND item created for this sub-level.
|
|
The function also [re]sets references to the equalities formed by the
|
|
merges of multiple equalities in all field items occurred in 'cond'
|
|
that are encountered in the equalities.
|
|
If the result of any merge of multiple equalities is an impossible
|
|
condition the function returns TRUE in the parameter is_simplifiable_cond.
|
|
*/
|
|
|
|
void propagate_new_equalities(THD *thd, Item *cond,
                              List<Item_equal> *new_equalities,
                              COND_EQUAL *inherited,
                              bool *is_simplifiable_cond)
{
  /*
    Three cases are handled, depending on what 'cond' is:
      - an AND/OR formula: merge into its own equalities (AND only) and
        recurse into the arguments;
      - a multiple equality: merge new_equalities into it;
      - anything else: re-propagate equal fields and refresh used tables.
  */
  if (cond->type() == Item::COND_ITEM)
  {
    Item_cond *const formula= (Item_cond *) cond;
    const bool is_and= formula->functype() == Item_func::COND_AND_FUNC;

    if (is_and)
    {
      COND_EQUAL *const own_equal= &((Item_cond_and *) cond)->m_cond_equal;
      List<Item_equal> *const own_list= &own_equal->current_level;
      own_equal->upper_levels= inherited;

      /* Nothing to merge into an empty list or into the source list itself */
      if (!own_list->is_empty() && own_list != new_equalities)
      {
        List_iterator<Item_equal> new_it(*new_equalities);
        for (Item_equal *eq= new_it++; eq; eq= new_it++)
          eq->merge_into_list(thd, own_list, true, true);

        /* A constant multiple equality evaluating to false: stop here */
        List_iterator<Item_equal> own_it(*own_list);
        for (Item_equal *eq= own_it++; eq; eq= own_it++)
        {
          if (eq->const_item() && !eq->val_int())
          {
            *is_simplifiable_cond= true;
            return;
          }
        }
      }
    }

    List_iterator<Item> arg_it(*formula->argument_list());
    for (Item *arg= arg_it++; arg; arg= arg_it++)
    {
      /*
        A nested formula directly under an AND level inherits from that
        AND level; every other argument keeps the caller's chain.
      */
      COND_EQUAL *arg_inherited= inherited;
      if (is_and && arg->type() == Item::COND_ITEM)
        arg_inherited= &((Item_cond_and *) cond)->m_cond_equal;

      propagate_new_equalities(thd, arg, new_equalities, arg_inherited,
                               is_simplifiable_cond);
    }
    return;
  }

  if (cond->type() == Item::FUNC_ITEM &&
      ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
  {
    Item_equal *const target= (Item_equal *) cond;
    List_iterator<Item_equal> new_it(*new_equalities);
    target->upper_levels= inherited;

    for (Item_equal *eq= new_it++; eq; eq= new_it++)
      target->merge_with_check(thd, eq, true);

    if (target->const_item() && !target->val_int())
      *is_simplifiable_cond= true;
    return;
  }

  /* The reassignment of 'cond' is local, the caller's pointer is untouched */
  cond= cond->propagate_equal_fields(thd,
                                     Item::Context_boolean(), inherited);
  cond->update_used_tables();
}
|
|
|
|
/*
|
|
Check if cond_is_datetime_is_null() is true for the condition cond, or
|
|
for any of its AND/OR-children
|
|
*/
|
|
bool cond_has_datetime_is_null(Item *cond)
{
  /* The condition itself matches */
  if (cond_is_datetime_is_null(cond))
    return true;

  /* Only AND/OR formulas have children to look into */
  if (cond->type() != Item::COND_ITEM)
    return false;

  List_iterator<Item> child_it(*((Item_cond*) cond)->argument_list());
  for (Item *child= child_it++; child; child= child_it++)
  {
    if (cond_has_datetime_is_null(child))
      return true;
  }
  return false;
}
|
|
|
|
/*
|
|
Check if the passed condition has the form
|
|
|
|
not_null_date_col IS NULL
|
|
|
|
where not_null_date_col has a date or datetime type
|
|
*/
|
|
|
|
bool cond_is_datetime_is_null(Item *cond)
{
  /* Anything that is not an IS NULL function cannot match */
  const bool is_isnull_func=
    cond->type() == Item::FUNC_ITEM &&
    ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC;

  /* The argument test is delegated to Item_func_isnull */
  return is_isnull_func &&
         ((Item_func_isnull*) cond)->arg_is_datetime_notnull_field();
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Evaluate all constant boolean sub-expressions in a condition
|
|
|
|
@param thd thread handle
|
|
@param cond condition in which to evaluate constant sub-expressions
|
|
@param[out] cond_value : the returned value of the condition
|
|
(TRUE/FALSE/UNKNOWN:
|
|
Item::COND_TRUE/Item::COND_FALSE/Item::COND_OK)
|
|
@return
|
|
the item that is the result of the substitution of all inexpensive constant
|
|
boolean sub-expressions into cond, or,
|
|
NULL if the condition is constant and is evaluated to FALSE.
|
|
|
|
@details
|
|
This function looks for all inexpensive constant boolean sub-expressions in
|
|
the given condition 'cond' and substitutes them for their values.
|
|
For example, the condition 2 > (5 + 1) or a < (10 / 2)
|
|
will be transformed to the condition a < (10 / 2).
|
|
Note that a constant sub-expression is evaluated only if it is constant and
|
|
inexpensive. A sub-expression with an uncorrelated subquery may be evaluated
|
|
only if the subquery is considered as inexpensive.
|
|
The function does not evaluate a constant sub-expression if it is not on one
|
|
of AND/OR levels of the condition 'cond'. For example, the subquery in the
|
|
condition a > (select max(b) from t1 where b > 5) will never be evaluated
|
|
by this function.
|
|
If a constant boolean sub-expression is evaluated to TRUE then:
|
|
- when the sub-expression is a conjunct of an AND formula it is simply
|
|
removed from this formula
|
|
- when the sub-expression is a disjunct of an OR formula the whole OR
|
|
formula is converted to TRUE
|
|
If a constant boolean sub-expression is evaluated to FALSE then:
|
|
- when the sub-expression is a disjunct of an OR formula it is simply
|
|
removed from this formula
|
|
- when the sub-expression is a conjunct of an AND formula the whole AND
|
|
formula is converted to FALSE
|
|
When a disjunct/conjunct is removed from an OR/AND formula it might happen
|
|
that there is only one conjunct/disjunct remaining. In this case this
|
|
remaining disjunct/conjunct must be merged into underlying AND/OR formula,
|
|
because AND/OR levels must alternate in the same way as they alternate
|
|
after fix_fields() is called for the original condition.
|
|
The specifics of merging a formula f into an AND formula A appears
|
|
when A contains multiple equalities and f contains multiple equalities.
|
|
In this case the multiple equalities from f and A have to be merged.
|
|
After this the resulting multiple equalities have to be propagated into
|
|
the all AND/OR levels of the formula A (see propagate_new_equalities()).
|
|
The propagation of multiple equalities might result in forming multiple
|
|
equalities that are always FALSE. This, in its turn, might trigger further
|
|
simplification of the condition.
|
|
|
|
@note
|
|
EXAMPLE 1:
|
|
SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5 OR 1 != 1);
|
|
First 1 != 1 will be removed from the second conjunct:
|
|
=> SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5);
|
|
Then (b = 5 AND a = 5) will be merged into the top level condition:
|
|
=> SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5) AND (a = 5);
|
|
Then (b = 5), (a = 5) will be propagated into the disjuncts of
|
|
(b = 1 OR a = 1):
|
|
=> SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
|
|
(a = 1) AND (b = 5) AND (a = 5)) AND
|
|
(b = 5) AND (a = 5)
|
|
=> SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
|
|
(FALSE AND (b = 5))) AND
|
|
(b = 5) AND (a = 5)
|
|
After this an additional call of remove_eq_conds() converts it
|
|
to FALSE
|
|
|
|
EXAMPLE 2:
|
|
SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5 OR 1 != 1);
|
|
=> SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5);
|
|
=> SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5) AND (a = 5);
|
|
=> SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
|
|
(a = 5) AND (b = 5) AND (a = 5)) AND
|
|
(b = 5) AND (a = 5)
|
|
=> SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
|
|
((b = 5) AND (a = 5))) AND
|
|
(b = 5) AND (a = 5)
|
|
After this an additional call of remove_eq_conds() converts it to
|
|
=> SELECT * FROM t1 WHERE (b = 5) AND (a = 5)
|
|
*/
|
|
|
|
|
|
COND *
Item_cond::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
                           bool top_level_arg)
{
  /*
    Simplify this AND/OR formula in place.

    thd            Used for the stack overrun check and for thd->mem_root.
    cond_value     OUT  Item::COND_TRUE / COND_FALSE when the formula became
                   a constant, Item::COND_OK otherwise.
    top_level_arg  Not referenced in this body; arguments are always
                   processed with 'false'.

    Returns this formula, or its only remaining argument, or NULL when the
    formula became constant (then see *cond_value) or on stack overrun.
  */
  bool and_level= functype() == Item_func::COND_AND_FUNC;
  List<Item> *cond_arg_list= argument_list();

  if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
  {
    *cond_value= Item::COND_FALSE;
    return (COND*) 0;                           // Fatal error flag is set!
  }

  if (and_level)
  {
    /*
      Remove multiple equalities that became always true (e.g. after
      constant row substitution).
      They would be removed later in the function anyway, but the list of
      them cond_equal.current_level also must be adjusted correspondingly.
      So it's easier to do it at one pass through the list of the equalities.
    */
    List<Item_equal> *cond_equalities=
      &((Item_cond_and *) this)->m_cond_equal.current_level;
    cond_arg_list->disjoin((List<Item> *) cond_equalities);
    List_iterator<Item_equal> it(*cond_equalities);
    Item_equal *eq_item;
    while ((eq_item= it++))
    {
      if (eq_item->const_item() && eq_item->val_int())
        it.remove();
    }
    cond_arg_list->append((List<Item> *) cond_equalities);
  }

  List<Item_equal> new_equalities;
  List_iterator<Item> li(*cond_arg_list);
  bool should_fix_fields= false;
  Item::cond_result tmp_cond_value;
  Item *item;

  /*
    If the list cond_arg_list became empty then it consisted only
    of always true multiple equalities.
  */
  *cond_value= cond_arg_list->elements ? Item::COND_UNDEF : Item::COND_TRUE;

  while ((item= li++))
  {
    Item *new_item= item->remove_eq_conds(thd, &tmp_cond_value, false);
    if (!new_item)
    {
      /* This can happen only when item is converted to TRUE or FALSE */
      li.remove();
    }
    else if (item != new_item)
    {
      /*
        This can happen when:
        - item was an OR formula converted to one disjunct
        - item was an AND formula converted to one conjunct
        In these cases the disjunct/conjunct must be merged into the
        argument list of cond.
      */
      if (new_item->type() == Item::COND_ITEM &&
          item->type() == Item::COND_ITEM)
      {
        DBUG_ASSERT(functype() == ((Item_cond *) new_item)->functype());
        List<Item> *new_item_arg_list=
          ((Item_cond *) new_item)->argument_list();
        if (and_level)
        {
          /*
            If new_item is an AND formula then multiple equalities
            of new_item_arg_list must be merged into multiple equalities
            of cond_arg_list.
          */
          List<Item_equal> *new_item_equalities=
            &((Item_cond_and *) new_item)->m_cond_equal.current_level;
          if (!new_item_equalities->is_empty())
          {
            /*
              Cut the multiple equalities from the new_item_arg_list and
              append them on the list new_equalities. Later the equalities
              from this list will be merged into the multiple equalities
              of cond_arg_list all together.
            */
            new_item_arg_list->disjoin((List<Item> *) new_item_equalities);
            new_equalities.append(new_item_equalities);
          }
        }
        if (new_item_arg_list->is_empty())
          li.remove();
        else
        {
          uint cnt= new_item_arg_list->elements;
          li.replace(*new_item_arg_list);
          /* Make iterator li ignore new items */
          for (cnt--; cnt; cnt--)
            li++;
          should_fix_fields= true;
        }
      }
      else if (and_level &&
               new_item->type() == Item::FUNC_ITEM &&
               ((Item_func*) new_item)->functype() ==
                Item_func::MULT_EQUAL_FUNC)
      {
        /* A lone multiple equality is merged later with the others */
        li.remove();
        new_equalities.push_back((Item_equal *) new_item, thd->mem_root);
      }
      else
      {
        if (new_item->type() == Item::COND_ITEM &&
            ((Item_cond*) new_item)->functype() == functype())
        {
          /* Same kind of formula as this one: splice its arguments in */
          List<Item> *new_item_arg_list=
            ((Item_cond *) new_item)->argument_list();
          uint cnt= new_item_arg_list->elements;
          li.replace(*new_item_arg_list);
          /* Make iterator li ignore new items */
          for (cnt--; cnt; cnt--)
            li++;
        }
        else
          li.replace(new_item);
        should_fix_fields= true;
      }
    }

    /* Fold the value of this argument into the value of the formula */
    if (*cond_value == Item::COND_UNDEF)
      *cond_value= tmp_cond_value;
    switch (tmp_cond_value) {
    case Item::COND_OK:                         // Not TRUE or FALSE
      if (and_level || *cond_value == Item::COND_FALSE)
        *cond_value= tmp_cond_value;
      break;
    case Item::COND_FALSE:
      if (and_level)
      {
        *cond_value= tmp_cond_value;
        return (COND*) 0;                       // Always false
      }
      break;
    case Item::COND_TRUE:
      if (!and_level)
      {
        *cond_value= tmp_cond_value;
        return (COND*) 0;                       // Always true
      }
      break;
    case Item::COND_UNDEF:                      // Impossible
      break; /* purecov: deadcode */
    }
  }

  COND *cond= this;
  if (!new_equalities.is_empty())
  {
    DBUG_ASSERT(and_level);
    /*
      Merge multiple equalities that were cut from the results of
      simplification of OR formulas converted into AND formulas.
      These multiple equalities are to be merged into the
      multiple equalities of cond_arg_list.
    */
    COND_EQUAL *cond_equal= &((Item_cond_and *) this)->m_cond_equal;
    List<Item_equal> *cond_equalities= &cond_equal->current_level;
    cond_arg_list->disjoin((List<Item> *) cond_equalities);
    Item_equal *equality;
    List_iterator_fast<Item_equal> it(new_equalities);
    while ((equality= it++))
    {
      equality->upper_levels= cond_equal->upper_levels;
      equality->merge_into_list(thd, cond_equalities, false, false);

      /*
        After each merge look for an always-false multiple equality.
        A separate variable is used so that the outer loop variable
        is not clobbered.
      */
      Item_equal *merged;
      List_iterator_fast<Item_equal> ei(*cond_equalities);
      while ((merged= ei++))
      {
        if (merged->const_item() && !merged->val_int())
        {
          *cond_value= Item::COND_FALSE;
          return (COND*) 0;
        }
      }
    }
    cond_arg_list->append((List<Item> *) cond_equalities);
    /*
      Propagate the newly formed multiple equalities to
      the all AND/OR levels of cond
    */
    bool is_simplifiable_cond= false;
    propagate_new_equalities(thd, this, cond_equalities,
                             cond_equal->upper_levels,
                             &is_simplifiable_cond);
    /*
      If the above propagation of multiple equalities brings us
      to multiple equalities that are always FALSE then try to
      simplify the condition with remove_eq_cond() again.
    */
    if (is_simplifiable_cond)
    {
      if (!(cond= cond->remove_eq_conds(thd, cond_value, false)))
        return cond;
    }
    should_fix_fields= true;
  }
  if (should_fix_fields)
    cond->update_used_tables();

  /*
    The repeated simplification above ends with the same "Remove list" step
    as below, so it may have returned the single remaining argument instead
    of this formula. Such an item need not be an Item_cond and must not be
    accessed through an Item_cond pointer. It is already the final result,
    with *cond_value set by that call.
  */
  if (cond->type() != Item::COND_ITEM)
    return cond;

  if (!((Item_cond*) cond)->argument_list()->elements ||
      *cond_value != Item::COND_OK)
    return (COND*) 0;
  if (((Item_cond*) cond)->argument_list()->elements == 1)
  {                                             // Remove list
    item= ((Item_cond*) cond)->argument_list()->head();
    ((Item_cond*) cond)->argument_list()->empty();
    return item;
  }
  *cond_value= Item::COND_OK;
  return cond;
}
|
|
|
|
|
|
/*
  Default simplification of a condition.

  If the item can be evaluated during optimization, its boolean value is
  stored in *cond_value (Item::COND_TRUE or Item::COND_FALSE) and NULL is
  returned. Otherwise *cond_value is set to Item::COND_OK and the item
  itself is returned. top_level_arg is not referenced here.
*/
COND *
Item::remove_eq_conds(THD *thd, Item::cond_result *cond_value, bool top_level_arg)
{
  if (!can_eval_in_optimize())
  {
    *cond_value= Item::COND_OK;
    return this;                                // Point at next and level
  }

  if (eval_const_cond())
    *cond_value= Item::COND_TRUE;
  else
    *cond_value= Item::COND_FALSE;
  return (COND*) NULL;
}
|
|
|
|
|
|
/*
  Simplification of a two-argument boolean function.

  A condition that can be evaluated during optimization is replaced by its
  value, as in Item::remove_eq_conds(). Otherwise, if eq_cmp_result() is not
  Item::COND_OK and both arguments are the same item, the condition is
  removed (NULL is returned and *cond_value holds eq_cmp_result()), provided
  that the result is COND_FALSE, or the argument cannot be NULL, or the
  function is Item_func::EQUAL_FUNC. In all other cases *cond_value is
  Item::COND_OK and the item itself is returned.
  top_level_arg is not referenced here.
*/
COND *
Item_bool_func2::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
                                 bool top_level_arg)
{
  if (can_eval_in_optimize())
  {
    if (eval_const_cond())
      *cond_value= Item::COND_TRUE;
    else
      *cond_value= Item::COND_FALSE;
    return (COND*) NULL;
  }

  const Item::cond_result identical_args_value= eq_cmp_result();
  *cond_value= identical_args_value;

  if (identical_args_value != Item::COND_OK && args[0]->eq(args[1], true))
  {
    const bool removable=
      identical_args_value == Item::COND_FALSE ||
      !args[0]->maybe_null() ||
      functype() == Item_func::EQUAL_FUNC;
    if (removable)
      return (COND*) NULL;                      // Compare of identical items
  }

  *cond_value= Item::COND_OK;
  return this;                                  // Point at next and level
}
|
|
|
|
|
|
/**
|
|
Remove const and eq items. Return new item, or NULL if no condition
|
|
cond_value is set to according:
|
|
COND_OK query is possible (field = constant)
|
|
COND_TRUE always true ( 1 = 1 )
|
|
COND_FALSE always false ( 1 = 2 )
|
|
|
|
SYNOPSIS
|
|
remove_eq_conds()
|
|
thd THD environment
|
|
cond the condition to handle
|
|
cond_value the resulting value of the condition
|
|
|
|
NOTES
|
|
calls the inner_remove_eq_conds to check all the tree recursively
|
|
|
|
RETURN
|
|
*COND with the simplified condition
|
|
*/
|
|
|
|
COND *
Item_func_isnull::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
                                  bool top_level_arg)
{
  /*
    Two special rewrites of "<field> IS NULL" are attempted below. When the
    argument is not a field, or neither rewrite applies, the generic
    Item::remove_eq_conds() does the work.
  */
  Item *arg_item= args[0]->real_item();
  if (arg_item->type() != Item::FIELD_ITEM)
    return Item::remove_eq_conds(thd, cond_value, top_level_arg);

  Field *field= ((Item_field*) arg_item)->field;

  if ((field->flags & NOT_NULL_FLAG) &&
      field->type_handler()->cond_notnull_field_isnull_to_field_eq_zero())
  {
    /*
      fix to replace 'NULL' dates with '0' (shreeve@uci.edu)

      See BUG#12594011
      Documentation says that
        SELECT datetime_notnull d FROM t1 WHERE d IS NULL
      shall return rows where d=='0000-00-00'

      Thus, for DATE and DATETIME columns defined as NOT NULL,
      "date_notnull IS NULL" has to be modified to
        "date_notnull IS NULL OR date_notnull == 0" (if outer join)
        "date_notnull == 0"                         (otherwise)
    */
    Item *zero= (Item*) Item_false;
    Item *eq_zero= new(thd->mem_root) Item_func_eq(thd, args[0], zero);
    if (!eq_zero)
      return this;

    // not outer join: "col IS NULL" simply becomes "col=0"
    COND *cond= eq_zero;
    if (field->table->pos_in_table_list->is_inner_table_of_outer_join())
    {
      // outer join: "col IS NULL" becomes "col IS NULL or col=0"
      Item *null_or_zero= new(thd->mem_root) Item_cond_or(thd, eq_zero, this);
      if (!null_or_zero)
        return this;
      cond= null_or_zero;
    }

    cond->fix_fields(thd, &cond);
    /*
      Note: although args[0] is a field, cond can still be a constant
      (in case field is a part of a dependent subquery).

      Note: cond->Item::remove_eq_conds() is called non-virtually
      (statically) for performance purposes.
      An unqualified call, i.e. just cond->remove_eq_conds(),
      would call Item_bool_func2::remove_eq_conds() instead, which would
      do extra work to detect whether args[0] and args[1] are
      equivalent items. We know they are not (we have field=0 here).
    */
    return cond->Item::remove_eq_conds(thd, cond_value, false);
  }

  /*
    Handles this special case for some ODBC applications:
    They are requesting the row that was just updated with an auto_increment
    value with this construct:

      SELECT * from table_name where auto_increment_column IS NULL
    This will be changed to:
      SELECT * from table_name where auto_increment_column = LAST_INSERT_ID

    Note, this substitution is done if the NULL test is the only condition!
    If the NULL test is a part of a more complex condition, it is not
    substituted and is treated normally:
      WHERE auto_increment IS NULL AND something_else
  */
  const bool substitute_last_insert_id=
    top_level_arg &&      // "auto_increment_column IS NULL" is the only cond
    (field->flags & AUTO_INCREMENT_FLAG) &&
    !field->table->maybe_null &&
    (thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
    thd->first_successful_insert_id_in_prev_stmt > 0 &&
    thd->substitute_null_with_insert_id;

  if (!substitute_last_insert_id)
    return Item::remove_eq_conds(thd, cond_value, top_level_arg);

#ifdef HAVE_QUERY_CACHE
  query_cache_abort(thd, &thd->query_cache_tls);
#endif
  COND *cond= this;
  /* If this fails, we will catch it later before executing query */
  COND *eq_last_id=
    new (thd->mem_root) Item_func_eq(thd, args[0],
      new (thd->mem_root) Item_int(thd, "last_insert_id()",
                       thd->read_first_successful_insert_id_in_prev_stmt(),
                       MY_INT64_NUM_DECIMAL_DIGITS));
  if (eq_last_id)
  {
    cond= eq_last_id;
    /*
      Item_func_eq can't be fixed after creation so we do not check
      cond->fixed(); it also does not need tables.
    */
    cond->fix_fields(thd, &cond);
  }
  /*
    IS NULL should be mapped to LAST_INSERT_ID only for first row, so
    clear for next row
  */
  thd->substitute_null_with_insert_id= FALSE;

  *cond_value= Item::COND_OK;
  return cond;
}
|
|
|
|
|
|
/**
|
|
Check if equality can be used in removing components of GROUP BY/DISTINCT
|
|
|
|
@param l the left comparison argument (a field if any)
|
|
@param r the right comparison argument (a const if any)
|
|
|
|
@details
|
|
Checks if an equality predicate can be used to take away
|
|
DISTINCT/GROUP BY because it is known to be true for exactly one
|
|
distinct value (e.g. <expr> == <const>).
|
|
Arguments must be compared in the native type of the left argument
|
|
and (for strings) in the native collation of the left argument.
|
|
Otherwise, for example,
|
|
<string_field> = <int_const> may match more than 1 distinct value of
|
|
the <string_field>.
|
|
|
|
@note We don't need to aggregate l and r collations here, because r -
|
|
the constant item - has already been converted to a proper collation
|
|
for comparison. We only need to compare this collation with field's collation.
|
|
|
|
@retval true can be used
|
|
@retval false cannot be used
|
|
*/
|
|
|
|
/*
|
|
psergey-todo: this returns false for int_column='1234' (here '1234' is a
|
|
constant. Need to discuss this with Bar).
|
|
|
|
See also Field::test_if_equality_guarantees_uniqueness(const Item *item);
|
|
*/
|
|
static bool
|
|
test_if_equality_guarantees_uniqueness(Item *l, Item *r)
|
|
{
|
|
return (r->const_item() || !(r->used_tables() & ~OUTER_REF_TABLE_BIT)) &&
|
|
item_cmp_type(l, r) == l->cmp_type() &&
|
|
(l->cmp_type() != STRING_RESULT ||
|
|
l->collation.collation == r->collation.collation);
|
|
}
|
|
|
|
|
|
/*
|
|
Return TRUE if i1 and i2 (if any) are equal items,
|
|
or if i1 is a wrapper item around the f2 field.
|
|
*/
|
|
|
|
static bool equal(Item *i1, Item *i2, Field *f2)
{
  // Exactly one of i2 / f2 must be given
  DBUG_ASSERT((i2 == NULL) ^ (f2 == NULL));

  if (i2)
    return i1->eq(i2, 1);                       // item against item

  if (i1->type() != Item::FIELD_ITEM)
    return FALSE;                               // i1 does not wrap a field

  return f2->eq(((Item_field *) i1)->field);    // field against field
}
|
|
|
|
|
|
/**
|
|
Test if a field or an item is equal to a constant value in WHERE
|
|
|
|
@param cond WHERE clause expression
|
|
@param comp_item Item to find in WHERE expression
|
|
(if comp_field == NULL)
|
|
@param comp_field Field to find in WHERE expression
|
|
(if comp_item == NULL)
|
|
@param[out] const_item intermediate arg, set to Item pointer to NULL
|
|
|
|
@return TRUE if the field is a constant value in WHERE
|
|
|
|
@note
|
|
comp_item and comp_field parameters are mutually exclusive.
|
|
*/
|
|
bool
|
|
const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field,
|
|
Item **const_item)
|
|
{
|
|
DBUG_ASSERT((comp_item == NULL) ^ (comp_field == NULL));
|
|
|
|
Item *intermediate= NULL;
|
|
if (const_item == NULL)
|
|
const_item= &intermediate;
|
|
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
bool and_level= (((Item_cond*) cond)->functype()
|
|
== Item_func::COND_AND_FUNC);
|
|
List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
bool res=const_expression_in_where(item, comp_item, comp_field,
|
|
const_item);
|
|
if (res) // Is a const value
|
|
{
|
|
if (and_level)
|
|
return 1;
|
|
}
|
|
else if (!and_level)
|
|
return 0;
|
|
}
|
|
return and_level ? 0 : 1;
|
|
}
|
|
else if (cond->eq_cmp_result() != Item::COND_OK)
|
|
{ // boolean compare function
|
|
Item_func* func= (Item_func*) cond;
|
|
if (func->functype() != Item_func::EQUAL_FUNC &&
|
|
func->functype() != Item_func::EQ_FUNC)
|
|
return 0;
|
|
Item *left_item= ((Item_func*) cond)->arguments()[0];
|
|
Item *right_item= ((Item_func*) cond)->arguments()[1];
|
|
if (equal(left_item, comp_item, comp_field))
|
|
{
|
|
if (test_if_equality_guarantees_uniqueness (left_item, right_item))
|
|
{
|
|
if (*const_item)
|
|
return right_item->eq(*const_item, 1);
|
|
*const_item=right_item;
|
|
return 1;
|
|
}
|
|
}
|
|
else if (equal(right_item, comp_item, comp_field))
|
|
{
|
|
if (test_if_equality_guarantees_uniqueness (right_item, left_item))
|
|
{
|
|
if (*const_item)
|
|
return left_item->eq(*const_item, 1);
|
|
*const_item=left_item;
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
Create internal temporary table
|
|
****************************************************************************/
|
|
|
|
/**
  Create an integer temporary table field for this item.

  type_handler_slonglong is chosen when max_char_length() exceeds
  convert_int_length, type_handler_slong otherwise. The unsigned
  counterpart of the chosen handler is used when unsigned_flag is set.

  @return the field made by the handler (whatever
          make_and_init_table_field() returns)
*/
Field *Item::create_tmp_field_int(MEM_ROOT *root, TABLE *table,
                                  uint convert_int_length)
{
  const Type_handler *handler;
  if (max_char_length() > convert_int_length)
    handler= &type_handler_slonglong;
  else
    handler= &type_handler_slong;

  if (unsigned_flag)
    handler= handler->type_handler_unsigned();

  return handler->make_and_init_table_field(root, &name,
                                            Record_addr(maybe_null()),
                                            *this, table);
}
|
|
|
|
/**
  Create a temporary table field from this item's field type and, when
  is_explicit_null is set, mark the new field as created from a NULL item.

  @return the new field, or NULL if tmp_table_field_from_field_type() failed
*/
Field *Item::tmp_table_field_from_field_type_maybe_null(MEM_ROOT *root,
                                            TABLE *table,
                                            Tmp_field_src *src,
                                            const Tmp_field_param *param,
                                            bool is_explicit_null)
{
  /*
    item->type() == CONST_ITEM excluded due to making fields for counter
    With help of Item_uint
  */
  DBUG_ASSERT(!param->make_copy_field() || type() == CONST_ITEM);
  DBUG_ASSERT(!is_result_field());

  Field *new_field= tmp_table_field_from_field_type(root, table);
  if (new_field && is_explicit_null)
    new_field->is_created_from_null_item= true;
  return new_field;
}
|
|
|
|
|
|
/**
  Create a temporary table field holding the value of this aggregate.

  REAL_RESULT aggregates get a Field_double; all other scalar comparison
  types get a field derived from the item's field type. The new field is
  initialized for 'table' before it is returned.

  @return the new field, or NULL if it could not be created
*/
Field *Item_sum::create_tmp_field(MEM_ROOT *root, bool group, TABLE *table)
{
  Field *created= NULL;

  switch (cmp_type()) {
  case ROW_RESULT:
    // This case should never be chosen
    DBUG_ASSERT(0);
    break;
  case REAL_RESULT:
    created= new (root)
      Field_double(max_char_length(), maybe_null(), &name, decimals, TRUE);
    break;
  case INT_RESULT:
  case TIME_RESULT:
  case DECIMAL_RESULT:
  case STRING_RESULT:
    created= tmp_table_field_from_field_type(root, table);
    break;
  }

  if (!created)
    return NULL;
  created->init(table);
  return created;
}
|
|
|
|
|
|
/**
|
|
Create a temporary field for Item_field (or its descendant),
|
|
either direct or referenced by an Item_ref.
|
|
|
|
param->modify_item is set when we create a field for an internal temporary
|
|
table. In this case we have to ensure the new field name is identical to
|
|
the original field name as the field info will be sent to the client.
|
|
In other cases, the field name is set from orig_item or name if orig_item is
|
|
not set.
|
|
*/
|
|
|
|
Field *
Item_field::create_tmp_field_from_item_field(MEM_ROOT *root, TABLE *new_table,
                                             Item_ref *orig_item,
                                             const Tmp_field_param *param)
{
  DBUG_ASSERT(!is_result_field());

  const bool modify= param->modify_item();

  // Name of the new field: referencing item first, then item or field name
  const Lex_ident_column *new_name;
  if (orig_item)
    new_name= &orig_item->name;
  else if (!modify)
    new_name= &name;
  else
    new_name= &field->field_name;

  /*
    If the item has to be able to store NULLs but the underlying field
    can't do it, create_tmp_field_from_field() can't be used for tmp field
    creation.
  */
  const bool item_nullable_field_not=
    ((maybe_null() && in_rollup()) ||
     (new_table->in_use->create_tmp_table_for_derived && /* mat. view/dt */
      orig_item && orig_item->maybe_null())) &&
    !field->maybe_null();

  Field *result;
  if (item_nullable_field_not)
  {
    /*
      The item the ref points to may have maybe_null flag set while
      the ref doesn't have it. This may happen for outer fields
      when the outer query decided at some point after name resolution phase
      that this field might be null. Take this into account here.
    */
    Record_addr rec(orig_item ? orig_item->maybe_null() : maybe_null());
    const Type_handler *handler=
      type_handler()->type_handler_for_tmp_table(this);
    result= handler->make_and_init_table_field(root, new_name,
                                               rec, *this, new_table);
  }
  else if (param->table_cant_handle_bit_fields() &&
           field->type() == MYSQL_TYPE_BIT)
  {
    // BIT column the table cannot handle: store it as an integer instead
    const Type_handler *handler=
      Type_handler::type_handler_long_or_longlong(max_char_length(), true);
    result= handler->make_and_init_table_field(root, new_name,
                                               Record_addr(maybe_null()),
                                               *this, new_table);
  }
  else
  {
    // Ordinary case: clone the underlying field
    const bool nullable= modify ? maybe_null() : field->maybe_null();
    result= field->create_tmp_field(root, new_table, nullable);
    if (result && !modify)
      result->field_name= *new_name;
  }

  if (result && modify)
    result_field= result;
  return result;
}
|
|
|
|
|
|
/**
  Create a temporary table field for a plain Item_field.

  The underlying field is recorded in 'src' as the source field. It is also
  recorded as the default field when it does not carry
  NO_DEFAULT_VALUE_FLAG and field->eq_def() accepts the new field.

  @return the new field, or NULL on failure
*/
Field *Item_field::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                       Tmp_field_src *src,
                                       const Tmp_field_param *param)
{
  DBUG_ASSERT(!is_result_field());

  src->set_field(field);

  Field *created= create_tmp_field_from_item_field(root, table, NULL, param);
  if (!created)
    return NULL;

  const bool may_have_default= !(field->flags & NO_DEFAULT_VALUE_FLAG);
  if (may_have_default && field->eq_def(created))
    src->set_default_field(field);
  return created;
}
|
|
|
|
|
|
/**
  Create a temporary table field for DEFAULT(field).

  @return the new field, or NULL on failure
*/
Field *Item_default_value::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                               Tmp_field_src *src,
                                               const Tmp_field_param *param)
{
  const bool needs_copy_func= field->default_value ||
                              (field->flags & BLOB_FLAG);
  if (!needs_copy_func)
  {
    /*
      Same code as in Item_field::create_tmp_field_ex, except no default
      field handling
    */
    src->set_field(field);
    return create_tmp_field_from_item_field(root, table, nullptr, param);
  }

  /*
    We have to use a copy function when using a blob with default value
    as we have to calculate the default value before we can use it.
  */
  get_tmp_field_src(src, param);
  Field *created= tmp_table_field_from_field_type(root, table);
  if (created && param->modify_item())
    result_field= created;
  return created;
}
|
|
|
|
|
|
/**
  Create a temporary table field for a reference item.

  A reference that resolves to an Item_field is handled through the
  referenced field, passing this reference as orig_item. Any other
  reference goes through Item_result_field::create_tmp_field_ex().

  @return the new field, or NULL on failure
*/
Field *Item_ref::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                     Tmp_field_src *src,
                                     const Tmp_field_param *param)
{
  Item *target= real_item();
  DBUG_ASSERT(is_result_field());

  if (target->type() != Item::FIELD_ITEM)
    return Item_result_field::create_tmp_field_ex(root, table, src, param);

  Item_field *target_field= (Item_field*) target;

  // The referenced Item_field must not have its own result_field set:
  // hand it a copy of the parameters with modify_item switched off.
  Tmp_field_param no_modify(*param);
  no_modify.set_modify_item(false);

  src->set_field(target_field->field);
  Field *created=
    target_field->create_tmp_field_from_item_field(root, table,
                                                   this, &no_modify);
  if (created && param->modify_item())
    result_field= created;
  return created;
}
|
|
|
|
|
|
/**
  Tell 'src' where the value of the new temporary field comes from:
  the existing result_field when a copy field is requested, this item
  itself otherwise.
*/
void Item_result_field::get_tmp_field_src(Tmp_field_src *src,
                                          const Tmp_field_param *param)
{
  if (!param->make_copy_field())
  {
    src->set_item_result_field(this);           // Save for copy_funcs
    return;
  }

  DBUG_ASSERT(result_field);
  src->set_field(result_field);
}
|
|
|
|
|
|
/**
  Create a temporary table field for this item using type handler 'h'.

  Possible Item types:
  - Item_cache_wrapper  (only for CREATE..SELECT ?)
  - Item_func
  - Item_subselect

  @return the new field, or NULL on failure
*/
Field *
Item_result_field::create_tmp_field_ex_from_handler(
                                         MEM_ROOT *root,
                                         TABLE *table,
                                         Tmp_field_src *src,
                                         const Tmp_field_param *param,
                                         const Type_handler *h)
{
  DBUG_ASSERT(fixed());
  DBUG_ASSERT(is_result_field());
  DBUG_ASSERT(type() != NULL_ITEM);

  get_tmp_field_src(src, param);

  Field *created= h->make_and_init_table_field(root, &name,
                                               Record_addr(maybe_null()),
                                               *this, table);
  if (created && param->modify_item())
    result_field= created;
  return created;
}
|
|
|
|
|
|
/**
  Create a temporary table field for a stored function call by cloning
  sp_result_field and naming the clone after this item.

  @return the new field, or NULL on failure
*/
Field *Item_func_sp::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                         Tmp_field_src *src,
                                         const Tmp_field_param *param)
{
  get_tmp_field_src(src, param);

  Field *created= sp_result_field->create_tmp_field(root, table);
  if (!created)
    return NULL;

  created->field_name= name;
  if (param->modify_item())
    result_field= created;
  return created;
}
|
|
|
|
|
|
/**
  Attach a JSON_VALID(field) check constraint to 'field'.

  The items are created with table->expr_arena as the active arena;
  the arena is created first if the table does not have one yet.

  @retval false  the check constraint is set
  @retval true   error (arena or constraint could not be created)
*/
static bool make_json_valid_expr(TABLE *table, Field *field)
{
  THD *thd= table->in_use;
  Query_arena saved_arena;

  if (!table->expr_arena && table->init_expr_arena(thd->mem_root))
    return 1;

  thd->set_n_backup_active_arena(table->expr_arena, &saved_arena);

  Item *column_ref= new (thd->mem_root) Item_field(thd, field);
  if (column_ref)
  {
    Item *json_valid= new (thd->mem_root) Item_func_json_valid(thd,
                                                               column_ref);
    if (json_valid)
      field->check_constraint= add_virtual_expression(thd, json_valid);
  }

  thd->restore_active_arena(table->expr_arena, &saved_arena);
  return field->check_constraint == NULL;
}
|
|
|
|
|
|
/**
|
|
Create field for temporary table.
|
|
|
|
@param table Temporary table
|
|
@param item Item to create a field for
|
|
@param type Type of item (normally item->type)
|
|
@param copy_func If set and item is a function, store copy of item
|
|
in this array
|
|
@param from_field if field will be created using other field as example,
|
|
pointer example field will be written here
|
|
@param default_field If field has a default value field, store it here
|
|
@param group 1 if we are going to do a relative group by on result
|
|
@param modify_item 1 if item->result_field should point to new item.
|
|
This is relevant for how fill_record() is going to
|
|
work:
|
|
If modify_item is 1 then fill_record() will update
|
|
the record in the original table.
|
|
If modify_item is 0 then fill_record() will update
|
|
the temporary table
|
|
@param table_cant_handle_bit_fields
|
|
Set to 1 if the temporary table cannot handle bit
|
|
fields. Only set for heap tables when the bit field
|
|
is part of an index.
|
|
@param make_copy_field
|
|
Set when using with rollup when we want to have
|
|
an exact copy of the field.
|
|
@retval
|
|
0 on error
|
|
@retval
|
|
new_created field
|
|
Create a temporary field for Item_field (or its descendant),
|
|
either direct or referenced by an Item_ref.
|
|
*/
|
|
Field *create_tmp_field(TABLE *table, Item *item,
                        Item ***copy_func, Field **from_field,
                        Field **default_field,
                        bool group, bool modify_item,
                        bool table_cant_handle_bit_fields,
                        bool make_copy_field)
{
  Tmp_field_src src;
  Tmp_field_param prm(group, modify_item, table_cant_handle_bit_fields,
                      make_copy_field);
  Field *result= item->create_tmp_field_ex(table->in_use->mem_root,
                                           table, &src, &prm);
  /*
    JSON items get a JSON_VALID() check constraint on the new field.
    create_tmp_field_ex() may return NULL; make_json_valid_expr()
    dereferences the field, so it must only be called for a real field.
    A failure to build the constraint is reported as a failure to create
    the field.
  */
  if (result && is_json_type(item) && make_json_valid_expr(table, result))
    result= NULL;

  // The source information is passed back even if no field was created
  *from_field= src.field();
  *default_field= src.default_field();
  if (src.item_result_field())
    *((*copy_func)++)= src.item_result_field();  // append and advance
  return result;
}
|
|
|
|
/*
|
|
Set up column usage bitmaps for a temporary table
|
|
|
|
IMPLEMENTATION
|
|
For temporary tables, we need one bitmap with all columns set and
|
|
a tmp_set bitmap to be used by things like filesort.
|
|
*/
|
|
|
|
void
|
|
setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count)
|
|
{
|
|
uint bitmap_size= bitmap_buffer_size(field_count);
|
|
|
|
DBUG_ASSERT(table->s->virtual_fields == 0);
|
|
|
|
my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count);
|
|
bitmaps+= bitmap_size;
|
|
my_bitmap_init(&table->tmp_set,
|
|
(my_bitmap_map*) bitmaps, field_count);
|
|
bitmaps+= bitmap_size;
|
|
my_bitmap_init(&table->eq_join_set,
|
|
(my_bitmap_map*) bitmaps, field_count);
|
|
bitmaps+= bitmap_size;
|
|
my_bitmap_init(&table->cond_set,
|
|
(my_bitmap_map*) bitmaps, field_count);
|
|
bitmaps+= bitmap_size;
|
|
my_bitmap_init(&table->has_value_set,
|
|
(my_bitmap_map*) bitmaps, field_count);
|
|
/* write_set and all_set are copies of read_set */
|
|
table->def_write_set= table->def_read_set;
|
|
table->s->all_set= table->def_read_set;
|
|
bitmap_set_all(&table->s->all_set);
|
|
table->default_column_bitmaps();
|
|
}
|
|
|
|
|
|
/**
  Remember the creation options and reset all per-counter statistics.

  The counters are kept separately for the 'distinct' and 'other' groups
  of fields; processing starts with the 'other' counter selected.
*/
Create_tmp_table::Create_tmp_table(ORDER *group, bool distinct,
                                   bool save_sum_fields,
                                   ulonglong select_options,
                                   ha_rows rows_limit)
  :m_alloced_field_count(0),
   m_using_unique_constraint(false),
   m_temp_pool_slot(MY_BIT_NONE),
   m_group(group),
   m_distinct(distinct),
   m_save_sum_fields(save_sum_fields),
   m_with_cycle(false),
   m_select_options(select_options),
   m_rows_limit(rows_limit),
   m_group_null_items(0),
   current_counter(other)
{
  // Counters of the 'distinct' group
  m_field_count[Create_tmp_table::distinct]=
    m_null_count[Create_tmp_table::distinct]=
    m_blobs_count[Create_tmp_table::distinct]=
    m_uneven_bit[Create_tmp_table::distinct]= 0;

  // Counters of the 'other' group
  m_field_count[Create_tmp_table::other]=
    m_null_count[Create_tmp_table::other]=
    m_blobs_count[Create_tmp_table::other]=
    m_uneven_bit[Create_tmp_table::other]= 0;
}
|
|
|
|
|
|
/**
  Register 'field' as column number 'fieldnr' of the temporary table and
  update the record length and the NULL / BLOB counters of the currently
  selected counter group.

  @param force_not_null_cols  make the field NOT NULL before counting it
*/
void Create_tmp_table::add_field(TABLE *table, Field *field, uint fieldnr,
                                 bool force_not_null_cols)
{
  DBUG_ASSERT(!field->field_name.str ||
              strlen(field->field_name.str) == field->field_name.length);

  TABLE_SHARE *share= table->s;

  if (force_not_null_cols)
  {
    field->null_ptr= NULL;
    field->flags|= NOT_NULL_FLAG;
  }

  const bool nullable= !(field->flags & NOT_NULL_FLAG);
  if (nullable)
    m_null_count[current_counter]++;

  share->reclength+= field->pack_length();

  // Assign it here, before update_data_type_statistics() changes m_blob_count
  const bool is_blob= (field->flags & BLOB_FLAG) != 0;
  if (is_blob)
  {
    share->blob_field[m_blob_count]= fieldnr;
    m_blobs_count[current_counter]++;
  }

  field->field_index= fieldnr;
  table->field[fieldnr]= field;

  field->update_data_type_statistics(this);
}
|
|
|
|
|
|
/**
|
|
Create a temp table according to a field list.
|
|
|
|
Given field pointers are changed to point at tmp_table for
|
|
send_result_set_metadata. The table object is self contained: it's
|
|
allocated in its own memory root, as well as Field objects
|
|
created for table columns.
|
|
This function will replace Item_sum items in 'fields' list with
|
|
corresponding Item_field items, pointing at the fields in the
|
|
temporary table, unless this was prohibited by TRUE
|
|
value of argument save_sum_fields. The Item_field objects
|
|
are created in THD memory root.
|
|
|
|
@param thd thread handle
|
|
@param param a description used as input to create the table
|
|
@param fields list of items that will be used to define
|
|
column types of the table (also see NOTES)
|
|
@param group Create an unique key over all group by fields.
|
|
This is used to retrieve the row during
|
|
end_write_group() and update them.
|
|
@param distinct should table rows be distinct
|
|
@param save_sum_fields see NOTES
|
|
@param select_options Options for how the select is run.
|
|
See sql_priv.h for a list of options.
|
|
@param rows_limit Maximum number of rows to insert into the
|
|
temporary table
|
|
@param table_alias possible name of the temporary table that can
|
|
be used for name resolving; can be "".
|
|
@param do_not_open only create the TABLE object, do not
|
|
open the table in the engine
|
|
@param keep_row_order rows need to be read in the order they were
|
|
inserted, the engine should preserve this order
|
|
*/
|
|
|
|
TABLE *Create_tmp_table::start(THD *thd,
                               TMP_TABLE_PARAM *param,
                               const LEX_CSTRING *table_alias)
{
  /*
    First step of temporary table creation:
    - build the path of the table,
    - remove constant items from the GROUP BY list,
    - allocate the TABLE, the TABLE_SHARE and all helper arrays in one
      chunk from a memory root that becomes table->mem_root,
    - initialize the basic TABLE and TABLE_SHARE members.

    Returns the new TABLE, or NULL if an allocation failed.
  */
  MEM_ROOT *mem_root_save, own_root;
  TABLE *table;
  TABLE_SHARE *share;
  uint  copy_func_count= param->func_count;
  char  *tmpname,path[FN_REFLEN];
  Field **reg_field;
  uint *blob_field;
  key_part_map *const_key_parts;
  /* Treat sum functions as normal ones when loose index scan is used. */
  m_save_sum_fields|= param->precomputed_group_by;
  DBUG_ENTER("Create_tmp_table::start");
  DBUG_PRINT("enter",
             ("table_alias: '%s' distinct: %d save_sum_fields: %d "
              "rows_limit: %lu group: %d", table_alias->str,
              (int) m_distinct, (int) m_save_sum_fields,
              (ulong) m_rows_limit, MY_TEST(m_group)));

  if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
    m_temp_pool_slot = temp_pool_set_next();

  if (m_temp_pool_slot != MY_BIT_NONE) // we got a slot
    sprintf(path, "%s-%s-%lx-%i", tmp_file_prefix, param->tmp_name,
            current_pid, m_temp_pool_slot);
  else
  {
    /* if we run out of slots or we are not using tempool */
    sprintf(path, "%s-%s-%lx-%llx-%x", tmp_file_prefix, param->tmp_name,
            current_pid, thd->thread_id, thd->tmp_table++);
  }

  /*
    No need to change table name to lower case as we are only creating
    MyISAM, Aria or HEAP tables here
  */
  fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);

  if (m_group)
  {
    // 'prev' points at the link that leads to the current list element,
    // so that constant elements can be unlinked in place
    ORDER **prev= &m_group;
    if (!param->quick_group)
      m_group= 0;                                   // Can't use group key
    else for (ORDER *tmp= m_group ; tmp ; tmp= tmp->next)
    {
      /* Exclude found constant from the list */
      if ((*tmp->item)->const_item())
      {
        *prev= tmp->next;
        param->group_parts--;
        continue;
      }
      else
        prev= &(tmp->next);
      /*
        marker == 4 means two things:
        - store NULLs in the key, and
        - convert BIT fields to 64-bit long, needed because MEMORY tables
          can't index BIT fields.
      */
      (*tmp->item)->marker= MARKER_NULL_KEY; // Store null in key
      if ((*tmp->item)->too_big_for_varchar())
        m_using_unique_constraint= true;
    }
    if (param->group_length >= MAX_BLOB_WIDTH)
      m_using_unique_constraint= true;
    if (m_group)
      m_distinct= 0;                                // Can't use distinct
  }

  m_alloced_field_count= param->field_count+param->func_count+param->sum_func_count;
  DBUG_ASSERT(m_alloced_field_count);
  const uint field_count= m_alloced_field_count;

  /*
    When loose index scan is employed as access method, it already
    computes all groups and the result of all aggregate functions. We
    make space for the items of the aggregate function in the list of
    functions TMP_TABLE_PARAM::items_to_copy, so that the values of
    these items are stored in the temporary table.
  */
  if (param->precomputed_group_by)
    copy_func_count+= param->sum_func_count;
  param->copy_func_count= copy_func_count;

  init_sql_alloc(key_memory_TABLE, &own_root, TABLE_ALLOC_BLOCK_SIZE, 0,
                 MYF(MY_THREAD_SPECIFIC));

  // One allocation for the table, its share and all per-field arrays;
  // arguments are (address of pointer to set, size) pairs ended by NullS
  if (!multi_alloc_root(&own_root,
                        &table, sizeof(*table),
                        &share, sizeof(*share),
                        &reg_field, sizeof(Field*) * (field_count+1),
                        &m_default_field, sizeof(Field*) * (field_count),
                        &blob_field, sizeof(uint)*(field_count+1),
                        &m_from_field, sizeof(Field*)*field_count,
                        &param->items_to_copy,
                          sizeof(param->items_to_copy[0])*(copy_func_count+1),
                        &param->keyinfo, sizeof(*param->keyinfo),
                        &m_key_part_info,
                        sizeof(*m_key_part_info)*(param->group_parts+1),
                        &param->start_recinfo,
                        sizeof(*param->start_recinfo)*(field_count*2+4),
                        &param->rec_per_key, sizeof(ulong)*param->group_parts,
                        &tmpname, (uint) strlen(path)+1,
                        &m_group_buff, (m_group && ! m_using_unique_constraint ?
                                      param->group_length : 0),
                        &m_bitmaps, bitmap_buffer_size(field_count)*6,
                        &const_key_parts, sizeof(*const_key_parts),
                        NullS))
  {
    DBUG_RETURN(NULL);				/* purecov: inspected */
  }
  /* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */
  if (!(param->copy_field= new (thd->mem_root) Copy_field[field_count]))
  {
    free_root(&own_root, MYF(0));               /* purecov: inspected */
    DBUG_RETURN(NULL);				/* purecov: inspected */
  }
  strmov(tmpname, path);
  /* make table according to fields */

  bzero((char*) table,sizeof(*table));
  bzero((char*) reg_field, sizeof(Field*) * (field_count+1));
  bzero((char*) m_default_field, sizeof(Field*) * (field_count));
  bzero((char*) m_from_field, sizeof(Field*) * field_count);
  /* const_key_parts is used in sort_and_filter_keyuse */
  bzero((char*) const_key_parts, sizeof(*const_key_parts));

  // From here on the table carries the memory root it was allocated from
  table->mem_root= own_root;
  mem_root_save= thd->mem_root;
  thd->mem_root= &table->mem_root;

  table->field=reg_field;
  table->const_key_parts= const_key_parts;
  table->alias.set(table_alias->str, table_alias->length, table_alias_charset);

  table->reginfo.lock_type=TL_WRITE;	/* Will be updated */
  table->map=1;
  table->temp_pool_slot= m_temp_pool_slot;
  table->copy_blobs= 1;
  table->in_use= thd;
  table->no_rows_with_nulls= param->force_not_null_cols;
  table->expr_arena= thd;

  table->s= share;
  init_tmp_table_share(thd, share, "", 0, "(temporary)", tmpname);
  share->blob_field= blob_field;
  share->table_charset= param->table_charset;
  share->primary_key= MAX_KEY;               // Indicate no primary key
  if (param->schema_table)
    share->db= Lex_ident_db(INFORMATION_SCHEMA_NAME);

  param->using_outer_summary_function= 0;
  thd->mem_root= mem_root_save;
  DBUG_RETURN(table);
}
|
|
|
|
|
|
/**
  Create the fields of the temporary table from the items in 'fields'.

  For every item that has to be stored a Field is created with
  create_tmp_field() and registered with add_field(). The function also
  fills param->items_to_copy, sets param->func_count and sets the field
  and blob counters of the table share.

  While the function runs, thd->mem_root points to the table's own memory
  root; it is restored before returning.

  @retval false  success
  @retval true   error
*/
bool Create_tmp_table::add_fields(THD *thd,
                                  TABLE *table,
                                  TMP_TABLE_PARAM *param,
                                  List<Item> &fields)
{
  DBUG_ENTER("Create_tmp_table::add_fields");
  DBUG_ASSERT(table);
  DBUG_ASSERT(table->field);
  DBUG_ASSERT(table->s->blob_field);
  DBUG_ASSERT(table->s->reclength == 0);
  DBUG_ASSERT(table->s->fields == 0);
  DBUG_ASSERT(table->s->blob_fields == 0);

  const bool not_all_columns= !(m_select_options & TMP_TABLE_ALL_COLUMNS);
  bool distinct_record_structure= m_distinct;
  uint fieldnr= 0;
  TABLE_SHARE *share= table->s;
  Item **copy_func= param->items_to_copy;

  MEM_ROOT *mem_root_save= thd->mem_root;
  thd->mem_root= &table->mem_root;

  List_iterator_fast<Item> li(fields);
  Item *item;
  Field **tmp_from_field= m_from_field;
  // First pass: only find out whether any item belongs to a WITH ... CYCLE
  while (!m_with_cycle && (item= li++))
    if (item->is_in_with_cycle())
    {
      m_with_cycle= true;
      /*
        Following distinct_record_structure is (m_distinct || m_with_cycle)

        Note: distinct_record_structure can be true even if m_distinct is
        false, for example for incr_table in recursive CTE
        (see select_union_recursive::create_result_table)
      */
      distinct_record_structure= true;
    }
  li.rewind();
  // Second pass: create the fields
  while ((item=li++))
  {
    uint uneven_delta;
    current_counter= (((param->hidden_field_count < (fieldnr + 1)) &&
                       distinct_record_structure &&
                       (!m_with_cycle || item->is_in_with_cycle())) ?
                      distinct :
                      other);
    Item::Type type= item->type();
    if (type == Item::COPY_STR_ITEM)
    {
      item= ((Item_copy *)item)->get_item();
      type= item->type();
    }
    if (not_all_columns)
    {
      if (item->with_sum_func() && type != Item::SUM_FUNC_ITEM)
      {
        if (item->used_tables() & OUTER_REF_TABLE_BIT)
          item->update_used_tables();
        if ((item->real_type() == Item::SUBSELECT_ITEM) ||
            (item->used_tables() & ~OUTER_REF_TABLE_BIT))
        {
	  /*
	    Mark that we have ignored an item that refers to a summary
	    function. We need to know this if someone is going to use
	    DISTINCT on the result.
	  */
	  param->using_outer_summary_function=1;
	  continue;
        }
      }
      if (item->const_item() &&
          param->hidden_field_count < (fieldnr + 1))
        continue; // We don't have to store this
    }
    if (type == Item::SUM_FUNC_ITEM && !m_group && !m_save_sum_fields)
    {						/* Can't calc group yet */
      Item_sum *sum_item= (Item_sum *) item;
      sum_item->result_field=0;
      // Store every non-constant argument of the aggregate in its own field
      for (uint i= 0 ; i < sum_item->get_arg_count() ; i++)
      {
	Item *arg= sum_item->get_arg(i);
	if (!arg->const_item())
	{
          Item *tmp_item;
          Field *new_field=
            create_tmp_field(table, arg, &copy_func,
                             tmp_from_field, &m_default_field[fieldnr],
                             m_group != 0, not_all_columns,
                             distinct_record_structure , false);
	  if (!new_field)
	    goto err;					// Should be OOM
	  tmp_from_field++;

          // The Item_field replacing the argument is created in the
          // memory root that was active on entry, not in the table's
          thd->mem_root= mem_root_save;
          if (!(tmp_item= new (thd->mem_root)
                Item_field(thd, new_field)))
            goto err;
          ((Item_field*) tmp_item)->set_refers_to_temp_table();
          arg= sum_item->set_arg(i, thd, tmp_item);
          thd->mem_root= &table->mem_root;

          // add_field() may change m_uneven_bit_length; account the
          // difference to the current counter
          uneven_delta= m_uneven_bit_length;
          add_field(table, new_field, fieldnr++, param->force_not_null_cols);
          m_field_count[current_counter]++;
          m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta);

          if (!(new_field->flags & NOT_NULL_FLAG))
	  {
            /*
              new_field->maybe_null() is still false, it will be
              changed below. But we have to setup Item_field correctly
            */
            arg->set_maybe_null();
	  }
          if (current_counter == distinct)
            new_field->flags|= FIELD_PART_OF_TMP_UNIQUE;
	}
      }
    }
    else
    {
      /*
	The last parameter to create_tmp_field_ex() is a bit tricky:

	We need to set it to 0 in union, to get fill_record() to modify the
	temporary table.
	We need to set it to 1 on multi-table-update and in select to
	write rows to the temporary table.
	We here distinguish between UNION and multi-table-updates by the fact
	that in the later case group is set to the row pointer.

        The test for item->marker == MARKER_NULL_KEY is ensure we
        don't create a group-by key over a bit field as heap tables
        can't handle that.
      */
      DBUG_ASSERT(!param->schema_table);
      Field *new_field=
        create_tmp_field(table, item, &copy_func,
                         tmp_from_field, &m_default_field[fieldnr],
                         m_group != 0,
                         !param->force_copy_fields &&
                           (not_all_columns || m_group !=0),
                         /*
                           If item->marker == MARKER_NULL_KEY then we
                           force create_tmp_field to create a 64-bit
                           longs for BIT fields because HEAP tables
                           can't index BIT fields directly. We do the
                           same for distinct, as we want the distinct
                           index to be usable in this case too.
                         */
                         item->marker == MARKER_NULL_KEY ||
                         param->bit_fields_as_long,
                         param->force_copy_fields);
      if (unlikely(!new_field))
      {
	if (unlikely(thd->is_fatal_error))
	  goto err;				// Got OOM
	continue;				// Some kind of const item
      }
      if (type == Item::SUM_FUNC_ITEM)
      {
        Item_sum *agg_item= (Item_sum *) item;
        /*
          Update the result field only if it has never been set, or if the
          created temporary table is not to be used for subquery
          materialization.

          The reason is that for subqueries that require
          materialization as part of their plan, we create the
          'external' temporary table needed for IN execution, after
          the 'internal' temporary table needed for grouping.  Since
          both the external and the internal temporary tables are
          created for the same list of SELECT fields of the subquery,
          setting 'result_field' for each invocation of
          create_tmp_table overrides the previous value of
          'result_field'.

          The condition below prevents the creation of the external
          temp table to override the 'result_field' that was set for
          the internal temp table.
        */
        if (!agg_item->result_field || !param->materialized_subquery)
          agg_item->result_field= new_field;
      }
      tmp_from_field++;

      uneven_delta= m_uneven_bit_length;
      add_field(table, new_field, fieldnr++, param->force_not_null_cols);
      m_field_count[current_counter]++;
      m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta);

      if (item->marker == MARKER_NULL_KEY && item->maybe_null())
      {
	m_group_null_items++;
	new_field->flags|= GROUP_FLAG;
      }
      if (current_counter == distinct)
        new_field->flags|= FIELD_PART_OF_TMP_UNIQUE;
    }
  }

  DBUG_ASSERT(fieldnr == m_field_count[other] + m_field_count[distinct]);
  DBUG_ASSERT(m_blob_count == m_blobs_count[other] + m_blobs_count[distinct]);
  share->fields= fieldnr;
  share->blob_fields= m_blob_count;
  table->field[fieldnr]= 0;                     // End marker
  share->blob_field[m_blob_count]= 0;           // End marker
  copy_func[0]= 0;                              // End marker
  param->func_count= (uint) (copy_func - param->items_to_copy);
  DBUG_ASSERT(param->func_count <= param->copy_func_count);

  share->column_bitmap_size= bitmap_buffer_size(share->fields);

  thd->mem_root= mem_root_save;
  DBUG_RETURN(false);

err:
  thd->mem_root= mem_root_save;
  DBUG_RETURN(true);
}
|
|
|
|
|
|
/**
  Pick the storage engine for the temporary table and create its handler.

  If the result table is small, HEAP is used, otherwise TMP_ENGINE_HTON.
  The on-disk engine is chosen when any of the following holds:
   - the table has blob columns,
   - a unique constraint is already required,
   - big_tables is set and the query is not marked SELECT_SMALL_RESULT,
   - TMP_TABLE_FORCE_MYISAM is set in the select options,
   - tmp_memory_table_size is 0.
  In the future we should try making storage engine selection more dynamic.

  When the on-disk engine is used for a grouped table whose group key
  exceeds the handler's key limits, m_using_unique_constraint is switched on.

  @param thd    Thread handle (session variables are read from it)
  @param table  Table under construction; share->db_plugin and table->file
                are assigned here
  @param param  Temporary table parameters (group_parts, group_length)

  @retval false  OK
  @retval true   No handler could be created (table->file is NULL)
*/
bool Create_tmp_table::choose_engine(THD *thd, TABLE *table,
                                     TMP_TABLE_PARAM *param)
{
  TABLE_SHARE *share= table->s;
  DBUG_ENTER("Create_tmp_table::choose_engine");

  if (share->blob_fields || m_using_unique_constraint ||
      (thd->variables.big_tables &&
       !(m_select_options & SELECT_SMALL_RESULT)) ||
      (m_select_options & TMP_TABLE_FORCE_MYISAM) ||
      thd->variables.tmp_memory_table_size == 0)
  {
    share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
    table->file= get_new_handler(share, &table->mem_root,
                                 share->db_type());
    /*
      The return statement below reports a NULL handler as failure, so the
      handler must not be dereferenced here unless it was really created.
    */
    if (table->file && m_group &&
        (param->group_parts > table->file->max_key_parts() ||
         param->group_length > table->file->max_key_length()))
      m_using_unique_constraint= true;
  }
  else
  {
    share->db_plugin= ha_lock_engine(0, heap_hton);
    table->file= get_new_handler(share, &table->mem_root,
                                 share->db_type());
  }
  DBUG_RETURN(!table->file);
}
|
|
|
|
|
|
/**
  Finish the construction of a temporary table whose fields were already
  added to 'table'.

  The function performs, in this order:
   - engine selection (choose_engine()) and handler set-up,
   - computation of the NULL-bit area for the two field groups
     ('other' and 'distinct') and of the record length,
   - allocation of record[0], record[1] and share->default_values,
   - placement of every field inside the record, filling of the engine
     column descriptions (recinfo) and copying of inherited default values,
   - calculation of share->max_rows, capped by m_rows_limit,
   - creation of the GROUP BY key and/or the DISTINCT key description,
   - instantiation of the table, unless do_not_open is set.

  thd->mem_root is switched to table->mem_root for the duration of the call
  and restored on both the success and the error path.

  @param thd             Thread handle
  @param table           Table under construction
  @param param           Temporary table parameters; keyinfo, recinfo,
                         copy_field_end, group_buff and end_write_records
                         are read and/or updated
  @param do_not_open     If true, instantiate_tmp_table() is not called
  @param keep_row_order  Stored in table->keep_row_order

  @retval false  OK
  @retval true   Error
*/
bool Create_tmp_table::finalize(THD *thd,
                                TABLE *table,
                                TMP_TABLE_PARAM *param,
                                bool do_not_open, bool keep_row_order)
{
  DBUG_ENTER("Create_tmp_table::finalize");
  DBUG_ASSERT(table);

  uint null_pack_length[2];
  uint null_pack_base[2];
  uint null_counter[2]= {0, 0};
  uint whole_null_pack_length;
  bool use_packed_rows= false;
  bool save_abort_on_warning;
  uchar *pos;
  uchar *null_flags;
  KEY *keyinfo= param->keyinfo;
  TMP_ENGINE_COLUMNDEF *recinfo;
  TABLE_SHARE *share= table->s;
  Copy_field *copy= param->copy_field;
  MEM_ROOT *mem_root_save= thd->mem_root;
  thd->mem_root= &table->mem_root;

  DBUG_ASSERT(m_alloced_field_count >= share->fields);
  DBUG_ASSERT(m_alloced_field_count >= share->blob_fields);

  if (choose_engine(thd, table, param))
    goto err;

  if (table->file->set_ha_share_ref(&share->ha_share))
  {
    delete table->file;
    table->file= 0;
    goto err;
  }
  table->file->set_table(table);

  if (!m_using_unique_constraint)
    share->reclength+= m_group_null_items; // null flag is stored separately

  if (share->blob_fields == 0)
  {
    /* We need to ensure that first byte is not 0 for the delete link */
    if (m_field_count[other])
      m_null_count[other]++;
    else
      m_null_count[distinct]++;
  }

  /* NULL bits of the 'other' group come first, then those of 'distinct' */
  null_pack_length[other]= (m_null_count[other] + 7 +
                            m_uneven_bit[other]) / 8;
  null_pack_base[other]= 0;
  null_pack_length[distinct]= (m_null_count[distinct] + 7 +
                               m_uneven_bit[distinct]) / 8;
  null_pack_base[distinct]= null_pack_length[other];
  whole_null_pack_length= null_pack_length[other] +
                          null_pack_length[distinct];
  share->reclength+= whole_null_pack_length;
  if (!share->reclength)
    share->reclength= 1;                        // Dummy select
  share->stored_rec_length= share->reclength;
  /* Use packed rows if there are blobs or a lot of space to gain */
  if (share->blob_fields ||
      (string_total_length() >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
       (share->reclength / string_total_length() <= RATIO_TO_PACK_ROWS ||
        string_total_length() / string_count() >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
    use_packed_rows= 1;

  {
    /* One allocation holds record[0], record[1] and the default record */
    uint alloc_length= ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1);
    share->rec_buff_length= alloc_length;
    if (!(table->record[0]= (uchar*)
          alloc_root(&table->mem_root, alloc_length*3)))
      goto err;
    table->record[1]= table->record[0]+alloc_length;
    share->default_values= table->record[1]+alloc_length;
  }

  setup_tmp_table_column_bitmaps(table, m_bitmaps, table->s->fields);

  recinfo=param->start_recinfo;
  null_flags=(uchar*) table->record[0];
  pos=table->record[0]+ whole_null_pack_length;
  if (whole_null_pack_length)
  {
    /* The first engine column describes the NULL-bit area */
    bzero((uchar*) recinfo,sizeof(*recinfo));
    recinfo->type=FIELD_NORMAL;
    recinfo->length= whole_null_pack_length;
    recinfo++;
    bfill(null_flags, whole_null_pack_length, 255);	// Set null fields

    table->null_flags= (uchar*) table->record[0];
    share->null_fields= m_null_count[other] + m_null_count[distinct];
    share->null_bytes= share->null_bytes_for_compare= whole_null_pack_length;
  }

  if (share->blob_fields == 0)
  {
    /* Skip the bit that was reserved above for the delete link */
    null_counter[(m_field_count[other] ? other : distinct)]++;
  }

  /* Protect against warnings in field_conv() in the next loop */
  save_abort_on_warning= thd->abort_on_warning;
  thd->abort_on_warning= 0;

  for (uint i= 0; i < share->fields; i++, recinfo++)
  {
    Field *field= table->field[i];
    uint length;
    bzero((uchar*) recinfo,sizeof(*recinfo));

    current_counter= ((field->flags & FIELD_PART_OF_TMP_UNIQUE) ?
                      distinct :
                      other);

    if (!(field->flags & NOT_NULL_FLAG))
    {
      recinfo->null_bit= (uint8)1 << (null_counter[current_counter] & 7);
      recinfo->null_pos= (null_pack_base[current_counter] +
                          null_counter[current_counter]/8);
      field->move_field(pos, null_flags + recinfo->null_pos, recinfo->null_bit);
      null_counter[current_counter]++;
    }
    else
      field->move_field(pos,(uchar*) 0,0);
    if (field->type() == MYSQL_TYPE_BIT)
    {
      /* We have to reserve place for extra bits among null bits */
      ((Field_bit*) field)->set_bit_ptr(null_flags +
                                        null_pack_base[current_counter] +
                                        null_counter[current_counter]/8,
                                        null_counter[current_counter] & 7);
      null_counter[current_counter]+= (field->field_length & 7);
    }
    field->reset();

    /*
      Test if there is a default field value. The test for ->ptr is to skip
      'offset' fields generated by initialize_tables
    */
    if (m_default_field[i] && m_default_field[i]->ptr)
    {
      /*
         default_field[i] is set only in the cases  when 'field' can
         inherit the default value that is defined for the field referred
         by the Item_field object from which 'field' has been created.
      */
      Field *orig_field= m_default_field[i];
      /* Get the value from default_values */
      if (orig_field->is_null_in_record(orig_field->table->s->default_values))
        field->set_null();
      else
      {
        /*
          Copy default value. We have to use field_conv() for copy, instead of
          memcpy(), because bit_fields may be stored differently.
          But otherwise we copy as is, in particular, ignore NO_ZERO_DATE, etc
        */
        Use_relaxed_field_copy urfc(thd);
        my_ptrdiff_t ptr_diff= (orig_field->table->s->default_values -
                                orig_field->table->record[0]);
        field->set_notnull();
        orig_field->move_field_offset(ptr_diff);
        field_conv(field, orig_field);
        orig_field->move_field_offset(-ptr_diff);
      }
    }

    if (m_from_field[i])
    {						/* Not a table Item */
      copy->set(field, m_from_field[i], m_save_sum_fields);
      copy++;
    }
    length=field->pack_length_in_rec();
    pos+= length;

    /* Make entry for create table */
    recinfo->length=length;
    recinfo->type= field->tmp_engine_column_type(use_packed_rows);

    // fix table name in field entry
    field->set_table_name(&table->alias);
  }
  /* Handle group_null_items */
  bzero(pos, table->s->reclength - (pos - table->record[0]));
  MEM_CHECK_DEFINED(table->record[0], table->s->reclength);

  thd->abort_on_warning= save_abort_on_warning;
  param->copy_field_end= copy;
  param->recinfo= recinfo;              	// Pointer to after last field
  store_record(table,s->default_values);        // Make empty default record

  if (thd->variables.tmp_memory_table_size == ~ (ulonglong) 0)	// No limit
    share->max_rows= ~(ha_rows) 0;
  else
    share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
                                 MY_MIN(thd->variables.tmp_memory_table_size,
                                        thd->variables.max_heap_table_size) :
                                 thd->variables.tmp_disk_table_size) /
                                share->reclength);
  set_if_bigger(share->max_rows,1);		// For dummy start options
  /*
    Push the LIMIT clause to the temporary table creation, so that we
    materialize only up to 'rows_limit' records instead of all result records.
  */
  set_if_smaller(share->max_rows, m_rows_limit);
  param->end_write_records= m_rows_limit;

  if (m_group)
  {
    DBUG_PRINT("info",("Creating group key in temporary table"));
    table->group= m_group;				/* Table is grouped by key */
    param->group_buff= m_group_buff;
    share->keys=1;
    table->key_info= table->s->key_info= keyinfo;
    table->keys_in_use_for_query.set_bit(0);
    share->keys_in_use.set_bit(0);
    keyinfo->key_part= m_key_part_info;
    keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
    if (m_using_unique_constraint)
      keyinfo->flags|= HA_UNIQUE_HASH;
    keyinfo->ext_key_flags= keyinfo->flags;
    keyinfo->usable_key_parts=keyinfo->user_defined_key_parts=
      param->group_parts;
    keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
    share->ext_key_parts= share->key_parts= keyinfo->ext_key_parts;
    keyinfo->key_length=0;
    keyinfo->rec_per_key= param->rec_per_key;
    keyinfo->read_stats= NULL;
    keyinfo->collected_stats= NULL;
    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
    keyinfo->is_statistics_from_stat_tables= FALSE;
    keyinfo->name= group_key;
    keyinfo->comment.str= 0;
    ORDER *cur_group= m_group;
    for (; cur_group ; cur_group= cur_group->next, m_key_part_info++)
    {
      Field *field=(*cur_group->item)->get_tmp_table_field();
      DBUG_ASSERT(field->table == table);
      bool maybe_null=(*cur_group->item)->maybe_null();
      m_key_part_info->null_bit=0;
      m_key_part_info->field=  field;
      m_key_part_info->fieldnr= field->field_index + 1;
      if (cur_group == m_group)
        field->key_start.set_bit(0);
      m_key_part_info->offset= field->offset(table->record[0]);
      m_key_part_info->length= (uint16) field->key_length();
      m_key_part_info->type=   (uint8) field->key_type();
      m_key_part_info->key_type =
        ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT ||
         (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
         (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
        0 : FIELDFLAG_BINARY;
      m_key_part_info->key_part_flag= 0;
      if (!m_using_unique_constraint)
      {
        cur_group->buff=(char*) m_group_buff;

        if (maybe_null && !field->null_bit)
        {
          /*
            This can only happen in the unusual case where an outer join
            table was found to be not-nullable by the optimizer and
            the item can't really be null.
            We solve this by marking the item as !maybe_null to ensure
            that the key,field and item definition match.
          */
          maybe_null= 0;
          (*cur_group->item)->base_flags&= ~item_base_t::MAYBE_NULL;
        }

        if (!(cur_group->field= field->new_key_field(thd->mem_root,table,
                                                     m_group_buff +
                                                     MY_TEST(maybe_null),
                                                     m_key_part_info->length,
                                                     field->null_ptr,
                                                     field->null_bit)))
          goto err; /* purecov: inspected */

        if (maybe_null)
        {
          /*
            To be able to group on NULL, we reserved place in group_buff
            for the NULL flag just before the column. (see above).
            The field data is after this flag.
            The NULL flag is updated in 'end_update()' and 'end_write()'
          */
          keyinfo->flags|= HA_NULL_ARE_EQUAL;	// def. that NULL == NULL
          m_key_part_info->null_bit=field->null_bit;
          m_key_part_info->null_offset= (uint) (field->null_ptr -
                                                (uchar*) table->record[0]);
          cur_group->buff++;                        // Pointer to field data
          m_group_buff++;                         // Skip null flag
        }
        m_group_buff+= cur_group->field->pack_length();
      }
      keyinfo->key_length+=  m_key_part_info->length;
    }
    /*
      Ensure we didn't overrun the group buffer. The < is only true when
      some maybe_null fields were changed to be not null fields.
    */
    DBUG_ASSERT(m_using_unique_constraint ||
                m_group_buff <= param->group_buff + param->group_length);
  }

  if (m_distinct && (share->fields != param->hidden_field_count ||
                     m_with_cycle))
  {
    uint i;
    Field **reg_field;
    /*
      Create an unique key or an unique constraint over all columns
      that should be in the result.  In the temporary table, there are
      'param->hidden_field_count' extra columns, whose null bits are stored
      in the first 'hidden_null_pack_length' bytes of the row.
    */
    DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count));

    keyinfo->flags= 0;
    if (m_blobs_count[distinct])
    {
      /*
        Special mode for index creation in MyISAM used to support unique
        indexes on blobs with arbitrary length. Such indexes cannot be
        used for lookups.
      */
      keyinfo->flags|= HA_UNIQUE_HASH;
    }
    keyinfo->user_defined_key_parts= m_field_count[distinct] +
                                     ((keyinfo->flags & HA_UNIQUE_HASH) ?
                                      MY_TEST(null_pack_length[distinct]) : 0);
    keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
    keyinfo->usable_key_parts= keyinfo->user_defined_key_parts;
    table->distinct= 1;
    share->keys= 1;
    share->ext_key_parts= share->key_parts= keyinfo->ext_key_parts;
    if (!(m_key_part_info= (KEY_PART_INFO*)
          alloc_root(&table->mem_root,
                     keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO))))
      goto err;
    bzero((void*) m_key_part_info, keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO));
    table->keys_in_use_for_query.set_bit(0);
    share->keys_in_use.set_bit(0);
    table->key_info= table->s->key_info= keyinfo;
    keyinfo->key_part= m_key_part_info;
    keyinfo->flags|= (HA_NOSAME | HA_NULL_ARE_EQUAL | HA_BINARY_PACK_KEY |
                      HA_PACK_KEY);
    keyinfo->ext_key_flags= keyinfo->flags;
    keyinfo->key_length= 0;  // Will compute the sum of the parts below.
    keyinfo->name= distinct_key;
    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
    keyinfo->is_statistics_from_stat_tables= FALSE;
    keyinfo->read_stats= NULL;
    keyinfo->collected_stats= NULL;

    /*
      Needed by non-merged semi-joins: SJ-Materialized table must have a valid
      rec_per_key array, because it participates in join optimization. Since
      the table has no data, the only statistics we can provide is "unknown",
      i.e. zero values.

      (For table record count, we calculate and set JOIN_TAB::found_records,
       see get_delayed_table_estimates()).
    */
    size_t rpk_size= keyinfo->user_defined_key_parts * sizeof(keyinfo->rec_per_key[0]);
    if (!(keyinfo->rec_per_key= (ulong*) alloc_root(&table->mem_root,
                                                    rpk_size)))
      goto err;
    bzero(keyinfo->rec_per_key, rpk_size);

    /*
      Create an extra field to hold NULL bits so that unique indexes on
      blobs can distinguish NULL from 0. This extra field is not needed
      when we do not use UNIQUE indexes for blobs.
    */
    if (null_pack_length[distinct] && (keyinfo->flags & HA_UNIQUE_HASH))
    {
      m_key_part_info->null_bit=0;
      m_key_part_info->offset= null_pack_base[distinct];
      m_key_part_info->length= null_pack_length[distinct];
      m_key_part_info->field= new Field_string(table->record[0],
                                               (uint32) m_key_part_info->length,
                                               (uchar*) 0,
                                               (uint) 0,
                                               Field::NONE,
                                               &null_clex_str, &my_charset_bin);
      if (!m_key_part_info->field)
        goto err;
      m_key_part_info->field->init(table);
      m_key_part_info->key_type=FIELDFLAG_BINARY;
      m_key_part_info->type=    HA_KEYTYPE_BINARY;
      m_key_part_info->fieldnr= m_key_part_info->field->field_index + 1;
      m_key_part_info++;
    }
    /* Create a distinct key over the columns we are going to return */
    for (i= param->hidden_field_count, reg_field= table->field + i ;
         i < share->fields;
         i++, reg_field++)
    {
      if (!((*reg_field)->flags & FIELD_PART_OF_TMP_UNIQUE))
        continue;
      m_key_part_info->field=    *reg_field;
      (*reg_field)->flags |= PART_KEY_FLAG;
      if (m_key_part_info == keyinfo->key_part)
        (*reg_field)->key_start.set_bit(0);
      m_key_part_info->null_bit= (*reg_field)->null_bit;
      m_key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
                                            (uchar*) table->record[0]);

      m_key_part_info->offset=   (*reg_field)->offset(table->record[0]);
      m_key_part_info->length=   (uint16) (*reg_field)->pack_length();
      m_key_part_info->fieldnr= (*reg_field)->field_index + 1;
      /* TODO:
        The below method of computing the key format length of the
        key part is a copy/paste from opt_range.cc, and table.cc.
        This should be factored out, e.g. as a method of Field.
        In addition it is not clear if any of the Field::*_length
        methods is supposed to compute the same length. If so, it
        might be reused.
      */
      m_key_part_info->store_length= m_key_part_info->length;

      if ((*reg_field)->real_maybe_null())
      {
        m_key_part_info->store_length+= HA_KEY_NULL_LENGTH;
        m_key_part_info->key_part_flag |= HA_NULL_PART;
      }
      m_key_part_info->key_part_flag|= (*reg_field)->key_part_flag();
      m_key_part_info->store_length+= (*reg_field)->key_part_length_bytes();
      keyinfo->key_length+= m_key_part_info->store_length;

      m_key_part_info->type=     (uint8) (*reg_field)->key_type();
      m_key_part_info->key_type =
        ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT ||
         (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
         (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
        0 : FIELDFLAG_BINARY;

      m_key_part_info++;
    }
  }
  if (share->keys)
    keyinfo->index_flags= table->file->index_flags(0, 0, 1);

  if (unlikely(thd->is_fatal_error))             // If end of memory
    goto err;					 /* purecov: inspected */
  share->db_record_offset= 1;
  table->used_for_duplicate_elimination= (param->sum_func_count == 0 &&
                                          (table->group || table->distinct));
  table->keep_row_order= keep_row_order;

  if (!do_not_open)
  {
    if (instantiate_tmp_table(table, param->keyinfo, param->start_recinfo,
                              &param->recinfo, m_select_options))
      goto err;
  }

  /* record[0] and share->default_values should now have been set up */
  MEM_CHECK_DEFINED(table->record[0], table->s->reclength);
  MEM_CHECK_DEFINED(share->default_values, table->s->reclength);

  empty_record(table);
  table->status= STATUS_NO_RECORD;
  thd->mem_root= mem_root_save;

  DBUG_RETURN(false);

err:
  thd->mem_root= mem_root_save;
  DBUG_RETURN(true);                            /* purecov: inspected */
}
|
|
|
|
|
|
/**
  Add one field to 'table' for every column definition of an
  INFORMATION_SCHEMA table.

  The table must be empty on entry (no fields, no blobs, zero record
  length). Every created field is flagged NO_DEFAULT_VALUE_FLAG.
  thd->mem_root points to table->mem_root while the fields are created
  and is restored before returning.

  @param thd           Thread handle
  @param table         Table under construction
  @param param         Temporary table parameters (force_not_null_cols is
                       read, func_count is reset to 0)
  @param schema_table  Schema table whose fields_info array is walked until
                       its end marker

  @retval false  OK
  @retval true   A field could not be created (EOM)
*/
bool Create_tmp_table::add_schema_fields(THD *thd, TABLE *table,
                                         TMP_TABLE_PARAM *param,
                                         const ST_SCHEMA_TABLE &schema_table)
{
  DBUG_ENTER("Create_tmp_table::add_schema_fields");
  DBUG_ASSERT(table);
  DBUG_ASSERT(table->field);
  DBUG_ASSERT(table->s->blob_field);
  DBUG_ASSERT(table->s->reclength == 0);
  DBUG_ASSERT(table->s->fields == 0);
  DBUG_ASSERT(table->s->blob_fields == 0);

  TABLE_SHARE *share= table->s;
  MEM_ROOT *saved_root= thd->mem_root;
  uint fieldnr= 0;

  thd->mem_root= &table->mem_root;

  for (const ST_FIELD_INFO *def= schema_table.fields_info;
       !def->end_marker();
       def++)
  {
    Record_addr addr(def->nullable());
    const Type_handler *handler= def->type_handler();
    Field *new_field= handler->make_schema_field(&table->mem_root, table,
                                                 addr, *def);
    if (!new_field)
    {
      thd->mem_root= saved_root;
      DBUG_RETURN(true); // EOM
    }
    new_field->init(table);
    new_field->flags|= NO_DEFAULT_VALUE_FLAG;
    add_field(table, new_field, fieldnr, param->force_not_null_cols);
    fieldnr++;
  }

  /* Publish the counters and terminate both arrays */
  share->fields= fieldnr;
  share->blob_fields= m_blob_count;
  table->field[fieldnr]= 0;                     // End marker
  share->blob_field[m_blob_count]= 0;           // End marker
  param->func_count= 0;
  share->column_bitmap_size= bitmap_buffer_size(share->fields);

  thd->mem_root= saved_root;
  DBUG_RETURN(false);
}
|
|
|
|
|
|
/**
  Undo a failed temporary table creation.

  Frees 'table' if one was created and clears the temp pool bit held in
  m_temp_pool_slot, if any.

  @param thd    Thread handle
  @param table  Table to free; may be NULL
*/
void Create_tmp_table::cleanup_on_failure(THD *thd, TABLE *table)
{
  if (table != NULL)
  {
    free_tmp_table(thd, table);
  }

  const bool slot_in_use= (m_temp_pool_slot != MY_BIT_NONE);
  if (slot_in_use)
  {
    temp_pool_clear_bit(m_temp_pool_slot);
  }
}
|
|
|
|
|
|
/**
  Create a temporary table for the given list of items.

  Runs the three Create_tmp_table stages (start, add_fields, finalize) in
  order and stops at the first one that fails; on failure whatever was
  built so far is cleaned up.

  @return The new table, or NULL on error
*/
TABLE *create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
                        ORDER *group, bool distinct, bool save_sum_fields,
                        ulonglong select_options, ha_rows rows_limit,
                        const LEX_CSTRING *table_alias, bool do_not_open,
                        bool keep_row_order)
{
  Create_tmp_table maker(group, distinct, save_sum_fields, select_options,
                         rows_limit);

  TABLE *table= maker.start(thd, param, table_alias);
  const bool failed=
    !table ||
    maker.add_fields(thd, table, param, fields) ||
    maker.finalize(thd, table, param, do_not_open, keep_row_order);

  if (!failed)
    return table;

  maker.cleanup_on_failure(thd, table);
  return NULL;
}
|
|
|
|
|
|
/**
  Create a temporary table for an INFORMATION_SCHEMA table.

  The table has no GROUP BY, no DISTINCT and no row limit (HA_ROWS_MAX).
  Runs start, add_schema_fields and finalize in order and stops at the
  first stage that fails; on failure whatever was built is cleaned up.

  @return The new table, or NULL on error
*/
TABLE *create_tmp_table_for_schema(THD *thd, TMP_TABLE_PARAM *param,
                                   const ST_SCHEMA_TABLE &schema_table,
                                   longlong select_options,
                                   const LEX_CSTRING &table_alias,
                                   bool do_not_open, bool keep_row_order)
{
  Create_tmp_table maker((ORDER *) NULL, false, false,
                         select_options, HA_ROWS_MAX);

  TABLE *table= maker.start(thd, param, &table_alias);
  const bool failed=
    !table ||
    maker.add_schema_fields(thd, table, param, schema_table) ||
    maker.finalize(thd, table, param, do_not_open, keep_row_order);

  if (!failed)
    return table;

  maker.cleanup_on_failure(thd, table);
  return NULL;
}
|
|
|
|
|
|
/****************************************************************************/
|
|
|
|
/**
  Allocate storage for a Virtual_tmp_table on the mem_root of 'thd'.

  @param size  Number of bytes requested
  @param thd   Thread handle whose mem_root is used

  @return Whatever alloc_root() returns for the request
*/
void *Virtual_tmp_table::operator new(size_t size, THD *thd) throw()
{
  void *mem= alloc_root(thd->mem_root, size);
  return mem;
}
|
|
|
|
|
|
/**
  Allocate the share, the field array, the blob index array and the column
  bitmaps for a table with up to 'field_count' fields.

  Everything is taken from in_use->mem_root in a single
  multi_alloc_root() call. The field and blob arrays get one extra slot
  each.

  @param field_count  Number of fields the table is planned to hold

  @retval false  OK
  @retval true   Allocation failed
*/
bool Virtual_tmp_table::init(uint field_count)
{
  DBUG_ENTER("Virtual_tmp_table::init");
  uint *blob_index_buf;
  uchar *bitmap_buf;

  if (!multi_alloc_root(in_use->mem_root,
                        &s, sizeof(*s),
                        &field, (field_count + 1) * sizeof(Field*),
                        &blob_index_buf, (field_count + 1) * sizeof(uint),
                        &bitmap_buf, bitmap_buffer_size(field_count) * 6,
                        NullS))
  {
    DBUG_RETURN(true);
  }

  s->reset();
  s->blob_field= blob_index_buf;
  setup_tmp_table_column_bitmaps(this, bitmap_buf, field_count);
  m_alloced_field_count= field_count;
  DBUG_RETURN(false);
}
|
|
|
|
|
|
/**
  Create a field for every column definition in 'field_list' and add it to
  the table.

  @param field_list  Column definitions

  @retval false  OK
  @retval true   A field could not be created
*/
bool Virtual_tmp_table::add(List<Spvar_definition> &field_list)
{
  DBUG_ENTER("Virtual_tmp_table::add");
  List_iterator_fast<Spvar_definition> def_it(field_list);

  for (Spvar_definition *def= def_it++; def; def= def_it++)
  {
    Record_addr addr(f_maybe_null(def->pack_flag));
    Field *new_field= def->make_field(s, in_use->mem_root, &addr,
                                      &def->field_name);
    if (!new_field)
      DBUG_RETURN(true);
    add(new_field);
  }
  DBUG_RETURN(false);
}
|
|
|
|
|
|
void Virtual_tmp_table::setup_field_pointers()
|
|
{
|
|
uchar *null_pos= record[0];
|
|
uchar *field_pos= null_pos + s->null_bytes;
|
|
uint null_bit= 1;
|
|
|
|
for (Field **cur_ptr= field; *cur_ptr; ++cur_ptr)
|
|
{
|
|
Field *cur_field= *cur_ptr;
|
|
if ((cur_field->flags & NOT_NULL_FLAG))
|
|
cur_field->move_field(field_pos);
|
|
else
|
|
{
|
|
cur_field->move_field(field_pos, (uchar*) null_pos, null_bit);
|
|
null_bit<<= 1;
|
|
if (null_bit == (uint)1 << 8)
|
|
{
|
|
++null_pos;
|
|
null_bit= 1;
|
|
}
|
|
}
|
|
if (cur_field->key_type() == HA_KEYTYPE_BIT)
|
|
{
|
|
/* This is a Field_bit since key_type is HA_KEYTYPE_BIT */
|
|
DBUG_ASSERT(cur_field->type() == MYSQL_TYPE_BIT);
|
|
static_cast<Field_bit*>(cur_field)->set_bit_ptr(null_pos, null_bit);
|
|
null_bit+= cur_field->field_length & 7;
|
|
if (null_bit > 7)
|
|
{
|
|
null_pos++;
|
|
null_bit-= 8;
|
|
}
|
|
}
|
|
cur_field->reset();
|
|
field_pos+= cur_field->pack_length();
|
|
}
|
|
}
|
|
|
|
|
|
bool Virtual_tmp_table::open()
|
|
{
|
|
// Make sure that we added all the fields we planned to:
|
|
DBUG_ASSERT(s->fields == m_alloced_field_count);
|
|
field[s->fields]= NULL; // mark the end of the list
|
|
s->blob_field[s->blob_fields]= 0; // mark the end of the list
|
|
|
|
uint null_pack_length= (s->null_fields + 7) / 8; // NULL-bit array length
|
|
s->reclength+= null_pack_length;
|
|
s->rec_buff_length= ALIGN_SIZE(s->reclength + 1);
|
|
if (!(record[0]= (uchar*) in_use->alloc(s->rec_buff_length)))
|
|
return true;
|
|
if (null_pack_length)
|
|
{
|
|
null_flags= (uchar*) record[0];
|
|
s->null_bytes= s->null_bytes_for_compare= null_pack_length;
|
|
}
|
|
setup_field_pointers();
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
  Find a field by name.

  @param[out] idx   Set to the position of the first matching field;
                    left untouched when nothing matches
  @param      name  Field name to look for

  @retval false  Found
  @retval true   Not found
*/
bool Virtual_tmp_table::sp_find_field_by_name(uint *idx,
                                              const LEX_CSTRING &name) const
{
  for (uint pos= 0; field[pos] != NULL; pos++)
  {
    // Use the same comparison style with sp_context::find_variable()
    if (!field[pos]->field_name.streq(name))
      continue;
    *idx= pos;
    return false;
  }
  return true;
}
|
|
|
|
|
|
bool
|
|
Virtual_tmp_table::sp_find_field_by_name_or_error(uint *idx,
|
|
const LEX_CSTRING &var_name,
|
|
const LEX_CSTRING &field_name)
|
|
const
|
|
{
|
|
if (sp_find_field_by_name(idx, field_name))
|
|
{
|
|
my_error(ER_ROW_VARIABLE_DOES_NOT_HAVE_FIELD, MYF(0),
|
|
var_name.str, field_name.str);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
  Store the n-th item of 'items' into the n-th field, for all items.

  The list is expected to have exactly s->fields elements.

  @param thd    Thread handle
  @param items  Source values

  @retval false  OK
  @retval true   Storing one of the items failed
*/
bool Virtual_tmp_table::sp_set_all_fields_from_item_list(THD *thd,
                                                         List<Item> &items)
{
  DBUG_ASSERT(s->fields == items.elements);
  List_iterator<Item> item_it(items);
  Field **target= field;
  Item *src;

  while ((src= item_it++))
  {
    if ((*target)->sp_prepare_and_store_item(thd, &src))
      return true;
    target++;
  }
  return false;
}
|
|
|
|
|
|
/**
  Store the n-th column of 'value' into the n-th field, for all columns.

  'value' is expected to be fixed and to have exactly s->fields columns.

  @param thd    Thread handle
  @param value  Source item; its columns are accessed through addr()

  @retval false  OK
  @retval true   Storing one of the columns failed
*/
bool Virtual_tmp_table::sp_set_all_fields_from_item(THD *thd, Item *value)
{
  DBUG_ASSERT(value->fixed());
  DBUG_ASSERT(value->cols() == s->fields);

  uint col= 0;
  while (col < value->cols())
  {
    if (field[col]->sp_prepare_and_store_item(thd, value->addr(col)))
      return true;
    col++;
  }
  return false;
}
|
|
|
|
/**
  Open the handler of an internal temporary table.

  On success the table is switched to HA_EXTRA_QUICK, optimizer costs are
  set, and the table is marked as created (bumping the created_tmp_tables
  status counter) if it was not marked already.

  @param table  Table to open

  @retval 0  OK
  @retval 1  ha_open() failed; the error was reported through print_error()
*/
bool open_tmp_table(TABLE *table)
{
  int error= table->file->ha_open(table, table->s->path.str, O_RDWR,
                                  HA_OPEN_TMP_TABLE |
                                  HA_OPEN_INTERNAL_TABLE |
                                  HA_OPEN_SIZE_TRACKING);
  if (unlikely(error))
  {
    table->file->print_error(error, MYF(0)); /* purecov: inspected */
    table->db_stat= 0;
    return 1;
  }

  table->db_stat= HA_OPEN_KEYFILE;
  (void) table->file->extra(HA_EXTRA_QUICK); /* Faster */
  table->file->set_optimizer_costs(table->in_use);

  if (table->is_created())
    return 0;

  table->set_created();
  table->in_use->inc_status_created_tmp_tables();
  return 0;
}
|
|
|
|
|
|
#ifdef USE_ARIA_FOR_TMP_TABLES
|
|
/*
|
|
Create internal (MyISAM or Maria) temporary table
|
|
|
|
SYNOPSIS
|
|
create_internal_tmp_table()
|
|
table Table object that descrimes the table to be created
|
|
keyinfo Description of the index (there is always one index)
|
|
start_recinfo engine's column descriptions
|
|
recinfo INOUT End of engine's column descriptions
|
|
options Option bits
|
|
|
|
DESCRIPTION
|
|
Create an internal emporary table according to passed description. The is
|
|
assumed to have one unique index or constraint.
|
|
|
|
The passed array or TMP_ENGINE_COLUMNDEF structures must have this form:
|
|
|
|
1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
|
|
when there are many nullable columns)
|
|
2. Table columns
|
|
3. One free TMP_ENGINE_COLUMNDEF element (*recinfo points here)
|
|
|
|
This function may use the free element to create hash column for unique
|
|
constraint.
|
|
|
|
RETURN
|
|
FALSE - OK
|
|
TRUE - Error. my_error() have been called
|
|
*/
|
|
|
|
|
|
bool create_internal_tmp_table(TABLE *table, KEY *org_keyinfo,
|
|
TMP_ENGINE_COLUMNDEF *start_recinfo,
|
|
TMP_ENGINE_COLUMNDEF **recinfo,
|
|
ulonglong options)
|
|
{
|
|
int error;
|
|
MARIA_KEYDEF *keydefs= 0, *keydef;
|
|
MARIA_UNIQUEDEF uniquedef;
|
|
TABLE_SHARE *share= table->s;
|
|
MARIA_CREATE_INFO create_info;
|
|
bool use_unique= false;
|
|
DBUG_ENTER("create_internal_tmp_table");
|
|
|
|
if (share->keys)
|
|
{ // Get keys for ni_create
|
|
HA_KEYSEG *seg;
|
|
DBUG_ASSERT(share->key_parts);
|
|
|
|
if (!(multi_alloc_root(&table->mem_root,
|
|
&seg, sizeof(*seg) * share->key_parts,
|
|
&keydefs, sizeof(*keydefs) * share->keys,
|
|
NullS)))
|
|
goto err;
|
|
keydef= keydefs;
|
|
|
|
bzero(seg, sizeof(*seg) * share->key_parts);
|
|
|
|
/* Note that share->keys may change in the loop ! */
|
|
for (KEY *keyinfo= org_keyinfo, *end_keyinfo= keyinfo + share->keys;
|
|
keyinfo < end_keyinfo ;
|
|
keyinfo++)
|
|
{
|
|
/*
|
|
Note that a similar check is performed during
|
|
subquery_types_allow_materialization. See MDEV-7122 for more details as
|
|
to why. Whenever this changes, it must be updated there as well, for
|
|
all tmp_table engines.
|
|
*/
|
|
if (keyinfo->key_length > table->file->max_key_length() ||
|
|
keyinfo->user_defined_key_parts > table->file->max_key_parts() ||
|
|
(keyinfo->flags & HA_UNIQUE_HASH))
|
|
{
|
|
if (!(keyinfo->flags & (HA_NOSAME | HA_UNIQUE_HASH)))
|
|
{
|
|
my_error(ER_INTERNAL_ERROR, MYF(0),
|
|
"Using too big key for internal temp tables");
|
|
DBUG_RETURN(1);
|
|
}
|
|
/* Can't create a key; Make a unique constraint instead of a key */
|
|
share->keys--;
|
|
share->key_parts-= keyinfo->user_defined_key_parts;
|
|
share->ext_key_parts-= keyinfo->ext_key_parts;
|
|
use_unique= true;
|
|
bzero((char*) &uniquedef,sizeof(uniquedef));
|
|
uniquedef.keysegs= keyinfo->user_defined_key_parts;
|
|
uniquedef.seg=seg;
|
|
uniquedef.null_are_equal=1;
|
|
keyinfo->flags|= HA_UNIQUE_HASH;
|
|
keyinfo->algorithm= HA_KEY_ALG_UNIQUE_HASH;
|
|
|
|
/* Create extra column for hash value */
|
|
bzero((uchar*) *recinfo,sizeof(**recinfo));
|
|
(*recinfo)->type= FIELD_CHECK;
|
|
(*recinfo)->length= MARIA_UNIQUE_HASH_LENGTH;
|
|
(*recinfo)++;
|
|
|
|
/* Avoid warnings from valgrind */
|
|
bzero(table->record[0]+ share->reclength, MARIA_UNIQUE_HASH_LENGTH);
|
|
bzero(share->default_values+ share->reclength,
|
|
MARIA_UNIQUE_HASH_LENGTH);
|
|
share->reclength+= MARIA_UNIQUE_HASH_LENGTH;
|
|
}
|
|
else
|
|
{
|
|
/* Create a key */
|
|
bzero((char*) keydef,sizeof(*keydef));
|
|
/*
|
|
We are using a GROUP BY on something that contains NULL
|
|
In this case we have to tell Aria that two NULL should
|
|
on INSERT be regarded at the same value.
|
|
*/
|
|
keydef->flag= (keyinfo->flags & HA_NOSAME) | HA_NULL_ARE_EQUAL;
|
|
keydef->keysegs= keyinfo->user_defined_key_parts;
|
|
keydef->seg= seg;
|
|
keydef++;
|
|
}
|
|
for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++)
|
|
{
|
|
Field *field=keyinfo->key_part[i].field;
|
|
seg->flag= 0;
|
|
seg->language= field->charset()->number;
|
|
seg->length= keyinfo->key_part[i].length;
|
|
seg->start= keyinfo->key_part[i].offset;
|
|
if (field->flags & BLOB_FLAG)
|
|
{
|
|
seg->type=
|
|
((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
|
|
HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
|
|
seg->bit_start= (uint8)(field->pack_length() -
|
|
portable_sizeof_char_ptr);
|
|
seg->flag= HA_BLOB_PART;
|
|
seg->length=0; // Whole blob in unique constraint
|
|
}
|
|
else
|
|
{
|
|
seg->type= keyinfo->key_part[i].type;
|
|
/* Tell handler if it can do suffic space compression */
|
|
if (field->real_type() == MYSQL_TYPE_STRING &&
|
|
keyinfo->key_part[i].length > 32)
|
|
seg->flag|= HA_SPACE_PACK;
|
|
}
|
|
if (!(field->flags & NOT_NULL_FLAG))
|
|
{
|
|
seg->null_bit= field->null_bit;
|
|
seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
|
|
}
|
|
}
|
|
keyinfo->index_flags= table->file->index_flags(0, 0, 1);
|
|
}
|
|
}
|
|
bzero((char*) &create_info,sizeof(create_info));
|
|
create_info.data_file_length= table->in_use->variables.tmp_disk_table_size;
|
|
|
|
/*
|
|
The logic for choosing the record format:
|
|
The STATIC_RECORD format is the fastest one, because it's so simple,
|
|
so we use this by default for short rows.
|
|
BLOCK_RECORD caches both row and data, so this is generally faster than
|
|
DYNAMIC_RECORD. The one exception is when we write to tmp table and
|
|
want to use keys for duplicate elimination as with BLOCK RECORD
|
|
we first write the row, then check for key conflicts and then we have to
|
|
delete the row. The cases when this can happen is when there is
|
|
a group by and no sum functions or if distinct is used.
|
|
*/
|
|
{
|
|
enum data_file_type file_type= table->no_rows ? NO_RECORD :
|
|
(share->reclength < 64 && !share->blob_fields ? STATIC_RECORD :
|
|
table->used_for_duplicate_elimination ? DYNAMIC_RECORD : BLOCK_RECORD);
|
|
uint create_flags= HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE |
|
|
(table->keep_row_order ? HA_PRESERVE_INSERT_ORDER : 0);
|
|
|
|
if (file_type != NO_RECORD && encrypt_tmp_disk_tables)
|
|
{
|
|
/* encryption is only supported for BLOCK_RECORD */
|
|
file_type= BLOCK_RECORD;
|
|
if (table->used_for_duplicate_elimination)
|
|
{
|
|
/*
|
|
sql-layer expect the last column to be stored/restored also
|
|
when it's null.
|
|
|
|
This is probably a bug (that sql-layer doesn't annotate
|
|
the column as not-null) but both heap, aria-static, aria-dynamic and
|
|
myisam has this property. aria-block_record does not since it
|
|
does not store null-columns at all.
|
|
Emulate behaviour by making column not-nullable when creating the
|
|
table.
|
|
*/
|
|
uint cols= (uint)(*recinfo-start_recinfo);
|
|
start_recinfo[cols-1].null_bit= 0;
|
|
}
|
|
}
|
|
|
|
if (unlikely((error= maria_create(share->path.str, file_type, share->keys,
|
|
keydefs, (uint) (*recinfo-start_recinfo),
|
|
start_recinfo, use_unique, &uniquedef,
|
|
&create_info, create_flags))))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
table->db_stat=0;
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
table->in_use->inc_status_created_tmp_disk_tables();
|
|
table->in_use->inc_status_created_tmp_tables();
|
|
share->db_record_offset= 1;
|
|
table->set_created();
|
|
DBUG_RETURN(0);
|
|
err:
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
#else
|
|
|
|
/*
|
|
Create internal (MyISAM or Maria) temporary table
|
|
|
|
SYNOPSIS
|
|
create_internal_tmp_table()
|
|
table Table object that descrimes the table to be created
|
|
keyinfo Description of the index (there is always one index)
|
|
start_recinfo engine's column descriptions
|
|
recinfo INOUT End of engine's column descriptions
|
|
options Option bits
|
|
|
|
DESCRIPTION
|
|
Create an internal emporary table according to passed description. The is
|
|
assumed to have one unique index or constraint.
|
|
|
|
The passed array or TMP_ENGINE_COLUMNDEF structures must have this form:
|
|
|
|
1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
|
|
when there are many nullable columns)
|
|
2. Table columns
|
|
3. One free TMP_ENGINE_COLUMNDEF element (*recinfo points here)
|
|
|
|
This function may use the free element to create hash column for unique
|
|
constraint.
|
|
|
|
RETURN
|
|
FALSE - OK
|
|
TRUE - Error ; my_error() has been called.
|
|
*/
|
|
|
|
/* Create internal MyISAM temporary table */
|
|
|
|
bool create_internal_tmp_table(TABLE *table, KEY *org_keyinfo,
|
|
TMP_ENGINE_COLUMNDEF *start_recinfo,
|
|
TMP_ENGINE_COLUMNDEF **recinfo,
|
|
ulonglong options)
|
|
{
|
|
int error;
|
|
MI_KEYDEF keydef;
|
|
MI_UNIQUEDEF uniquedef;
|
|
TABLE_SHARE *share= table->s;
|
|
DBUG_ENTER("create_internal_tmp_table");
|
|
|
|
if (share->keys)
|
|
{ // Get keys for ni_create
|
|
bool using_unique_constraint=0;
|
|
HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root,
|
|
sizeof(*seg) *
|
|
share->user_defined_key_parts);
|
|
if (!seg)
|
|
goto err;
|
|
|
|
bzero(seg, sizeof(*seg) * share->user_defined_key_parts);
|
|
/*
|
|
Note that a similar check is performed during
|
|
subquery_types_allow_materialization. See MDEV-7122 for more details as
|
|
to why. Whenever this changes, it must be updated there as well, for
|
|
all tmp_table engines.
|
|
*/
|
|
if (keyinfo->key_length > table->file->max_key_length() ||
|
|
keyinfo->user_defined_key_parts > table->file->max_key_parts() ||
|
|
share->uniques)
|
|
{
|
|
/* Can't create a key; Make a unique constraint instead of a key */
|
|
share->keys= 0;
|
|
share->key_parts= share->ext_key_parts= 0;
|
|
share->uniques= 1;
|
|
using_unique_constraint=1;
|
|
bzero((char*) &uniquedef,sizeof(uniquedef));
|
|
uniquedef.keysegs=keyinfo->user_defined_key_parts;
|
|
uniquedef.seg=seg;
|
|
uniquedef.null_are_equal=1;
|
|
|
|
/* Create extra column for hash value */
|
|
bzero((uchar*) *recinfo,sizeof(**recinfo));
|
|
(*recinfo)->type= FIELD_CHECK;
|
|
(*recinfo)->length=MI_UNIQUE_HASH_LENGTH;
|
|
(*recinfo)++;
|
|
/* Avoid warnings from valgrind */
|
|
bzero(table->record[0]+ share->reclength, MI_UNIQUE_HASH_LENGTH);
|
|
bzero(share->default_values+ share->reclength, MI_UNIQUE_HASH_LENGTH);
|
|
share->reclength+= MI_UNIQUE_HASH_LENGTH;
|
|
}
|
|
else
|
|
{
|
|
/* Create an unique key */
|
|
bzero((char*) &keydef,sizeof(keydef));
|
|
keydef.flag= ((keyinfo->flags & HA_NOSAME) | HA_BINARY_PACK_KEY |
|
|
HA_PACK_KEY);
|
|
keydef.keysegs= keyinfo->user_defined_key_parts;
|
|
keydef.seg= seg;
|
|
}
|
|
for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++)
|
|
{
|
|
Field *field=keyinfo->key_part[i].field;
|
|
seg->flag= 0;
|
|
seg->language= field->charset()->number;
|
|
seg->length= keyinfo->key_part[i].length;
|
|
seg->start= keyinfo->key_part[i].offset;
|
|
if (field->flags & BLOB_FLAG)
|
|
{
|
|
seg->type=
|
|
((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
|
|
HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
|
|
seg->bit_start= (uint8)(field->pack_length() - portable_sizeof_char_ptr);
|
|
seg->flag= HA_BLOB_PART;
|
|
seg->length=0; // Whole blob in unique constraint
|
|
}
|
|
else
|
|
{
|
|
seg->type= keyinfo->key_part[i].type;
|
|
/* Tell handler if it can do suffic space compression */
|
|
if (field->real_type() == MYSQL_TYPE_STRING &&
|
|
keyinfo->key_part[i].length > 4)
|
|
seg->flag|= HA_SPACE_PACK;
|
|
}
|
|
if (!(field->flags & NOT_NULL_FLAG))
|
|
{
|
|
seg->null_bit= field->null_bit;
|
|
seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
|
|
/*
|
|
We are using a GROUP BY on something that contains NULL
|
|
In this case we have to tell MyISAM that two NULL should
|
|
on INSERT be regarded at the same value
|
|
*/
|
|
if (!using_unique_constraint)
|
|
keydef.flag|= HA_NULL_ARE_EQUAL;
|
|
}
|
|
}
|
|
if (share->keys)
|
|
keyinfo->index_flags= table->file->index_flags(0, 0, 1);
|
|
}
|
|
MI_CREATE_INFO create_info;
|
|
bzero((char*) &create_info,sizeof(create_info));
|
|
create_info.data_file_length= table->in_use->variables.tmp_disk_table_size;
|
|
|
|
if (unlikely((error= mi_create(share->path.str, share->keys, &keydef,
|
|
(uint) (*recinfo-start_recinfo),
|
|
start_recinfo,
|
|
share->uniques, &uniquedef,
|
|
&create_info,
|
|
HA_CREATE_TMP_TABLE |
|
|
HA_CREATE_INTERNAL_TABLE |
|
|
((share->db_create_options &
|
|
HA_OPTION_PACK_RECORD) ?
|
|
HA_PACK_RECORD : 0)
|
|
))))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
table->db_stat=0;
|
|
goto err;
|
|
}
|
|
table->in_use->inc_status_created_tmp_disk_tables();
|
|
table->in_use->inc_status_created_tmp_tables();
|
|
share->db_record_offset= 1;
|
|
table->set_created();
|
|
DBUG_RETURN(0);
|
|
err:
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
#endif /* USE_ARIA_FOR_TMP_TABLES */
|
|
|
|
|
|
/*
|
|
If a HEAP table gets full, create a internal table in MyISAM or Maria
|
|
and copy all rows to this
|
|
|
|
In case of error, my_error() or handler::print_error() will be called.
|
|
Note that in case of error, table->file->ha_rnd_end() may have been called!
|
|
*/
|
|
|
|
|
|
bool
|
|
create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
|
|
TMP_ENGINE_COLUMNDEF *start_recinfo,
|
|
TMP_ENGINE_COLUMNDEF **recinfo,
|
|
int error,
|
|
bool ignore_last_dupp_key_error,
|
|
bool *is_duplicate)
|
|
{
|
|
TABLE new_table;
|
|
TABLE_SHARE share;
|
|
const char *save_proc_info;
|
|
int write_err= 0;
|
|
String tmp_alias;
|
|
DBUG_ENTER("create_internal_tmp_table_from_heap");
|
|
if (is_duplicate)
|
|
*is_duplicate= FALSE;
|
|
|
|
if (table->s->db_type() != heap_hton || error != HA_ERR_RECORD_FILE_FULL)
|
|
{
|
|
/*
|
|
We don't want this error to be converted to a warning, e.g. in case of
|
|
INSERT IGNORE ... SELECT.
|
|
*/
|
|
table->file->print_error(error, MYF(ME_FATAL));
|
|
DBUG_RETURN(1);
|
|
}
|
|
new_table= *table;
|
|
share= *table->s;
|
|
new_table.s= &share;
|
|
new_table.s->db_plugin= ha_lock_engine(thd, TMP_ENGINE_HTON);
|
|
if (unlikely(!(new_table.file= get_new_handler(&share, &new_table.mem_root,
|
|
TMP_ENGINE_HTON))))
|
|
DBUG_RETURN(1); // End of memory
|
|
|
|
if (unlikely(new_table.file->set_ha_share_ref(&share.ha_share)))
|
|
{
|
|
delete new_table.file;
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
save_proc_info=thd->proc_info;
|
|
THD_STAGE_INFO(thd, stage_converting_heap_to_myisam);
|
|
|
|
new_table.no_rows= table->no_rows;
|
|
if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo,
|
|
recinfo,
|
|
thd->lex->first_select_lex()->options |
|
|
thd->variables.option_bits))
|
|
goto err2;
|
|
if (open_tmp_table(&new_table))
|
|
{
|
|
TMP_ENGINE_HTON->drop_table(TMP_ENGINE_HTON, new_table.s->path.str);
|
|
goto err2;
|
|
}
|
|
if (table->file->indexes_are_disabled())
|
|
new_table.file->ha_disable_indexes(key_map(0), false);
|
|
table->file->ha_index_or_rnd_end();
|
|
if (table->file->ha_rnd_init_with_error(1))
|
|
DBUG_RETURN(1);
|
|
if (new_table.no_rows)
|
|
new_table.file->extra(HA_EXTRA_NO_ROWS);
|
|
else
|
|
{
|
|
/* update table->file->stats.records */
|
|
table->file->info(HA_STATUS_VARIABLE);
|
|
new_table.file->ha_start_bulk_insert(table->file->stats.records);
|
|
}
|
|
|
|
/*
|
|
copy all old rows from heap table to MyISAM table
|
|
This is the only code that uses record[1] to read/write but this
|
|
is safe as this is a temporary MyISAM table without timestamp/autoincrement
|
|
or partitioning.
|
|
*/
|
|
while (!table->file->ha_rnd_next(new_table.record[1]))
|
|
{
|
|
write_err= new_table.file->ha_write_tmp_row(new_table.record[1]);
|
|
DBUG_EXECUTE_IF("raise_error", write_err= HA_ERR_FOUND_DUPP_KEY ;);
|
|
if (write_err)
|
|
goto err;
|
|
if (unlikely(thd->check_killed()))
|
|
goto err_killed;
|
|
}
|
|
if (!new_table.no_rows && (write_err= new_table.file->ha_end_bulk_insert()))
|
|
goto err;
|
|
/* copy row that filled HEAP table */
|
|
if (unlikely((write_err=new_table.file->ha_write_tmp_row(table->record[0]))))
|
|
{
|
|
if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) ||
|
|
!ignore_last_dupp_key_error)
|
|
goto err;
|
|
if (is_duplicate)
|
|
*is_duplicate= TRUE;
|
|
}
|
|
else
|
|
{
|
|
if (is_duplicate)
|
|
*is_duplicate= FALSE;
|
|
}
|
|
|
|
/* remove heap table and change to use myisam table */
|
|
(void) table->file->ha_rnd_end();
|
|
(void) table->file->ha_close(); // This deletes the table !
|
|
delete table->file;
|
|
table->file=0;
|
|
plugin_unlock(0, table->s->db_plugin);
|
|
share.db_plugin= my_plugin_lock(0, share.db_plugin);
|
|
new_table.s= table->s; // Keep old share
|
|
|
|
/*
|
|
The following work with alias has to be done as new_table.alias() may have
|
|
been reallocated and we want to keep the original one.
|
|
*/
|
|
tmp_alias.move(table->alias);
|
|
*table= new_table;
|
|
table->alias.move(tmp_alias);
|
|
new_table.alias.free();
|
|
/* Get the new share */
|
|
*table->s= share;
|
|
|
|
table->file->change_table_ptr(table, table->s);
|
|
table->use_all_columns();
|
|
if (save_proc_info)
|
|
thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ?
|
|
"Copying to tmp table on disk" : save_proc_info));
|
|
DBUG_RETURN(0);
|
|
|
|
err:
|
|
DBUG_PRINT("error",("Got error: %d",write_err));
|
|
table->file->print_error(write_err, MYF(0));
|
|
err_killed:
|
|
(void) table->file->ha_rnd_end();
|
|
(void) new_table.file->drop_table(new_table.s->path.str);
|
|
err2:
|
|
delete new_table.file;
|
|
thd_proc_info(thd, save_proc_info);
|
|
table->mem_root= new_table.mem_root;
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
|
|
void
|
|
free_tmp_table(THD *thd, TABLE *entry)
|
|
{
|
|
MEM_ROOT own_root= entry->mem_root;
|
|
const char *save_proc_info;
|
|
DBUG_ENTER("free_tmp_table");
|
|
DBUG_PRINT("enter",("table: %s alias: %s",entry->s->table_name.str,
|
|
entry->alias.c_ptr()));
|
|
|
|
save_proc_info=thd->proc_info;
|
|
THD_STAGE_INFO(thd, stage_removing_tmp_table);
|
|
|
|
if (entry->file && entry->is_created())
|
|
{
|
|
if (entry->db_stat)
|
|
{
|
|
/* The table was properly opened in open_tmp_table() */
|
|
entry->file->ha_index_or_rnd_end();
|
|
entry->file->info(HA_STATUS_VARIABLE);
|
|
thd->tmp_tables_size+= (entry->file->stats.data_file_length +
|
|
entry->file->stats.index_file_length);
|
|
}
|
|
/*
|
|
This is an internal temporary table, we should not call ha_drop_table()
|
|
as it will mark the transaction read/write
|
|
*/
|
|
DBUG_ASSERT(entry->s->tmp_table == SYSTEM_TMP_TABLE ||
|
|
entry->s->tmp_table == INTERNAL_TMP_TABLE);
|
|
entry->file->drop_table(entry->s->path.str);
|
|
delete entry->file;
|
|
entry->file= NULL;
|
|
entry->reset_created();
|
|
}
|
|
|
|
/* free blobs */
|
|
for (Field **ptr=entry->field ; *ptr ; ptr++)
|
|
(*ptr)->free();
|
|
|
|
if (entry->temp_pool_slot != MY_BIT_NONE)
|
|
temp_pool_clear_bit(entry->temp_pool_slot);
|
|
|
|
plugin_unlock(0, entry->s->db_plugin);
|
|
entry->alias.free();
|
|
|
|
if (entry->pos_in_table_list && entry->pos_in_table_list->table)
|
|
{
|
|
DBUG_ASSERT(entry->pos_in_table_list->table == entry);
|
|
entry->pos_in_table_list->table= NULL;
|
|
}
|
|
|
|
free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
|
|
thd_proc_info(thd, save_proc_info);
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Set write_func of AGGR_OP object
|
|
|
|
@param join_tab JOIN_TAB of the corresponding tmp table
|
|
|
|
@details
|
|
Function sets up write_func according to how AGGR_OP object that
|
|
is attached to the given join_tab will be used in the query.
|
|
*/
|
|
|
|
void set_postjoin_aggr_write_func(JOIN_TAB *tab)
|
|
{
|
|
JOIN *join= tab->join;
|
|
TABLE *table= tab->table;
|
|
AGGR_OP *aggr= tab->aggr;
|
|
TMP_TABLE_PARAM *tmp_tbl= tab->tmp_table_param;
|
|
|
|
DBUG_ASSERT(table && aggr);
|
|
|
|
if (table->group && tmp_tbl->sum_func_count &&
|
|
!tmp_tbl->precomputed_group_by)
|
|
{
|
|
/*
|
|
Note for MyISAM tmp tables: if uniques is true keys won't be
|
|
created.
|
|
*/
|
|
if (table->s->keys && !table->s->have_unique_constraint())
|
|
{
|
|
DBUG_PRINT("info",("Using end_update"));
|
|
aggr->set_write_func(end_update);
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info",("Using end_unique_update"));
|
|
aggr->set_write_func(end_unique_update);
|
|
}
|
|
}
|
|
else if (join->sort_and_group && !tmp_tbl->precomputed_group_by &&
|
|
!join->sort_and_group_aggr_tab && join->tables_list &&
|
|
join->top_join_tab_count)
|
|
{
|
|
DBUG_PRINT("info",("Using end_write_group"));
|
|
aggr->set_write_func(end_write_group);
|
|
join->sort_and_group_aggr_tab= tab;
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info",("Using end_write"));
|
|
aggr->set_write_func(end_write);
|
|
if (tmp_tbl->precomputed_group_by)
|
|
{
|
|
/*
|
|
A preceding call to create_tmp_table in the case when loose
|
|
index scan is used guarantees that
|
|
TMP_TABLE_PARAM::items_to_copy has enough space for the group
|
|
by functions. It is OK here to use memcpy since we copy
|
|
Item_sum pointers into an array of Item pointers.
|
|
*/
|
|
memcpy(tmp_tbl->items_to_copy + tmp_tbl->func_count,
|
|
join->sum_funcs,
|
|
sizeof(Item*)*tmp_tbl->sum_func_count);
|
|
tmp_tbl->items_to_copy[tmp_tbl->func_count+tmp_tbl->sum_func_count]= 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
@details
|
|
Rows produced by a join sweep may end up in a temporary table or be sent
|
|
to a client. Set the function of the nested loop join algorithm which
|
|
handles final fully constructed and matched records.
|
|
|
|
@param join join to setup the function for.
|
|
|
|
@return
|
|
end_select function to use. This function can't fail.
|
|
*/
|
|
|
|
Next_select_func setup_end_select_func(JOIN *join)
|
|
{
|
|
TMP_TABLE_PARAM *tmp_tbl= &join->tmp_table_param;
|
|
|
|
/*
|
|
Choose method for presenting result to user. Use end_send_group
|
|
if the query requires grouping (has a GROUP BY clause and/or one or
|
|
more aggregate functions). Use end_send if the query should not
|
|
be grouped.
|
|
*/
|
|
if (join->sort_and_group && !tmp_tbl->precomputed_group_by)
|
|
{
|
|
DBUG_PRINT("info",("Using end_send_group"));
|
|
return end_send_group;
|
|
}
|
|
DBUG_PRINT("info",("Using end_send"));
|
|
return end_send;
|
|
}
|
|
|
|
|
|
/**
|
|
Make a join of all tables and write it on socket or to table.
|
|
|
|
@retval
|
|
0 if ok
|
|
@retval
|
|
1 if error is sent
|
|
@retval
|
|
-1 if error should be sent
|
|
*/
|
|
|
|
static int
|
|
do_select(JOIN *join, Procedure *procedure)
|
|
{
|
|
int rc= 0;
|
|
enum_nested_loop_state error= NESTED_LOOP_OK;
|
|
uint top_level_tables= join->exec_join_tab_cnt();
|
|
DBUG_ENTER("do_select");
|
|
|
|
if (join->pushdown_query)
|
|
{
|
|
/* Select fields are in the temporary table */
|
|
join->fields= &join->tmp_fields_list1;
|
|
/* Setup HAVING to work with fields in temporary table */
|
|
join->set_items_ref_array(join->items1);
|
|
/* The storage engine will take care of the group by query result */
|
|
int res= join->pushdown_query->execute(join);
|
|
|
|
if (res)
|
|
DBUG_RETURN(res);
|
|
|
|
if (join->pushdown_query->store_data_in_temp_table)
|
|
{
|
|
JOIN_TAB *last_tab= join->join_tab + top_level_tables;
|
|
last_tab->next_select= end_send;
|
|
last_tab->cached_pfs_batch_update= last_tab->pfs_batch_update();
|
|
|
|
enum_nested_loop_state state= last_tab->aggr->end_send();
|
|
if (state >= NESTED_LOOP_OK)
|
|
state= sub_select(join, last_tab, true);
|
|
|
|
if (state < NESTED_LOOP_OK)
|
|
res= 1;
|
|
|
|
if (join->result->send_eof())
|
|
res= 1;
|
|
}
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
join->procedure= procedure;
|
|
join->duplicate_rows= join->send_records=0;
|
|
|
|
if (join->only_const_tables() && !join->need_tmp)
|
|
{
|
|
Next_select_func end_select= setup_end_select_func(join);
|
|
|
|
/*
|
|
HAVING will be checked after processing aggregate functions,
|
|
But WHERE should checked here (we alredy have read tables).
|
|
Notice that make_join_select() splits all conditions in this case
|
|
into two groups exec_const_cond and outer_ref_cond.
|
|
If join->table_count == join->const_tables then it is
|
|
sufficient to check only the condition pseudo_bits_cond.
|
|
*/
|
|
DBUG_ASSERT(join->outer_ref_cond == NULL);
|
|
if (!join->pseudo_bits_cond || join->pseudo_bits_cond->val_int())
|
|
{
|
|
// HAVING will be checked by end_select
|
|
error= (*end_select)(join, 0, 0);
|
|
if (error >= NESTED_LOOP_OK)
|
|
error= (*end_select)(join, 0, 1);
|
|
|
|
/*
|
|
If we don't go through evaluate_join_record(), do the counting
|
|
here. join->send_records is increased on success in end_send(),
|
|
so we don't touch it here.
|
|
*/
|
|
join->thd->inc_examined_row_count_fast();
|
|
}
|
|
else if (join->send_row_on_empty_set())
|
|
{
|
|
table_map cleared_tables= (table_map) 0;
|
|
if (end_select == end_send_group)
|
|
{
|
|
/*
|
|
Was a grouping query but we did not find any rows. In this case
|
|
we clear all tables to get null in any referenced fields,
|
|
like in case of:
|
|
SELECT MAX(a) AS f1, a AS f2 FROM t1 WHERE VALUE(a) IS NOT NULL
|
|
*/
|
|
clear_tables(join, &cleared_tables);
|
|
}
|
|
if (!join->having || join->having->val_int())
|
|
{
|
|
List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
|
|
join->fields);
|
|
rc= join->result->send_data_with_check(*columns_list,
|
|
join->unit, 0) > 0;
|
|
}
|
|
/*
|
|
We have to remove the null markings from the tables as this table
|
|
may be part of a sub query that is re-evaluated
|
|
*/
|
|
if (cleared_tables)
|
|
unclear_tables(join, &cleared_tables);
|
|
}
|
|
/*
|
|
An error can happen when evaluating the conds
|
|
(the join condition and piece of where clause
|
|
relevant to this join table).
|
|
*/
|
|
if (unlikely(join->thd->is_error()))
|
|
error= NESTED_LOOP_ERROR;
|
|
}
|
|
else
|
|
{
|
|
DBUG_EXECUTE_IF("show_explain_probe_do_select",
|
|
if (dbug_user_var_equals_int(join->thd,
|
|
"show_explain_probe_select_id",
|
|
join->select_lex->select_number))
|
|
dbug_serve_apcs(join->thd, 1);
|
|
);
|
|
|
|
/*
|
|
We have to update the cached_pfs_batch_update as
|
|
join_tab->select_cond may have changed.
|
|
|
|
This can happen in case of group by where some sub queries are not
|
|
needed anymore. This is checked by main.ps
|
|
*/
|
|
if (top_level_tables)
|
|
join->join_tab[top_level_tables-1].cached_pfs_batch_update=
|
|
join->join_tab[top_level_tables-1].pfs_batch_update();
|
|
|
|
JOIN_TAB *join_tab= join->join_tab +
|
|
(join->tables_list ? join->const_tables : 0);
|
|
if (join->outer_ref_cond && !join->outer_ref_cond->val_int())
|
|
error= NESTED_LOOP_NO_MORE_ROWS;
|
|
else
|
|
error= join->first_select(join,join_tab,0);
|
|
if (error >= NESTED_LOOP_OK && likely(join->thd->killed != ABORT_QUERY))
|
|
error= join->first_select(join,join_tab,1);
|
|
}
|
|
|
|
join->thd->limit_found_rows= join->send_records - join->duplicate_rows;
|
|
|
|
if (error == NESTED_LOOP_NO_MORE_ROWS ||
|
|
unlikely(join->thd->killed == ABORT_QUERY))
|
|
error= NESTED_LOOP_OK;
|
|
|
|
/*
|
|
For "order by with limit", we cannot rely on send_records, but need
|
|
to use the rowcount read originally into the join_tab applying the
|
|
filesort. There cannot be any post-filtering conditions, nor any
|
|
following join_tabs in this case, so this rowcount properly represents
|
|
the correct number of qualifying rows.
|
|
*/
|
|
if (join->order)
|
|
{
|
|
// Save # of found records prior to cleanup
|
|
JOIN_TAB *sort_tab;
|
|
JOIN_TAB *join_tab= join->join_tab;
|
|
uint const_tables= join->const_tables;
|
|
|
|
// Take record count from first non constant table or from last tmp table
|
|
if (join->aggr_tables > 0)
|
|
sort_tab= join_tab + join->top_join_tab_count + join->aggr_tables - 1;
|
|
else
|
|
{
|
|
DBUG_ASSERT(!join->only_const_tables());
|
|
sort_tab= join_tab + const_tables;
|
|
}
|
|
if (sort_tab->filesort &&
|
|
join->select_options & OPTION_FOUND_ROWS &&
|
|
sort_tab->filesort->sortorder &&
|
|
sort_tab->filesort->limit != HA_POS_ERROR)
|
|
{
|
|
join->thd->limit_found_rows= sort_tab->records;
|
|
}
|
|
}
|
|
|
|
{
|
|
/*
|
|
The following will unlock all cursors if the command wasn't an
|
|
update command
|
|
*/
|
|
join->join_free(); // Unlock all cursors
|
|
}
|
|
if (error == NESTED_LOOP_OK)
|
|
{
|
|
/*
|
|
Sic: this branch works even if rc != 0, e.g. when
|
|
send_data above returns an error.
|
|
*/
|
|
if (unlikely(join->result->send_eof()))
|
|
rc= 1; // Don't send error
|
|
DBUG_PRINT("info",("%ld records output", (long) join->send_records));
|
|
}
|
|
else
|
|
rc= -1;
|
|
#ifndef DBUG_OFF
|
|
if (rc)
|
|
{
|
|
DBUG_PRINT("error",("Error: do_select() failed"));
|
|
}
|
|
#endif
|
|
rc= join->thd->is_error() ? -1 : rc;
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Instantiates temporary table
|
|
|
|
@param table Table object that describes the table to be
|
|
instantiated
|
|
@param keyinfo Description of the index (there is always one index)
|
|
@param start_recinfo Column descriptions
|
|
@param recinfo INOUT End of column descriptions
|
|
@param options Option bits
|
|
|
|
@details
|
|
Creates tmp table and opens it.
|
|
|
|
@return
|
|
FALSE - OK
|
|
TRUE - Error
|
|
*/
|
|
|
|
bool instantiate_tmp_table(TABLE *table, KEY *keyinfo,
|
|
TMP_ENGINE_COLUMNDEF *start_recinfo,
|
|
TMP_ENGINE_COLUMNDEF **recinfo,
|
|
ulonglong options)
|
|
{
|
|
DBUG_ASSERT(table->s->keys == 0 || table->key_info == keyinfo);
|
|
DBUG_ASSERT(table->s->keys <= 1);
|
|
if (table->s->db_type() == TMP_ENGINE_HTON)
|
|
{
|
|
/*
|
|
If it is not heap (in-memory) table then convert index to unique
|
|
constrain.
|
|
*/
|
|
MEM_CHECK_DEFINED(table->record[0], table->s->reclength);
|
|
if (create_internal_tmp_table(table, keyinfo, start_recinfo, recinfo,
|
|
options))
|
|
return TRUE;
|
|
// Make empty record so random data is not written to disk
|
|
empty_record(table);
|
|
table->status= STATUS_NO_RECORD;
|
|
}
|
|
if (open_tmp_table(table))
|
|
return TRUE;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Accumulate rows of the result of an aggregation operation in a tmp table
|
|
|
|
@param join pointer to the structure providing all context info for the query
|
|
@param join_tab the JOIN_TAB object to which the operation is attached
|
|
@param end_records TRUE <=> all records were accumulated, send them further
|
|
|
|
@details
|
|
This function accumulates records of the aggreagation operation for
|
|
the node join_tab from the execution plan in a tmp table. To add a new
|
|
record the function calls join_tab->aggr->put_records.
|
|
When there is no more records to save, in this
|
|
case the end_of_records argument == true, function tells the operation to
|
|
send records further by calling aggr->send_records().
|
|
When all records are sent this function passes 'end_of_records' signal
|
|
further by calling sub_select() with end_of_records argument set to
|
|
true. After that aggr->end_send() is called to tell the operation that
|
|
it could end internal buffer scan.
|
|
|
|
@note
|
|
This function is not expected to be called when dynamic range scan is
|
|
used to scan join_tab because range scans aren't used for tmp tables.
|
|
|
|
@return
|
|
return one of enum_nested_loop_state.
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
|
|
{
|
|
enum_nested_loop_state rc;
|
|
AGGR_OP *aggr= join_tab->aggr;
|
|
|
|
/* This function cannot be called if join_tab has no associated aggregation */
|
|
DBUG_ASSERT(aggr != NULL);
|
|
|
|
DBUG_ENTER("sub_select_aggr_tab");
|
|
|
|
if (join->thd->killed)
|
|
{
|
|
/* The user has aborted the execution of the query */
|
|
join->thd->send_kill_message();
|
|
DBUG_RETURN(NESTED_LOOP_KILLED);
|
|
}
|
|
|
|
if (end_of_records)
|
|
{
|
|
rc= aggr->end_send();
|
|
if (rc >= NESTED_LOOP_OK)
|
|
rc= sub_select(join, join_tab, end_of_records);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
rc= aggr->put_record();
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/*
|
|
Fill the join buffer with partial records, retrieve all full matches for
|
|
them
|
|
|
|
SYNOPSIS
|
|
sub_select_cache()
|
|
join pointer to the structure providing all context info for the
|
|
query
|
|
join_tab the first next table of the execution plan to be retrieved
|
|
end_records true when we need to perform final steps of the retrieval
|
|
|
|
DESCRIPTION
|
|
For a given table Ti= join_tab from the sequence of tables of the chosen
|
|
execution plan T1,...,Ti,...,Tn the function just put the partial record
|
|
t1,...,t[i-1] into the join buffer associated with table Ti unless this
|
|
is the last record added into the buffer. In this case, the function
|
|
additionally finds all matching full records for all partial
|
|
records accumulated in the buffer, after which it cleans the buffer up.
|
|
If a partial join record t1,...,ti is extended utilizing a dynamic
|
|
range scan then it is not put into the join buffer. Rather all matching
|
|
records are found for it at once by the function sub_select.
|
|
|
|
NOTES
|
|
The function implements the algorithmic schema for both Blocked Nested
|
|
Loop Join and Batched Key Access Join. The difference can be seen only at
|
|
the level of of the implementation of the put_record and join_records
|
|
virtual methods for the cache object associated with the join_tab.
|
|
The put_record method accumulates records in the cache, while the
|
|
join_records method builds all matching join records and send them into
|
|
the output stream.
|
|
|
|
RETURN
|
|
return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
|
|
{
|
|
enum_nested_loop_state rc;
|
|
JOIN_CACHE *cache= join_tab->cache;
|
|
int err;
|
|
DBUG_ENTER("sub_select_cache");
|
|
|
|
/*
|
|
This function cannot be called if join_tab has no associated join
|
|
buffer
|
|
*/
|
|
DBUG_ASSERT(cache != NULL);
|
|
|
|
join_tab->cache->reset_join(join);
|
|
|
|
if (end_of_records)
|
|
{
|
|
rc= cache->join_records(FALSE);
|
|
if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
|
|
rc == NESTED_LOOP_QUERY_LIMIT)
|
|
rc= sub_select(join, join_tab, end_of_records);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
/* The user has aborted the execution of the query */
|
|
DBUG_RETURN(NESTED_LOOP_KILLED);
|
|
}
|
|
join_tab->jbuf_loops_tracker->on_scan_init();
|
|
|
|
if (!(err= test_if_use_dynamic_range_scan(join_tab)))
|
|
{
|
|
if (!cache->put_record())
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
/*
|
|
We has decided that after the record we've just put into the buffer
|
|
won't add any more records. Now try to find all the matching
|
|
extensions for all records in the buffer.
|
|
*/
|
|
rc= cache->join_records(FALSE);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
if (err < 0)
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
|
|
/*
|
|
TODO: Check whether we really need the call below and we can't do
|
|
without it. If it's not the case remove it.
|
|
*/
|
|
rc= cache->join_records(TRUE);
|
|
if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
|
|
rc == NESTED_LOOP_QUERY_LIMIT)
|
|
rc= sub_select(join, join_tab, end_of_records);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
/**
|
|
Retrieve records that start with a given beginning from the result of a join.
|
|
|
|
For a given partial join record consisting of records from the tables
|
|
preceding the table join_tab in the execution plan, the function
|
|
retrieves all matching full records from the result set and
|
|
send them to the result set stream.
|
|
|
|
@note
|
|
The function effectively implements the final (n-k) nested loops
|
|
of nested loops join algorithm, where k is the ordinal number of
|
|
the join_tab table and n is the total number of tables in the join query.
|
|
It performs nested loops joins with all conjunctive predicates from
|
|
the where condition pushed as low to the tables as possible.
|
|
E.g. for the query
|
|
@code
|
|
SELECT * FROM t1,t2,t3
|
|
WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9
|
|
@endcode
|
|
the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1,
|
|
given the selected plan prescribes to nest retrievals of the
|
|
joined tables in the following order: t1,t2,t3.
|
|
A pushed down predicate is attached to the table which it is pushed to,
|
|
at the field join_tab->select_cond.
|
|
When executing a nested loop of level k the function runs through
|
|
the rows of 'join_tab' and for each row checks the pushed condition
|
|
attached to the table.
|
|
If it is false the function moves to the next row of the
|
|
table. If the condition is true the function recursively executes (n-k-1)
|
|
remaining embedded nested loops.
|
|
The situation becomes more complicated if outer joins are involved in
|
|
the execution plan. In this case the pushed down predicates can be
|
|
checked only at certain conditions.
|
|
Suppose for the query
|
|
@code
|
|
SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a
|
|
WHERE t1>2 AND (t2.b>5 OR t2.b IS NULL)
|
|
@endcode
|
|
the optimizer has chosen a plan with the table order t1,t2,t3.
|
|
The predicate P1=t1>2 will be pushed down to the table t1, while the
|
|
predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table
|
|
t2. But the second predicate can not be unconditionally tested right
|
|
after a row from t2 has been read. This can be done only after the
|
|
first row with t3.a=t1.a has been encountered.
|
|
Thus, the second predicate P2 is supplied with a guard value that is
|
|
stored in the field 'found' of the first inner table for the outer join
|
|
(table t2). When the first row with t3.a=t1.a for the current row
|
|
of table t1 appears, the value becomes true. From now on the predicate
|
|
is evaluated immediately after the row of table t2 has been read.
|
|
When the first row with t3.a=t1.a has been encountered all
|
|
conditions attached to the inner tables t2,t3 must be evaluated.
|
|
Only when all of them are true the row is sent to the output stream.
|
|
If not, the function returns to the lowest nest level that has a false
|
|
attached condition.
|
|
The predicates from on expressions are also pushed down. If in the
|
|
above example the on expression were (t3.a=t1.a AND t2.a=t1.a),
|
|
then t1.a=t2.a would be pushed down to table t2, and without any
|
|
guard.
|
|
If after the run through all rows of table t2, the first inner table
|
|
for the outer join operation, it turns out that no matches are
|
|
found for the current row of t1, then current row from table t1
|
|
is complemented by nulls for t2 and t3. Then the pushed down predicates
|
|
are checked for the composed row almost in the same way as it had
|
|
been done for the first row with a match. The only difference is
|
|
the predicates from on expressions are not checked.
|
|
|
|
@par
|
|
@b IMPLEMENTATION
|
|
@par
|
|
The function forms output rows for a current partial join of k
|
|
tables recursively.
|
|
For each partial join record ending with a certain row from
|
|
join_tab it calls sub_select that builds all possible matching
|
|
tails from the result set.
|
|
To be able to check predicates conditionally, items of the class
|
|
Item_func_trig_cond are employed.
|
|
An object of this class is constructed from an item of class COND
|
|
and a pointer to a guarding boolean variable.
|
|
When the value of the guard variable is true the value of the object
|
|
is the same as the value of the predicate, otherwise it just returns
|
|
true.
|
|
To carry out a return to a nested loop level of join table t the pointer
|
|
to t is remembered in the field 'return_tab' of the join structure.
|
|
Consider the following query:
|
|
@code
|
|
SELECT * FROM t1,
|
|
LEFT JOIN
|
|
(t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a)
|
|
ON t4.a=t2.a
|
|
WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL)
|
|
@endcode
|
|
Suppose the chosen execution plan dictates the order t1,t2,t3,t4,t5
|
|
and suppose for a given joined rows from tables t1,t2,t3 there are
|
|
no rows in the result set yet.
|
|
When first row from t5 that satisfies the on condition
|
|
t5.a=t3.a is found, the pushed down predicate t4.b=2 OR t4.b IS NULL
|
|
becomes 'activated', as well the predicate t4.a=t2.a. But
|
|
the predicate (t2.b=5 OR t2.b IS NULL) can not be checked until
|
|
t4.a=t2.a becomes true.
|
|
In order not to re-evaluate the predicates that were already evaluated
|
|
as attached pushed down predicates, a pointer to the first
|
|
most inner unmatched table is maintained in join_tab->first_unmatched.
|
|
Thus, when the first row from t5 with t5.a=t3.a is found
|
|
this pointer for t5 is changed from t4 to t2.
|
|
|
|
@par
|
|
@b STRUCTURE @b NOTES
|
|
@par
|
|
join_tab->first_unmatched points always backwards to the first inner
|
|
table of the embedding nested join, if any.
|
|
|
|
@param join pointer to the structure providing all context info for
|
|
the query
|
|
@param join_tab the first next table of the execution plan to be retrieved
|
|
@param end_of_records true when we need to perform final steps of retrieval
|
|
|
|
@return
|
|
return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
|
|
*/
|
|
|
|
enum_nested_loop_state
sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
{
  int error;
  DBUG_ENTER("sub_select");

  /*
    For every table whose bit is set in split_derived_to_update, clear its
    preread_init_done flag (bit number == index into join->map2table).
  */
  if (join_tab->split_derived_to_update && !end_of_records)
  {
    table_map pending= join_tab->split_derived_to_update;
    uint tab_no= 0;
    while (pending)
    {
      if (pending & 1)
        join->map2table[tab_no]->preread_init_done= false;
      tab_no++;
      pending>>= 1;
    }
  }

  /*
    Undo a possible earlier mark_as_null_row(): reset null_row on this table
    or, when it is the first inner table of an outer join, on every table
    from join_tab up to and including last_inner.
  */
  JOIN_TAB *reset_end= join_tab->last_inner ? join_tab->last_inner : join_tab;
  for (JOIN_TAB *cur= join_tab; cur <= reset_end; cur++)
    cur->table->null_row= 0;

  if (end_of_records)
  {
    /* Nothing to read here: just forward the end-of-records signal. */
    enum_nested_loop_state forwarded=
      (*join_tab->next_select)(join, join_tab+1, end_of_records);
    DBUG_RETURN(forwarded);
  }
  join_tab->tracker->r_scans++;

  enum_nested_loop_state rc= NESTED_LOOP_OK;

  /* Call sj_weedout_delete_rows() on each table of the flush chain. */
  SJ_TMP_TABLE *weedout_tbl= join_tab->flush_weedout_table;
  while (weedout_tbl)
  {
    weedout_tbl->sj_weedout_delete_rows();
    weedout_tbl= weedout_tbl->next_flush_table;
  }

  if (!join_tab->preread_init_done && join_tab->preread_init())
    DBUG_RETURN(NESTED_LOOP_ERROR);

  if (unlikely(join_tab->rowid_filter))
  {
    if (unlikely(join_tab->need_to_build_rowid_filter) &&
        join_tab->build_range_rowid_filter())
      DBUG_RETURN(NESTED_LOOP_ERROR);
    /*
      rowid_filter is tested once more because build_range_rowid_filter()
      may have cleared it in case of errors. With an empty filter no row
      is read from this table.
    */
    if (join_tab->rowid_filter && join_tab->rowid_filter->is_empty())
      rc= NESTED_LOOP_NO_MORE_ROWS;
  }

  join->return_tab= join_tab;

  if (join_tab->last_inner)
  {
    /*
      join_tab is the first inner table of an outer join operation:
      put the guard variables into their initial state ...
    */
    join_tab->found= 0;
    join_tab->not_null_compl= 1;

    /* ... and let the last inner table of the group point back to us. */
    join_tab->last_inner->first_unmatched= join_tab;

    /* A false on_precond means no inner row is read at all. */
    if (join_tab->on_precond && !join_tab->on_precond->val_int())
      rc= NESTED_LOOP_NO_MORE_ROWS;
  }
  join->thd->get_stmt_da()->reset_current_row_for_warning(1);

  if (rc != NESTED_LOOP_NO_MORE_ROWS &&
      (rc= join_tab_execution_startup(join_tab)) < 0)
    DBUG_RETURN(rc);

  if (join_tab->loosescan_match_tab)
    join_tab->loosescan_match_tab->found_match= FALSE;

  DBUG_ASSERT(join_tab->cached_pfs_batch_update == join_tab->pfs_batch_update());
  if (join_tab->cached_pfs_batch_update)
    join_tab->table->file->start_psi_batch_mode();

  /* First row of the scan. */
  if (rc != NESTED_LOOP_NO_MORE_ROWS)
  {
    error= (*join_tab->read_first_record)(join_tab);
    if (!error && join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);
    rc= evaluate_join_record(join, join_tab, error);
  }

  bool skip_same_key= FALSE;
  READ_RECORD *reader= &join_tab->read_record;

  /*
    Remaining rows. The loop also stops when a deeper level asked to
    return to a table located before join_tab (join->return_tab).
  */
  while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab)
  {
    if (join_tab->loosescan_match_tab &&
        join_tab->loosescan_match_tab->found_match)
    {
      /* Save the LooseScan key of the row that produced a match. */
      key_copy(join_tab->loosescan_buf, join_tab->table->record[0],
               join_tab->table->key_info + join_tab->loosescan_key,
               join_tab->loosescan_key_len);
      skip_same_key= TRUE;
    }

    error= reader->read_record();

    if (skip_same_key && likely(!error))
    {
      KEY *ls_key= &join_tab->table->key_info[join_tab->loosescan_key];
      /*
        LooseScan: a row carrying the same key value as the one that
        already matched is skipped without being evaluated.
      */
      if (!key_cmp(ls_key->key_part, join_tab->loosescan_buf,
                   join_tab->loosescan_key_len))
        continue;

      /* New key value: go back to normal processing. */
      join_tab->loosescan_match_tab->found_match= FALSE;
      skip_same_key= FALSE;
    }

    if (join_tab->keep_current_rowid && likely(!error))
      join_tab->table->file->position(join_tab->table->record[0]);

    rc= evaluate_join_record(join, join_tab, error);
  }

  if (rc == NESTED_LOOP_NO_MORE_ROWS)
  {
    /*
      The scan is exhausted. If join_tab opens an outer join nest for which
      no match was found, produce the NULL-complemented row. In every case
      NO_MORE_ROWS is turned into OK before returning.
    */
    if (join_tab->last_inner && !join_tab->found)
      rc= evaluate_null_complemented_join_record(join, join_tab);
    else
      rc= NESTED_LOOP_OK;

    if (rc == NESTED_LOOP_NO_MORE_ROWS)
      rc= NESTED_LOOP_OK;
  }

  if (join_tab->cached_pfs_batch_update)
    join_tab->table->file->end_psi_batch_mode();

  DBUG_RETURN(rc);
}
|
|
|
|
/**
|
|
@brief Process one row of the nested loop join.
|
|
|
|
This function will evaluate parts of WHERE/ON clauses that are
|
|
applicable to the partial row on hand and in case of success
|
|
submit this row to the next level of the nested loop.
|
|
|
|
@param join - The join object
|
|
@param join_tab - The most inner join_tab being processed
|
|
@param error > 0: Error, terminate processing
|
|
= 0: (Partial) row is available
|
|
< 0: No more rows available at this level
|
|
@return Nested loop state (Ok, No_more_rows, Error, Killed)
|
|
*/
|
|
|
|
static enum_nested_loop_state
evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
                     int error)
{
  /* Values captured now and compared again after the nested call below. */
  const bool distinct_shortcut= join_tab->shortcut_for_distinct;
  const ha_rows rows_found_before= join->found_records;
  COND *cond= join_tab->select_cond;
  bool cond_ok= TRUE;
  DBUG_ENTER("evaluate_join_record");
  DBUG_PRINT("enter",
             ("evaluate_join_record join: %p join_tab: %p "
              "cond: %p abort: %d alias %s",
              join, join_tab, cond, error,
              join_tab->table->alias.ptr()));

  /* Translate the outcome of the read that preceded this call. */
  if (error > 0 || unlikely(join->thd->is_error()))     // Fatal error
    DBUG_RETURN(NESTED_LOOP_ERROR);
  if (error < 0)                                        // Nothing was read
    DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
  if (unlikely(join->thd->check_killed()))              // Aborted by user
  {
    DBUG_RETURN(NESTED_LOOP_KILLED);            /* purecov: inspected */
  }

  join_tab->tracker->r_rows++;

  if (cond)
  {
    cond_ok= MY_TEST(cond->val_int());

    /* Evaluating the condition may itself have raised an error. */
    if (unlikely(join->thd->is_error()))
      DBUG_RETURN(NESTED_LOOP_ERROR);
  }

  if (!cond_ok)
  {
    /*
      The condition attached to join_tab rejects this row for the current
      partial join; it is still counted as examined.
    */
    join->thd->inc_examined_row_count_fast();
  }
  else
  {
    /* No attached condition, or it evaluated to true: we have a match. */
    join_tab->tracker->r_rows_after_where++;

    bool found= 1;
    /*
      first_unmatched is only set when join_tab is the last inner table of
      an outer join operation; otherwise this loop is not entered.
    */
    while (join_tab->first_unmatched && found)
    {
      JOIN_TAB *nest_first= join_tab->first_unmatched;
      /*
        Record that the current outer row has a match. This switches on
        the guarded predicates attached to the inner tables of the nest.
      */
      nest_first->found= 1;
      for (JOIN_TAB *tab= nest_first; tab <= join_tab; tab++)
      {
        /*
          'not exists' optimization: not_exists_optimize set on tab means
          WHERE has a conjunctive IS NULL predicate over a non-nullable
          field of tab, which rejects every row that has a match. It may
          only be relied upon when that predicate is really active, i.e.
          every embedding outer join (first_upper chain) already has its
          'found' guard open.
        */
        bool not_exists_ok= tab->table->reginfo.not_exists_optimize;
        for (JOIN_TAB *upper= nest_first->first_upper;
             not_exists_ok && upper;
             upper= upper->first_upper)
        {
          if (!upper->found)
            not_exists_ok= false;
        }

        /*
          Evaluate the predicates that have just been activated. Predicates
          not guarded by nest_first->found get evaluated a second time here.
        */
        if (!tab->select_cond)
          continue;

        const longlong res= tab->select_cond->val_int();
        if (join->thd->is_error())
          DBUG_RETURN(NESTED_LOOP_ERROR);
        if (res)
          continue;

        /* The condition attached to table tab is false. */
        if (tab != join_tab)
        {
          /*
            The rejecting predicate belongs to an earlier table of the
            nest: make that table the point to return to.
          */
          join->return_tab= tab;
          DBUG_RETURN(not_exists_ok ? NESTED_LOOP_NO_MORE_ROWS
                                    : NESTED_LOOP_OK);
        }
        found= 0;
        if (not_exists_ok)
          DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
      }
      /*
        Move on to the embedding outer join, but only if join_tab is the
        last inner table of that one as well.
      */
      JOIN_TAB *outer_first= nest_first->first_upper;
      if (outer_first && outer_first->last_inner != join_tab)
        outer_first= 0;
      join_tab->first_unmatched= outer_first;
    }

    JOIN_TAB *return_tab= join->return_tab;
    join_tab->found_match= TRUE;

    if (join_tab->check_weed_out_table && found)
    {
      int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
      DBUG_PRINT("info", ("weedout_check: %d", res));
      if (res == -1)
        DBUG_RETURN(NESTED_LOOP_ERROR);
      if (res == 1)
        found= FALSE;
    }
    else if (join_tab->do_firstmatch)
    {
      /*
        Once all suffixes of the current prefix row combination have been
        enumerated, execution should go back to join_tab->do_firstmatch.
      */
      return_tab= join_tab->do_firstmatch;
    }

    /*
      Getting here means no newly activated predicate of an earlier table
      caused an early return above (see join->return_tab= tab).
    */
    join->thd->inc_examined_row_count_fast();
    DBUG_PRINT("counts", ("examined_rows: %llu found: %d",
               (ulonglong) join->thd->m_examined_row_count, (int) found));

    if (found)
    {
      /* Hand the extended partial row over to the next nesting level. */
      enum_nested_loop_state rc=
        (*join_tab->next_select)(join, join_tab+1, 0);
      join->thd->get_stmt_da()->inc_current_row_for_warning();
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
        DBUG_RETURN(rc);
      if (return_tab < join->return_tab)
        join->return_tab= return_tab;

      /* The nested levels may have raised an error. */
      if (unlikely(join->thd->is_error()))
        DBUG_RETURN(NESTED_LOOP_ERROR);

      /*
        SELECT DISTINCT over a table that is not in the field list: as soon
        as a row was added to the result (found_records changed) no other
        row of this table can add anything new, so the scan may stop. This
        only applies when no return to an earlier table was requested.
      */
      if (join->return_tab >= join_tab &&
          distinct_shortcut && rows_found_before != join->found_records)
        DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);

      DBUG_RETURN(NESTED_LOOP_OK);
    }
  }

  /* The row was not passed on: advance the warning row counter, unlock. */
  join->thd->get_stmt_da()->inc_current_row_for_warning();
  join_tab->read_record.unlock_row(join_tab);

  DBUG_RETURN(NESTED_LOOP_OK);
}
|
|
|
|
/**
|
|
|
|
@details
|
|
Construct a NULL complemented partial join record and feed it to the next
|
|
level of the nested loop. This function is used in case we have
|
|
an OUTER join and no matching record was found.
|
|
*/
|
|
|
|
static enum_nested_loop_state
evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab)
{
  /*
    join_tab is the first inner table of an outer join operation and no
    match has been found for the current outer row.
  */
  JOIN_TAB *const nest_end= join_tab->last_inner;
  JOIN_TAB *cur= join_tab;

  while (cur <= nest_end)
  {
    /* Flip the guard variables into their "null-complemented" state. */
    cur->found= 1;
    cur->not_null_compl= 0;
    /* Every inner table contributes an all-NULL row. */
    restore_record(cur->table,s->default_values);   // Make empty record
    mark_as_null_row(cur->table);       // For group by without error
    /* The conditions attached to the inner tables must still hold. */
    COND *cond= cur->select_cond;
    if (cond && !cond->val_int())
      return NESTED_LOOP_OK;
    cur++;
  }
  /* Step back onto the last table processed by the loop above. */
  cur--;

  /*
    The NULL-complemented row may also be the first row of embedding outer
    joins. For each such join do what is done for a first regular match:
    open its guard and check the predicates that this activates.
  */
  for (;;)
  {
    JOIN_TAB *outer_first= cur->first_unmatched->first_upper;
    if (outer_first && outer_first->last_inner != cur)
      outer_first= 0;
    cur->first_unmatched= outer_first;
    if (!outer_first)
      break;

    outer_first->found= 1;
    for (JOIN_TAB *tab= outer_first; tab <= cur; tab++)
    {
      if (tab->select_cond && !tab->select_cond->val_int())
      {
        join->return_tab= tab;
        return NESTED_LOOP_OK;
      }
    }
  }

  /* All conditions attached to inner tables accept the row. */
  if (cur->check_weed_out_table)
  {
    int res= cur->check_weed_out_table->sj_weedout_check_row(join->thd);
    if (res == -1)
      return NESTED_LOOP_ERROR;
    if (res == 1)
      return NESTED_LOOP_OK;
  }
  else if (cur->do_firstmatch)
  {
    /*
      After the suffixes of the current prefix row combination have been
      enumerated, execution should go back to do_firstmatch.
    */
    if (cur->do_firstmatch < join->return_tab)
      join->return_tab= cur->do_firstmatch;
  }

  /* Join the NULL-complemented row with the remaining tables. */
  return (*cur->next_select)(join, cur+1, 0);
}
|
|
|
|
/*****************************************************************************
|
|
The different ways to read a record
|
|
Returns -1 if row was not found, 0 if row was found and 1 on errors
|
|
*****************************************************************************/
|
|
|
|
/** Helper function used when we get an error from the table handler. */
|
|
|
|
int report_error(TABLE *table, int error)
{
  switch (error) {
  case HA_ERR_END_OF_FILE:
  case HA_ERR_KEY_NOT_FOUND:
    /* Not an error as such: there simply is no row. */
    table->status= STATUS_GARBAGE;
    return -1;                                  // key not found; ok
  case HA_ERR_LOCK_DEADLOCK:
  case HA_ERR_LOCK_WAIT_TIMEOUT:
  case HA_ERR_TABLE_DEF_CHANGED:
    /* Locking reads can legally return these: keep them out of the log. */
    break;
  default:
    /* Anything else is logged, unless the connection has been killed. */
    if (!table->in_use->killed)
      sql_print_error("Got error %d when reading table '%s'",
                      error, table->s->path.str);
    break;
  }
  /* In all remaining cases let the handler report the error. */
  table->file->print_error(error,MYF(0));
  return 1;
}
|
|
|
|
|
|
/**
  Do an exact-key index read for tab using the key image in tab->ref.

  @param tab  table to read; the row is read into table->record[0]

  @return 0 when the handler call succeeded, otherwise the value
          returned by report_error() for the handler error
*/
int safe_index_read(JOIN_TAB *tab)
{
  TABLE *table= tab->table;
  const int error=
    table->file->ha_index_read_map(table->record[0],
                                   tab->ref.key_buff,
                                   make_prev_keypart_map(tab->ref.key_parts),
                                   HA_READ_KEY_EXACT);
  if (unlikely(error))
    return report_error(table, error);
  return 0;
}
|
|
|
|
|
|
/**
|
|
Reads content of constant table
|
|
|
|
@param tab table
|
|
@param pos position of table in query plan
|
|
|
|
@retval 0 ok, one row was found or one NULL-complemented row was created
|
|
@retval -1 ok, no row was found and no NULL-complemented row was created
|
|
@retval 1 error
|
|
*/
|
|
|
|
static int
|
|
join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos)
|
|
{
|
|
int error;
|
|
TABLE_LIST *tbl;
|
|
DBUG_ENTER("join_read_const_table");
|
|
TABLE *table=tab->table;
|
|
table->const_table=1;
|
|
table->null_row=0;
|
|
table->status=STATUS_NO_RECORD;
|
|
|
|
if (tab->table->pos_in_table_list->is_materialized_derived() &&
|
|
!tab->table->pos_in_table_list->fill_me)
|
|
{
|
|
DBUG_ASSERT(0);
|
|
//TODO: don't get here at all
|
|
/*
|
|
Skip materialized derived tables/views as they temporary table is not
|
|
opened yet.
|
|
*/
|
|
DBUG_RETURN(0);
|
|
}
|
|
else if (tab->table->pos_in_table_list->jtbm_subselect &&
|
|
tab->table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
|
|
{
|
|
/* Row will not be found */
|
|
int res;
|
|
if (tab->table->pos_in_table_list->jtbm_subselect->jtbm_const_row_found)
|
|
res= 0;
|
|
else
|
|
res= -1;
|
|
DBUG_RETURN(res);
|
|
}
|
|
else if (tab->type == JT_SYSTEM)
|
|
{
|
|
if (unlikely((error=join_read_system(tab))))
|
|
{ // Info for DESCRIBE
|
|
tab->info= ET_CONST_ROW_NOT_FOUND;
|
|
/* Mark for EXPLAIN that the row was not found */
|
|
pos->records_read= pos->records_out= 0.0;
|
|
pos->ref_depend_map= 0;
|
|
if (!table->pos_in_table_list->outer_join || error > 0)
|
|
DBUG_RETURN(error);
|
|
}
|
|
/*
|
|
The optimizer trust the engine that when stats.records is 0, there
|
|
was no found rows
|
|
*/
|
|
DBUG_ASSERT(table->file->stats.records > 0 || error);
|
|
}
|
|
else
|
|
{
|
|
error=join_read_const(tab);
|
|
if (unlikely(error))
|
|
{
|
|
tab->info= ET_UNIQUE_ROW_NOT_FOUND;
|
|
/* Mark for EXPLAIN that the row was not found */
|
|
pos->records_read= pos->records_out= 0.0;
|
|
pos->ref_depend_map= 0;
|
|
if (!table->pos_in_table_list->outer_join || error > 0)
|
|
DBUG_RETURN(error);
|
|
}
|
|
}
|
|
/*
|
|
Evaluate an on-expression only if it is not considered expensive.
|
|
This mainly prevents executing subqueries in optimization phase.
|
|
This is necessary since proper setup for such execution has not been
|
|
done at this stage.
|
|
*/
|
|
if (*tab->on_expr_ref && !table->null_row &&
|
|
!(*tab->on_expr_ref)->is_expensive())
|
|
{
|
|
#if !defined(DBUG_OFF) && defined(NOT_USING_ITEM_EQUAL)
|
|
/*
|
|
This test could be very useful to find bugs in the optimizer
|
|
where we would call this function with an expression that can't be
|
|
evaluated yet. We can't have this enabled by default as long as
|
|
have items like Item_equal, that doesn't report they are const but
|
|
they can still be called even if they contain not const items.
|
|
*/
|
|
(*tab->on_expr_ref)->update_used_tables();
|
|
DBUG_ASSERT((*tab->on_expr_ref)->const_item());
|
|
#endif
|
|
if ((table->null_row= MY_TEST((*tab->on_expr_ref)->val_int() == 0)))
|
|
mark_as_null_row(table);
|
|
}
|
|
if (!table->null_row && ! tab->join->mixed_implicit_grouping)
|
|
table->maybe_null= 0;
|
|
|
|
{
|
|
JOIN *join= tab->join;
|
|
List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
|
|
/* Check appearance of new constant items in Item_equal objects */
|
|
if (join->conds)
|
|
update_const_equal_items(thd, join->conds, tab, TRUE);
|
|
while ((tbl= ti++))
|
|
{
|
|
TABLE_LIST *embedded;
|
|
TABLE_LIST *embedding= tbl;
|
|
do
|
|
{
|
|
embedded= embedding;
|
|
if (embedded->on_expr)
|
|
update_const_equal_items(thd, embedded->on_expr, tab, TRUE);
|
|
embedding= embedded->embedding;
|
|
}
|
|
while (embedding &&
|
|
embedding->nested_join->join_list.head() == embedded);
|
|
}
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
Read a constant table when there is at most one matching row, using a table
|
|
scan.
|
|
|
|
@param tab Table to read
|
|
|
|
@retval 0 Row was found
|
|
@retval -1 Row was not found
|
|
@retval 1 Got an error (other than row not found) during read
|
|
*/
|
|
static int
|
|
join_read_system(JOIN_TAB *tab)
|
|
{
|
|
TABLE *table= tab->table;
|
|
int error;
|
|
if (table->status & STATUS_GARBAGE) // If first read
|
|
{
|
|
if (unlikely((error=
|
|
table->file->ha_read_first_row(table->record[0],
|
|
table->s->primary_key))))
|
|
{
|
|
if (error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
table->const_table= 1;
|
|
mark_as_null_row(tab->table);
|
|
empty_record(table); // Make empty record
|
|
return -1;
|
|
}
|
|
store_record(table,record[1]);
|
|
}
|
|
else if (!table->status) // Only happens with left join
|
|
restore_record(table,record[1]); // restore old record
|
|
table->null_row=0;
|
|
return table->status ? -1 : 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Read a table when there is at most one matching row.
|
|
|
|
@param tab Table to read
|
|
|
|
@retval 0 Row was found
|
|
@retval -1 Row was not found
|
|
@retval 1 Got an error (other than row not found) during read
|
|
*/
|
|
|
|
static int
|
|
join_read_const(JOIN_TAB *tab)
|
|
{
|
|
int error;
|
|
TABLE *table= tab->table;
|
|
if (table->status & STATUS_GARBAGE) // If first read
|
|
{
|
|
table->status= 0;
|
|
if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
|
|
error=HA_ERR_KEY_NOT_FOUND;
|
|
else
|
|
{
|
|
handler *file= table->file;
|
|
if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread &&
|
|
(int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY)
|
|
{
|
|
file->ha_start_keyread(tab->ref.key);
|
|
/* This is probably needed for analyze table */
|
|
tab->index= tab->ref.key;
|
|
}
|
|
error= file->
|
|
ha_index_read_idx_map(table->record[0],tab->ref.key,
|
|
(uchar*) tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts),
|
|
HA_READ_KEY_EXACT);
|
|
file->ha_end_keyread();
|
|
}
|
|
if (unlikely(error))
|
|
{
|
|
table->status= STATUS_NOT_FOUND;
|
|
mark_as_null_row(tab->table);
|
|
empty_record(table);
|
|
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
return -1;
|
|
}
|
|
store_record(table,record[1]);
|
|
}
|
|
else if (!(table->status & ~STATUS_NULL_ROW)) // Only happens with left join
|
|
{
|
|
table->status=0;
|
|
restore_record(table,record[1]); // restore old record
|
|
}
|
|
table->null_row=0;
|
|
return table->status ? -1 : 0;
|
|
}
|
|
|
|
/*
|
|
eq_ref access method implementation: "read_first" function
|
|
|
|
SYNOPSIS
|
|
join_read_key()
|
|
tab JOIN_TAB of the accessed table
|
|
|
|
DESCRIPTION
|
|
This is "read_first" function for the eq_ref access method. The difference
|
|
from ref access function is that it has a one-element lookup
|
|
cache (see cmp_buffer_with_ref)
|
|
|
|
RETURN
|
|
0 - Ok
|
|
-1 - Row not found
|
|
1 - Error
|
|
*/
|
|
|
|
|
|
static int
|
|
join_read_key(JOIN_TAB *tab)
|
|
{
|
|
return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref);
|
|
}
|
|
|
|
|
|
/*
|
|
eq_ref access handler but generalized a bit to support TABLE and TABLE_REF
|
|
not from the join_tab. See join_read_key for detailed synopsis.
|
|
*/
|
|
int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
{
  /* Open the index if this has not been done yet. */
  if (!table->file->inited)
  {
    const int init_err=
      table->file->ha_index_init(table_ref->key, tab ? tab->sorted : TRUE);
    if (unlikely(init_err))
    {
      (void) report_error(table, init_err);
      return 1;
    }
  }

  /*
    When ref (or eq_ref) access is made from row comparisons,
    row->bring_value() has to be called to obtain the new values.
  */
  if (tab && tab->bush_children)
  {
    TABLE_LIST *sj_nest= tab->bush_children->start->emb_sj_nest;
    sj_nest->sj_subq_pred->left_exp()->bring_value();
  }

  /* TODO: Why don't we do "Late NULLs Filtering" here? */

  const uint stale_flags= STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW;
  if (cmp_buffer_with_ref(thd, table, table_ref) ||
      (table->status & stale_flags))
  {
    /* A new lookup is needed. */
    if (table_ref->key_err)
    {
      table->status= STATUS_NOT_FOUND;
      return -1;
    }
    /*
      We are moving away from the current record. If it was never used
      (it did not match the partial WHERE), unlock it in the handler.
    */
    if (tab && tab->ref.has_record && tab->ref.use_count == 0)
    {
      tab->read_record.table->file->unlock_row();
      table_ref->has_record= FALSE;
    }
    const int error=
      table->file->ha_index_read_map(table->record[0],
                                     table_ref->key_buff,
                                     make_prev_keypart_map(table_ref->key_parts),
                                     HA_READ_KEY_EXACT);
    if (likely(!error))
    {
      table_ref->has_record= TRUE;
      table_ref->use_count= 1;
    }
    else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
      return report_error(table, error);
  }
  else if (table->status == 0)
  {
    /* Same key as before and a row is present: count one more use. */
    DBUG_ASSERT(table_ref->has_record);
    table_ref->use_count++;
  }

  table->null_row= 0;
  return table->status ? -1 : 0;
}
|
|
|
|
|
|
/**
|
|
Since join_read_key may buffer a record, do not unlock
|
|
it if it was not used in this invocation of join_read_key().
|
|
Only count locks, thus remembering if the record was left unused,
|
|
and unlock already when pruning the current value of
|
|
TABLE_REF buffer.
|
|
@sa join_read_key()
|
|
*/
|
|
|
|
static void
|
|
join_read_key_unlock_row(st_join_table *tab)
|
|
{
|
|
DBUG_ASSERT(tab->ref.use_count);
|
|
if (tab->ref.use_count)
|
|
tab->ref.use_count--;
|
|
}
|
|
|
|
/**
|
|
Rows from const tables are read once but potentially used
|
|
multiple times during execution of a query.
|
|
Ensure such rows are never unlocked during query execution.
|
|
*/
|
|
|
|
void
|
|
join_const_unlock_row(JOIN_TAB *tab)
|
|
{
|
|
DBUG_ASSERT(tab->type == JT_CONST);
|
|
}
|
|
|
|
|
|
/*
|
|
ref access method implementation: "read_first" function
|
|
|
|
SYNOPSIS
|
|
join_read_always_key()
|
|
tab JOIN_TAB of the accessed table
|
|
|
|
DESCRIPTION
|
|
This is "read_first" function for the "ref" access method.
|
|
|
|
The function must leave the index initialized when it returns.
|
|
ref_or_null access implementation depends on that.
|
|
|
|
RETURN
|
|
0 - Ok
|
|
-1 - Row not found
|
|
1 - Error
|
|
*/
|
|
|
|
static int
|
|
join_read_always_key(JOIN_TAB *tab)
|
|
{
|
|
int error;
|
|
TABLE *table= tab->table;
|
|
|
|
/* Initialize the index first */
|
|
if (!table->file->inited)
|
|
{
|
|
if (unlikely((error= table->file->ha_index_init(tab->ref.key,
|
|
tab->sorted))))
|
|
{
|
|
(void) report_error(table, error);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref)))
|
|
return -1;
|
|
if (unlikely((error=
|
|
table->file->prepare_index_key_scan_map(tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts)))))
|
|
{
|
|
report_error(table,error);
|
|
return -1;
|
|
}
|
|
if ((error= table->file->ha_index_read_map(table->record[0],
|
|
tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts),
|
|
HA_READ_KEY_EXACT)))
|
|
{
|
|
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
return -1; /* purecov: inspected */
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
This function is used when optimizing away ORDER BY in
|
|
SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC.
|
|
*/
|
|
|
|
static int
|
|
join_read_last_key(JOIN_TAB *tab)
|
|
{
|
|
int error;
|
|
TABLE *table= tab->table;
|
|
|
|
if (!table->file->inited &&
|
|
unlikely((error= table->file->ha_index_init(tab->ref.key, tab->sorted))))
|
|
{
|
|
(void) report_error(table, error);
|
|
return 1;
|
|
}
|
|
|
|
if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref)))
|
|
return -1;
|
|
if (unlikely((error=
|
|
table->file->prepare_index_key_scan_map(tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts)))) )
|
|
{
|
|
report_error(table,error);
|
|
return -1;
|
|
}
|
|
if (unlikely((error=
|
|
table->file->ha_index_read_map(table->record[0],
|
|
tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts),
|
|
HA_READ_PREFIX_LAST))))
|
|
{
|
|
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
return -1; /* purecov: inspected */
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* ARGSUSED */
|
|
static int
|
|
join_no_more_records(READ_RECORD *info __attribute__((unused)))
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_next_same(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
TABLE *table= info->table;
|
|
JOIN_TAB *tab=table->reginfo.join_tab;
|
|
|
|
if (unlikely((error= table->file->ha_index_next_same(table->record[0],
|
|
tab->ref.key_buff,
|
|
tab->ref.key_length))))
|
|
{
|
|
if (error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
table->status= STATUS_GARBAGE;
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_prev_same(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
TABLE *table= info->table;
|
|
JOIN_TAB *tab=table->reginfo.join_tab;
|
|
|
|
if (unlikely((error= table->file->ha_index_prev(table->record[0]))))
|
|
return report_error(table, error);
|
|
if (key_cmp_if_same(table, tab->ref.key_buff, tab->ref.key,
|
|
tab->ref.key_length))
|
|
{
|
|
table->status=STATUS_NOT_FOUND;
|
|
error= -1;
|
|
}
|
|
return error;
|
|
}
|
|
|
|
|
|
static int
|
|
join_init_quick_read_record(JOIN_TAB *tab)
|
|
{
|
|
quick_select_return res= test_if_quick_select(tab);
|
|
|
|
if (res == SQL_SELECT::ERROR)
|
|
return 1; /* Fatal error */
|
|
|
|
if (res == SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
return -1; /* No possible records */
|
|
|
|
/*
|
|
Proceed to read rows. If we've created a quick select, use it, otherwise
|
|
do a full scan.
|
|
*/
|
|
return join_init_read_record(tab);
|
|
}
|
|
|
|
|
|
/**
  Initialise the handler with ha_rnd_init_with_error(1) and read the first
  row through tab->read_record.

  @param tab  join tab whose read_record is used

  @retval 1  ha_rnd_init_with_error() failed
  @return    otherwise the value of tab->read_record.read_record()
*/

int read_first_record_seq(JOIN_TAB *tab)
{
  const bool init_failed=
    unlikely(tab->read_record.table->file->ha_rnd_init_with_error(1));

  return init_failed ? 1 : tab->read_record.read_record();
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Create a new (dynamic) quick select.
|
|
*/
|
|
|
|
static quick_select_return
|
|
test_if_quick_select(JOIN_TAB *tab)
|
|
{
|
|
DBUG_EXECUTE_IF("show_explain_probe_test_if_quick_select",
|
|
if (dbug_user_var_equals_int(tab->join->thd,
|
|
"show_explain_probe_select_id",
|
|
tab->join->select_lex->select_number))
|
|
dbug_serve_apcs(tab->join->thd, 1);
|
|
);
|
|
|
|
|
|
delete tab->select->quick;
|
|
tab->select->quick=0;
|
|
|
|
if (tab->table->file->inited != handler::NONE)
|
|
tab->table->file->ha_index_or_rnd_end();
|
|
|
|
quick_select_return res;
|
|
res= tab->select->test_quick_select(tab->join->thd, tab->keys,
|
|
(table_map) 0, HA_POS_ERROR, 0,
|
|
FALSE, /*remove where parts*/FALSE,
|
|
FALSE,
|
|
/* no unusable key notes */
|
|
Item_func::BITMAP_NONE);
|
|
if (tab->explain_plan && tab->explain_plan->range_checked_fer)
|
|
tab->explain_plan->range_checked_fer->collect_data(tab->select->quick);
|
|
|
|
return res;
|
|
}
|
|
|
|
|
|
/*
|
|
@return
|
|
1 - Yes, use dynamically built range
|
|
0 - No, don't use dynamic range (but there's no error)
|
|
-1 - Fatal error
|
|
*/
|
|
|
|
static
|
|
int test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
|
|
{
|
|
if (unlikely(join_tab->use_quick == 2))
|
|
{
|
|
quick_select_return res= test_if_quick_select(join_tab);
|
|
if (res == SQL_SELECT::ERROR)
|
|
return -1;
|
|
else
|
|
{
|
|
/* Both OK and IMPOSSIBLE_RANGE go here */
|
|
return join_tab->select->quick ? 1 : 0;
|
|
}
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
  Prepare a join tab for reading and fetch its first row.

  In order, the function: removes duplicates if tab->distinct is set, builds
  the range rowid filter if requested, sorts the table if tab->filesort is
  set, runs preread_init() once, resets the quick select if there is one,
  initialises tab->read_record, and finally reads the first record.

  @param tab  join tab to initialise

  @retval 1  one of the preparation steps failed
  @return    otherwise the value of tab->read_record.read_record()
*/

int join_init_read_record(JOIN_TAB *tab)
{
  JOIN *join= tab->join;
  bool need_unpacking= FALSE;

  /*
    Note: the query plan tree for the below operations is constructed in
    save_agg_explain_data.
  */
  if (tab->distinct && tab->remove_duplicates())  // Remove duplicates.
    return 1;

  if (join->top_join_tab_count != join->const_tables)
  {
    TABLE_LIST *tl= tab->table->pos_in_table_list;
    if (tl)
      need_unpacking= tl->is_sjm_scan_table();
  }

  if (tab->need_to_build_rowid_filter && tab->build_range_rowid_filter())
    return 1;                                     /* Fatal error */

  if (tab->filesort && tab->sort_table())         // Sort table.
    return 1;

  DBUG_EXECUTE_IF("kill_join_init_read_record",
                  tab->join->thd->set_killed(KILL_QUERY););

  if (!tab->preread_init_done && tab->preread_init())
    return 1;

  if (tab->select && tab->select->quick && tab->select->quick->reset())
  {
    /* Ensures error status is propagated back to client */
    const int reset_err= tab->join->thd->killed ? HA_ERR_QUERY_INTERRUPTED
                                                : HA_ERR_OUT_OF_MEM;
    report_error(tab->table, reset_err);
    return 1;
  }
  /* make sure we won't get ER_QUERY_INTERRUPTED from any code below */
  DBUG_EXECUTE_IF("kill_join_init_read_record",
                  tab->join->thd->reset_killed(););

  /*
    init_read_record resets all elements of tab->read_record().
    Remember things that we don't want to have reset.
  */
  Copy_field *const kept_copy= tab->read_record.copy_field;
  Copy_field *const kept_copy_end= tab->read_record.copy_field_end;

  /*
    JT_NEXT means that we should use an index scan on index 'tab->index'.
    However, if filesort is set, the table was already sorted above and
    we now have to retrieve the rows from the tmp file or by rnd_pos().
    If (tab->select && tab->select->quick) holds, we are in
    "Range checked for each record" and we better let the normal
    init_read_record() handle this case.
  */
  const bool plain_index_scan= tab->type == JT_NEXT && !tab->filesort &&
                               !(tab->select && tab->select->quick);

  if (plain_index_scan)
  {
    /* Used with covered_index scan or force index */
    if (init_read_record_idx(&tab->read_record, tab->join->thd, tab->table,
                             1, tab->index, 0))
      return 1;
  }
  else if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
                            tab->select, tab->filesort_result, 1, 1, FALSE))
    return 1;

  tab->read_record.copy_field= kept_copy;
  tab->read_record.copy_field_end= kept_copy_end;

  if (need_unpacking)
  {
    /* Keep the real read function and route reads through the unpacker */
    tab->read_record.read_record_func_and_unpack_calls=
      tab->read_record.read_record_func;
    tab->read_record.read_record_func = read_record_func_for_rr_and_unpack;
  }

  return tab->read_record.read_record();
}
|
|
|
|
|
|
/*
|
|
Helper function for sorting table with filesort.
|
|
*/
|
|
|
|
bool
|
|
JOIN_TAB::sort_table()
|
|
{
|
|
int rc;
|
|
DBUG_PRINT("info",("Sorting for index"));
|
|
THD_STAGE_INFO(join->thd, stage_creating_sort_index);
|
|
DBUG_ASSERT(join->ordered_index_usage != (filesort->order == join->order ?
|
|
JOIN::ordered_index_order_by :
|
|
JOIN::ordered_index_group_by));
|
|
rc= create_sort_index(join->thd, join, this, NULL);
|
|
/* Disactivate rowid filter if it was used when creating sort index */
|
|
if (rowid_filter)
|
|
table->file->rowid_filter_is_active= false;
|
|
return (rc != 0);
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_first(JOIN_TAB *tab)
|
|
{
|
|
int error= 0;
|
|
TABLE *table=tab->table;
|
|
DBUG_ENTER("join_read_first");
|
|
|
|
DBUG_ASSERT(table->no_keyread ||
|
|
!table->covering_keys.is_set(tab->index) ||
|
|
table->file->keyread == tab->index);
|
|
tab->table->status=0;
|
|
tab->read_record.read_record_func= join_read_next;
|
|
tab->read_record.table=table;
|
|
if (!table->file->inited)
|
|
error= table->file->ha_index_init(tab->index, tab->sorted);
|
|
if (likely(!error))
|
|
error= table->file->prepare_index_scan();
|
|
if (unlikely(error) ||
|
|
unlikely(error= tab->table->file->ha_index_first(tab->table->record[0])))
|
|
{
|
|
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
|
|
report_error(table, error);
|
|
DBUG_RETURN(-1);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_next(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
if (unlikely((error= info->table->file->ha_index_next(info->record()))))
|
|
return report_error(info->table, error);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_last(JOIN_TAB *tab)
|
|
{
|
|
TABLE *table=tab->table;
|
|
int error= 0;
|
|
DBUG_ENTER("join_read_last");
|
|
|
|
DBUG_ASSERT(table->no_keyread ||
|
|
!table->covering_keys.is_set(tab->index) ||
|
|
table->file->keyread == tab->index);
|
|
tab->table->status=0;
|
|
tab->read_record.read_record_func= join_read_prev;
|
|
tab->read_record.table=table;
|
|
if (!table->file->inited)
|
|
error= table->file->ha_index_init(tab->index, 1);
|
|
if (likely(!error))
|
|
error= table->file->prepare_index_scan();
|
|
if (unlikely(error) ||
|
|
unlikely(error= tab->table->file->ha_index_last(tab->table->record[0])))
|
|
DBUG_RETURN(report_error(table, error));
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_prev(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
if (unlikely((error= info->table->file->ha_index_prev(info->record()))))
|
|
return report_error(info->table, error);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
join_ft_read_first(JOIN_TAB *tab)
|
|
{
|
|
int error;
|
|
TABLE *table= tab->table;
|
|
|
|
if (!table->file->inited &&
|
|
(error= table->file->ha_index_init(tab->ref.key, 1)))
|
|
{
|
|
(void) report_error(table, error);
|
|
return 1;
|
|
}
|
|
|
|
table->file->ft_init();
|
|
|
|
if (unlikely((error= table->file->ha_ft_read(table->record[0]))))
|
|
return report_error(table, error);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
join_ft_read_next(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
if (unlikely((error= info->table->file->ha_ft_read(info->record()))))
|
|
return report_error(info->table, error);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Reading of key with key reference and one part that may be NULL.
|
|
*/
|
|
|
|
int
|
|
join_read_always_key_or_null(JOIN_TAB *tab)
|
|
{
|
|
int res;
|
|
|
|
/* First read according to key which is NOT NULL */
|
|
*tab->ref.null_ref_key= 0; // Clear null byte
|
|
if ((res= join_read_always_key(tab)) >= 0)
|
|
return res;
|
|
|
|
/* Then read key with null value */
|
|
*tab->ref.null_ref_key= 1; // Set null byte
|
|
return safe_index_read(tab);
|
|
}
|
|
|
|
|
|
int
|
|
join_read_next_same_or_null(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
if (unlikely((error= join_read_next_same(info)) >= 0))
|
|
return error;
|
|
JOIN_TAB *tab= info->table->reginfo.join_tab;
|
|
|
|
/* Test if we have already done a read after null key */
|
|
if (*tab->ref.null_ref_key)
|
|
return -1; // All keys read
|
|
*tab->ref.null_ref_key= 1; // Set null byte
|
|
return safe_index_read(tab); // then read null keys
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
DESCRIPTION
|
|
Functions that end one nested loop iteration. Different functions
|
|
are used to support GROUP BY clause and to redirect records
|
|
to a table (e.g. in case of SELECT into a temporary table) or to the
|
|
network client.
|
|
|
|
RETURN VALUES
|
|
NESTED_LOOP_OK - the record has been successfully handled
|
|
NESTED_LOOP_ERROR - a fatal error (like table corruption)
|
|
was detected
|
|
NESTED_LOOP_KILLED - thread shutdown was requested while processing
|
|
the record
|
|
NESTED_LOOP_QUERY_LIMIT - the record has been successfully handled;
|
|
additionally, the nested loop produced the
|
|
number of rows specified in the LIMIT clause
|
|
for the query
|
|
NESTED_LOOP_CURSOR_LIMIT - the record has been successfully handled;
|
|
additionally, there is a cursor and the nested
|
|
loop algorithm produced the number of rows
|
|
that is specified for current cursor fetch
|
|
operation.
|
|
All return values except NESTED_LOOP_OK abort the nested loop.
|
|
*****************************************************************************/
|
|
|
|
/* ARGSUSED */
|
|
static enum_nested_loop_state
|
|
end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
|
|
{
|
|
DBUG_ENTER("end_send");
|
|
/*
|
|
When all tables are const this function is called with jointab == NULL.
|
|
This function shouldn't be called for the first join_tab as it needs
|
|
to get fields from previous tab.
|
|
*/
|
|
DBUG_ASSERT(join_tab == NULL || join_tab != join->join_tab);
|
|
//TODO pass fields via argument
|
|
List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields;
|
|
|
|
if (end_of_records)
|
|
{
|
|
if (join->procedure && join->procedure->end_of_records())
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
if (join->table_count &&
|
|
join->join_tab->is_using_loose_index_scan())
|
|
{
|
|
/* Copy non-aggregated fields when loose index scan is used. */
|
|
copy_fields(&join->tmp_table_param);
|
|
}
|
|
if (join->having && join->having->val_int() == 0)
|
|
DBUG_RETURN(NESTED_LOOP_OK); // Didn't match having
|
|
if (join->procedure)
|
|
{
|
|
if (join->procedure->send_row(join->procedure_fields_list))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
if (join->send_records >= join->unit->lim.get_select_limit() &&
|
|
join->unit->lim.is_with_ties())
|
|
{
|
|
/*
|
|
Stop sending rows if the order fields corresponding to WITH TIES
|
|
have changed.
|
|
*/
|
|
int idx= test_if_item_cache_changed(join->order_fields);
|
|
if (idx >= 0)
|
|
join->do_send_rows= false;
|
|
}
|
|
|
|
if (join->do_send_rows)
|
|
{
|
|
int error;
|
|
/* result < 0 if row was not accepted and should not be counted */
|
|
if (unlikely((error= join->result->send_data_with_check(*fields,
|
|
join->unit,
|
|
join->send_records))))
|
|
{
|
|
if (error > 0)
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
// error < 0 => duplicate row
|
|
join->duplicate_rows++;
|
|
}
|
|
}
|
|
|
|
join->send_records++;
|
|
join->accepted_rows++;
|
|
if (join->send_records >= join->unit->lim.get_select_limit())
|
|
{
|
|
if (!join->do_send_rows)
|
|
{
|
|
/*
|
|
If we have used Priority Queue for optimizing order by with limit,
|
|
then stop here, there are no more records to consume.
|
|
When this optimization is used, end_send is called on the next
|
|
join_tab.
|
|
*/
|
|
if (join->order &&
|
|
join->select_options & OPTION_FOUND_ROWS &&
|
|
join_tab > join->join_tab &&
|
|
(join_tab - 1)->filesort && (join_tab - 1)->filesort->using_pq)
|
|
{
|
|
DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT"));
|
|
DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
/* For WITH TIES we keep sending rows until a group has changed. */
|
|
if (join->unit->lim.is_with_ties())
|
|
{
|
|
/* Prepare the order_fields comparison for with ties. */
|
|
if (join->send_records == join->unit->lim.get_select_limit())
|
|
(void) test_if_group_changed(join->order_fields);
|
|
/* One more loop, to check if the next row matches with_ties or not. */
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
if (join->select_options & OPTION_FOUND_ROWS)
|
|
{
|
|
JOIN_TAB *jt=join->join_tab;
|
|
if ((join->table_count == 1) && !join->sort_and_group
|
|
&& !join->send_group_parts && !join->having && !jt->select_cond &&
|
|
!(jt->select && jt->select->quick) &&
|
|
(jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
|
|
(jt->ref.key < 0))
|
|
{
|
|
/* Join over all rows in table; Return number of found rows */
|
|
TABLE *table=jt->table;
|
|
|
|
if (jt->filesort_result) // If filesort was used
|
|
{
|
|
join->send_records= jt->filesort_result->found_rows;
|
|
}
|
|
else
|
|
{
|
|
table->file->info(HA_STATUS_VARIABLE);
|
|
join->send_records= table->file->stats.records;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
join->do_send_rows= 0;
|
|
if (join->unit->fake_select_lex)
|
|
join->unit->fake_select_lex->limit_params.select_limit= 0;
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely
|
|
}
|
|
else if (join->send_records >= join->fetch_limit)
|
|
{
|
|
/*
|
|
There is a server side cursor and all rows for
|
|
this fetch request are sent.
|
|
*/
|
|
DBUG_RETURN(NESTED_LOOP_CURSOR_LIMIT);
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Perform OrderedGroupBy operation and write the output into join->result.
|
|
|
|
@detail
|
|
The input stream is ordered by the GROUP BY expression, so groups come
|
|
one after another. We only need to accumulate the aggregate value, when
|
|
a GROUP BY group ends, check the HAVING and send the group.
|
|
|
|
Note that the output comes in the GROUP BY order, which is required by
|
|
the MySQL's GROUP BY semantics. No further sorting is needed.
|
|
|
|
@seealso end_write_group() also implements SortAndGroup
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
end_send_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
|
|
{
|
|
int idx= -1;
|
|
enum_nested_loop_state ok_code= NESTED_LOOP_OK;
|
|
/*
|
|
join_tab can be 0 in the case all tables are const tables and we did not
|
|
need a temporary table to store the result.
|
|
In this case we use the original given fields, which is stored in
|
|
join->fields.
|
|
*/
|
|
List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields;
|
|
DBUG_ENTER("end_send_group");
|
|
|
|
if (!join->items3.is_null() && join->current_ref_ptrs != join->items3)
|
|
join->set_items_ref_array(join->items3);
|
|
|
|
if (!join->first_record || end_of_records ||
|
|
(idx=test_if_group_changed(join->group_fields)) >= 0)
|
|
{
|
|
|
|
if (!join->group_sent &&
|
|
(join->first_record ||
|
|
(end_of_records && !join->group && !join->group_optimized_away)))
|
|
{
|
|
table_map cleared_tables= (table_map) 0;
|
|
if (join->procedure)
|
|
join->procedure->end_group();
|
|
/* Test if there was a group change. */
|
|
if (idx < (int) join->send_group_parts)
|
|
{
|
|
int error=0;
|
|
if (join->procedure)
|
|
{
|
|
if (join->having && join->having->val_int() == 0)
|
|
error= -1; // Didn't satisfy having
|
|
else
|
|
{
|
|
if (join->do_send_rows)
|
|
error=join->procedure->send_row(*fields) ? 1 : 0;
|
|
join->send_records++;
|
|
}
|
|
if (end_of_records && join->procedure->end_of_records())
|
|
error= 1; // Fatal error
|
|
}
|
|
else
|
|
{
|
|
/* Reset all sum functions on group change. */
|
|
if (!join->first_record)
|
|
{
|
|
/* No matching rows for group function */
|
|
|
|
List_iterator_fast<Item> it(*fields);
|
|
Item *item;
|
|
join->no_rows_in_result_called= 1;
|
|
|
|
join->clear(&cleared_tables);
|
|
while ((item= it++))
|
|
item->no_rows_in_result();
|
|
}
|
|
if (join->having && join->having->val_int() == 0)
|
|
error= -1; // Didn't satisfy having
|
|
else
|
|
{
|
|
if (join->do_send_rows)
|
|
{
|
|
error= join->result->send_data_with_check(*fields,
|
|
join->unit,
|
|
join->send_records);
|
|
if (unlikely(error < 0))
|
|
{
|
|
/* Duplicate row, don't count */
|
|
join->duplicate_rows++;
|
|
error= 0;
|
|
}
|
|
}
|
|
join->send_records++;
|
|
join->group_sent= true;
|
|
}
|
|
if (unlikely(join->rollup.state != ROLLUP::STATE_NONE && error <= 0))
|
|
{
|
|
if (join->rollup_send_data((uint) (idx+1)))
|
|
error= 1;
|
|
}
|
|
if (join->no_rows_in_result_called)
|
|
{
|
|
/* Restore null tables to original state */
|
|
join->no_rows_in_result_called= 0;
|
|
if (cleared_tables)
|
|
unclear_tables(join, &cleared_tables);
|
|
}
|
|
}
|
|
if (unlikely(error > 0))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
if (end_of_records)
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
if (join->send_records >= join->unit->lim.get_select_limit() &&
|
|
join->do_send_rows)
|
|
{
|
|
/* WITH TIES can be computed during end_send_group if
|
|
the order by is a subset of group by and we had an index
|
|
available to compute group by order directly. */
|
|
if (!join->unit->lim.is_with_ties() ||
|
|
idx < (int)join->with_ties_order_count)
|
|
{
|
|
if (!(join->select_options & OPTION_FOUND_ROWS))
|
|
DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely
|
|
join->do_send_rows= 0;
|
|
join->unit->lim.set_unlimited();
|
|
}
|
|
}
|
|
else if (join->send_records >= join->fetch_limit)
|
|
{
|
|
/*
|
|
There is a server side cursor and all rows
|
|
for this fetch request are sent.
|
|
|
|
Preventing code duplication. When finished with the group reset
|
|
the group functions and copy_fields. We fall through. bug #11904
|
|
*/
|
|
ok_code= NESTED_LOOP_CURSOR_LIMIT;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (end_of_records)
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
join->first_record=1;
|
|
(void) test_if_group_changed(join->group_fields);
|
|
}
|
|
if (idx < (int) join->send_group_parts)
|
|
{
|
|
/*
|
|
This branch is executed also for cursors which have finished their
|
|
fetch limit - the reason for ok_code.
|
|
*/
|
|
copy_fields(&join->tmp_table_param);
|
|
if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1]))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
if (join->procedure)
|
|
join->procedure->add();
|
|
join->group_sent= false;
|
|
join->accepted_rows++;
|
|
DBUG_RETURN(ok_code);
|
|
}
|
|
}
|
|
if (update_sum_func(join->sum_funcs))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
join->accepted_rows++;
|
|
if (join->procedure)
|
|
join->procedure->add();
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/* ARGSUSED */
|
|
static enum_nested_loop_state
|
|
end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
|
|
bool end_of_records)
|
|
{
|
|
TABLE *const table= join_tab->table;
|
|
DBUG_ENTER("end_write");
|
|
|
|
if (!end_of_records)
|
|
{
|
|
copy_fields(join_tab->tmp_table_param);
|
|
if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
|
|
if (likely(!join_tab->having || join_tab->having->val_int()))
|
|
{
|
|
int error;
|
|
join->found_records++;
|
|
join->accepted_rows++;
|
|
if ((error= table->file->ha_write_tmp_row(table->record[0])))
|
|
{
|
|
if (likely(!table->file->is_fatal_error(error, HA_CHECK_DUP)))
|
|
goto end; // Ignore duplicate keys
|
|
bool is_duplicate;
|
|
if (create_internal_tmp_table_from_heap(join->thd, table,
|
|
join_tab->tmp_table_param->start_recinfo,
|
|
&join_tab->tmp_table_param->recinfo,
|
|
error, 1, &is_duplicate))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
|
|
if (is_duplicate)
|
|
goto end;
|
|
}
|
|
if (++join_tab->send_records >=
|
|
join_tab->tmp_table_param->end_write_records &&
|
|
join->do_send_rows)
|
|
{
|
|
if (!(join->select_options & OPTION_FOUND_ROWS))
|
|
DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
|
|
join->do_send_rows=0;
|
|
join->unit->lim.set_unlimited();
|
|
}
|
|
}
|
|
}
|
|
end:
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Perform GROUP BY operation over rows coming in arbitrary order: use
|
|
TemporaryTableWithPartialSums algorithm.
|
|
|
|
@detail
|
|
The TemporaryTableWithPartialSums algorithm is:
|
|
|
|
CREATE TEMPORARY TABLE tmp (
|
|
group_by_columns PRIMARY KEY,
|
|
partial_sum
|
|
);
|
|
|
|
for each row R in join output {
|
|
INSERT INTO tmp (R.group_by_columns, R.sum_value)
|
|
ON DUPLICATE KEY UPDATE partial_sum=partial_sum + R.sum_value;
|
|
}
|
|
|
|
@detail
|
|
Also applies HAVING, etc.
|
|
|
|
@seealso end_unique_update()
|
|
*/
|
|
|
|
static enum_nested_loop_state
|
|
end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
|
|
bool end_of_records)
|
|
{
|
|
TABLE *const table= join_tab->table;
|
|
ORDER *group;
|
|
int error;
|
|
DBUG_ENTER("end_update");
|
|
|
|
if (end_of_records)
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
|
|
join->found_records++;
|
|
copy_fields(join_tab->tmp_table_param); // Groups are copied twice.
|
|
/* Make a key of group index */
|
|
for (group=table->group ; group ; group=group->next)
|
|
{
|
|
Item *item= *group->item;
|
|
if (group->fast_field_copier_setup != group->field)
|
|
{
|
|
DBUG_PRINT("info", ("new setup %p -> %p",
|
|
group->fast_field_copier_setup,
|
|
group->field));
|
|
group->fast_field_copier_setup= group->field;
|
|
group->fast_field_copier_func=
|
|
item->setup_fast_field_copier(group->field);
|
|
}
|
|
item->save_org_in_field(group->field, group->fast_field_copier_func);
|
|
/* Store in the used key if the field was 0 */
|
|
if (item->maybe_null())
|
|
group->buff[-1]= (char) group->field->is_null();
|
|
}
|
|
if (!table->file->ha_index_read_map(table->record[1],
|
|
join_tab->tmp_table_param->group_buff,
|
|
HA_WHOLE_KEY,
|
|
HA_READ_KEY_EXACT))
|
|
{ /* Update old record */
|
|
restore_record(table,record[1]);
|
|
update_tmptable_sum_func(join->sum_funcs,table);
|
|
if (unlikely((error= table->file->ha_update_tmp_row(table->record[1],
|
|
table->record[0]))))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
}
|
|
goto end;
|
|
}
|
|
|
|
init_tmptable_sum_functions(join->sum_funcs);
|
|
if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy,
|
|
join->thd)))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))))
|
|
{
|
|
if (create_internal_tmp_table_from_heap(join->thd, table,
|
|
join_tab->tmp_table_param->start_recinfo,
|
|
&join_tab->tmp_table_param->recinfo,
|
|
error, 0, NULL))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
|
|
/* Change method to update rows */
|
|
if (unlikely((error= table->file->ha_index_init(0, 0))))
|
|
{
|
|
table->file->print_error(error, MYF(0));
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
|
|
join_tab->aggr->set_write_func(end_unique_update);
|
|
}
|
|
join_tab->send_records++;
|
|
end:
|
|
join->accepted_rows++; // For rownum()
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/**
|
|
Like end_update, but this is done with unique constraints instead of keys.
|
|
*/
|
|
|
|
static enum_nested_loop_state
|
|
end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
|
|
bool end_of_records)
|
|
{
|
|
TABLE *table= join_tab->table;
|
|
int error;
|
|
DBUG_ENTER("end_unique_update");
|
|
|
|
if (end_of_records)
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
|
|
init_tmptable_sum_functions(join->sum_funcs);
|
|
copy_fields(join_tab->tmp_table_param); // Groups are copied twice.
|
|
if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
|
|
join->accepted_rows++;
|
|
if (likely(!(error= table->file->ha_write_tmp_row(table->record[0]))))
|
|
join_tab->send_records++; // New group
|
|
else
|
|
{
|
|
if (unlikely((int) table->file->get_dup_key(error) < 0))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
}
|
|
/* Prepare table for random positioning */
|
|
bool rnd_inited= (table->file->inited == handler::RND);
|
|
if (!rnd_inited &&
|
|
((error= table->file->ha_index_end()) ||
|
|
(error= table->file->ha_rnd_init(0))))
|
|
{
|
|
table->file->print_error(error, MYF(0));
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
if (unlikely(table->file->ha_rnd_pos(table->record[1],table->file->dup_ref)))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
}
|
|
restore_record(table,record[1]);
|
|
update_tmptable_sum_func(join->sum_funcs,table);
|
|
if (unlikely((error= table->file->ha_update_tmp_row(table->record[1],
|
|
table->record[0]))))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
}
|
|
if (!rnd_inited &&
|
|
((error= table->file->ha_rnd_end()) ||
|
|
(error= table->file->ha_index_init(0, 0))))
|
|
{
|
|
table->file->print_error(error, MYF(0));
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
}
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
|
|
}
|
|
join->accepted_rows++; // For rownum()
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Perform OrderedGroupBy operation and write the output into the temporary
|
|
table (join_tab->table).
|
|
|
|
@detail
|
|
The input stream is ordered by the GROUP BY expression, so groups come
|
|
one after another. We only need to accumulate the aggregate value, when
|
|
a GROUP BY group ends, check the HAVING and write the group.
|
|
|
|
@seealso end_send_group() also implements OrderedGroupBy
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
|
|
bool end_of_records)
|
|
{
|
|
TABLE *table= join_tab->table;
|
|
int idx= -1;
|
|
DBUG_ENTER("end_write_group");
|
|
|
|
join->accepted_rows++;
|
|
if (!join->first_record || end_of_records ||
|
|
(idx=test_if_group_changed(join->group_fields)) >= 0)
|
|
{
|
|
if (join->first_record || (end_of_records && !join->group))
|
|
{
|
|
table_map cleared_tables= (table_map) 0;
|
|
if (join->procedure)
|
|
join->procedure->end_group();
|
|
int send_group_parts= join->send_group_parts;
|
|
if (idx < send_group_parts)
|
|
{
|
|
if (!join->first_record)
|
|
{
|
|
/* No matching rows for group function */
|
|
join->clear(&cleared_tables);
|
|
}
|
|
copy_sum_funcs(join->sum_funcs,
|
|
join->sum_funcs_end[send_group_parts]);
|
|
if (!join_tab->having || join_tab->having->val_int())
|
|
{
|
|
int error= table->file->ha_write_tmp_row(table->record[0]);
|
|
if (unlikely(error) &&
|
|
create_internal_tmp_table_from_heap(join->thd, table,
|
|
join_tab->tmp_table_param->start_recinfo,
|
|
&join_tab->tmp_table_param->recinfo,
|
|
error, 0, NULL))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
if (unlikely(join->rollup.state != ROLLUP::STATE_NONE))
|
|
{
|
|
if (unlikely(join->rollup_write_data((uint) (idx+1),
|
|
join_tab->tmp_table_param,
|
|
table)))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
}
|
|
if (cleared_tables)
|
|
unclear_tables(join, &cleared_tables);
|
|
if (end_of_records)
|
|
goto end;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (end_of_records)
|
|
goto end;
|
|
join->first_record=1;
|
|
(void) test_if_group_changed(join->group_fields);
|
|
}
|
|
if (idx < (int) join->send_group_parts)
|
|
{
|
|
copy_fields(join_tab->tmp_table_param);
|
|
if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy,
|
|
join->thd)))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
if (unlikely(init_sum_functions(join->sum_funcs,
|
|
join->sum_funcs_end[idx+1])))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
if (unlikely(join->procedure))
|
|
join->procedure->add();
|
|
goto end;
|
|
}
|
|
}
|
|
if (unlikely(update_sum_func(join->sum_funcs)))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
if (unlikely(join->procedure))
|
|
join->procedure->add();
|
|
end:
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Remove calculation with tables that aren't yet read. Remove also tests
|
|
against fields that are read through a key where the table is not an
|
|
outer join table.
|
|
We can't remove tests that are made against columns which are stored
|
|
in sorted order.
|
|
*****************************************************************************/
|
|
|
|
/**
|
|
Check if "left_item=right_item" equality is guaranteed to be true by use of
|
|
[eq]ref access on left_item->field->table.
|
|
|
|
SYNOPSIS
|
|
test_if_ref()
|
|
root_cond
|
|
left_item
|
|
right_item
|
|
|
|
DESCRIPTION
|
|
Check if the given "left_item = right_item" equality is guaranteed to be
|
|
true by use of [eq_]ref access method.
|
|
|
|
We need root_cond as we can't remove ON expressions even if employed ref
|
|
access guarantees that they are true. This is because TODO
|
|
|
|
RETURN
|
|
TRUE if right_item is used removable reference key on left_item
|
|
FALSE Otherwise
|
|
|
|
*/
|
|
|
|
bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item)
|
|
{
|
|
Field *field=left_item->field;
|
|
JOIN_TAB *join_tab= field->table->reginfo.join_tab;
|
|
// No need to change const test
|
|
if (!field->table->const_table && join_tab &&
|
|
!join_tab->is_ref_for_hash_join() &&
|
|
(!join_tab->first_inner ||
|
|
*join_tab->first_inner->on_expr_ref == root_cond))
|
|
{
|
|
/*
|
|
If ref access uses "Full scan on NULL key" (i.e. it actually alternates
|
|
between ref access and full table scan), then no equality can be
|
|
guaranteed to be true.
|
|
*/
|
|
if (join_tab->ref.is_access_triggered())
|
|
return FALSE;
|
|
|
|
Item *ref_item=part_of_refkey(field->table,field);
|
|
if (ref_item && (ref_item->eq(right_item,1) ||
|
|
ref_item->real_item()->eq(right_item,1)))
|
|
{
|
|
right_item= right_item->real_item();
|
|
if (right_item->type() == Item::FIELD_ITEM)
|
|
return (field->eq_def(((Item_field *) right_item)->field));
|
|
/* remove equalities injected by IN->EXISTS transformation */
|
|
else if (right_item->type() == Item::CACHE_ITEM)
|
|
return ((Item_cache *)right_item)->eq_def (field);
|
|
if (right_item->const_item() && !(right_item->is_null()))
|
|
{
|
|
/*
|
|
We can remove binary fields and numerical fields except float,
|
|
as float comparison isn't 100 % safe
|
|
We have to keep normal strings to be able to check for end spaces
|
|
*/
|
|
if (field->binary() &&
|
|
field->real_type() != MYSQL_TYPE_STRING &&
|
|
field->real_type() != MYSQL_TYPE_VARCHAR &&
|
|
(field->type() != MYSQL_TYPE_FLOAT || field->decimals() == 0))
|
|
{
|
|
return !right_item->save_in_field_no_warnings(field, 1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return 0; // keep test
|
|
}
|
|
|
|
|
|
/**
|
|
Extract a condition that can be checked after reading given table
|
|
@fn make_cond_for_table()
|
|
|
|
@param cond Condition to analyze
|
|
@param tables Tables for which "current field values" are available
|
|
Tables for which "current field values" are available (this
|
|
includes used_table)
|
|
(may also include PSEUDO_TABLE_BITS, and may be zero)
|
|
@param used_table Table that we're extracting the condition for
|
|
@param join_tab_idx_arg
|
|
The index of the JOIN_TAB this Item is being extracted
|
|
for. MAX_TABLES if there is no corresponding JOIN_TAB.
|
|
@param exclude_expensive_cond
|
|
Do not push expensive conditions
|
|
@param retain_ref_cond
|
|
Retain ref conditions
|
|
|
|
@retval <>NULL Generated condition
|
|
@retval =NULL Already checked, OR error
|
|
|
|
@details
|
|
Extract the condition that can be checked after reading the table
|
|
specified in 'used_table', given that current-field values for tables
|
|
specified in 'tables' bitmap are available.
|
|
If 'used_table' is 0
|
|
- extract conditions for all tables in 'tables'.
|
|
- extract conditions are unrelated to any tables
|
|
in the same query block/level(i.e. conditions
|
|
which have used_tables == 0).
|
|
|
|
The function assumes that
|
|
- Constant parts of the condition has already been checked.
|
|
- Condition that could be checked for tables in 'tables' has already
|
|
been checked.
|
|
|
|
The function takes into account that some parts of the condition are
|
|
guaranteed to be true by employed 'ref' access methods (the code that
|
|
does this is located at the end, search down for "EQ_FUNC").
|
|
|
|
@note
|
|
Make sure to keep the implementations of make_cond_for_table() and
|
|
make_cond_after_sjm() synchronized.
|
|
make_cond_for_info_schema() uses similar algorithm as well.
|
|
*/
|
|
|
|
static Item *
|
|
make_cond_for_table(THD *thd, Item *cond, table_map tables,
|
|
table_map used_table,
|
|
int join_tab_idx_arg,
|
|
bool exclude_expensive_cond __attribute__((unused)),
|
|
bool retain_ref_cond)
|
|
{
|
|
return make_cond_for_table_from_pred(thd, cond, cond, tables, used_table,
|
|
join_tab_idx_arg,
|
|
exclude_expensive_cond,
|
|
retain_ref_cond, true);
|
|
}
|
|
|
|
|
|
/**
  Recursive worker of make_cond_for_table().

  @param thd               Thread handle (items are allocated on its mem_root)
  @param root_cond         The condition the recursion started from; passed
                           to test_if_ref()
  @param cond              The (sub)condition currently being analyzed
  @param tables            Tables for which current field values are available
  @param used_table        Table the condition is extracted for (0 = any)
  @param join_tab_idx_arg  JOIN_TAB index stored in the returned items
  @param exclude_expensive_cond  Passed through to recursive calls
  @param retain_ref_cond   If true, equalities guaranteed by ref access are
                           kept instead of being removed
  @param is_top_and_level  True only for the outermost call

  @retval <>NULL Generated condition
  @retval =NULL  Nothing to check (already checked / always true), or error
*/

static Item *
make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond,
                              table_map tables, table_map used_table,
                              int join_tab_idx_arg,
                              bool exclude_expensive_cond __attribute__
                              ((unused)),
                              bool retain_ref_cond,
                              bool is_top_and_level)

{
  const table_map rand_bit= (table_map) RAND_TABLE_BIT;

  if (used_table && !(cond->used_tables() & used_table))
    return (COND*) 0;                           // Already checked

  if (cond->type() == Item::COND_ITEM)
  {
    Item_cond *src_cond= (Item_cond*) cond;

    if (src_cond->functype() == Item_func::COND_AND_FUNC)
    {
      /* Build a new AND item out of the conjuncts that can be checked */
      Item_cond_and *and_cond= new (thd->mem_root) Item_cond_and(thd);
      if (!and_cond)
        return (COND*) 0;                       // OOM /* purecov: inspected */

      List_iterator<Item> arg_it(*src_cond->argument_list());
      Item *arg;
      while ((arg= arg_it++))
      {
        /*
          Special handling of top level conjuncts with RAND_TABLE_BIT:
          if such a conjunct contains a reference to a field that is not
          an outer field then it is pushed to the corresponding table by
          the same rule as all other conjuncts. Otherwise, if the conjunct
          is used in WHERE it is pushed to the last joined table, if it
          is used in ON condition of an outer join it is pushed into the
          last inner table of the outer join.
          In the call made with used_table == RAND_TABLE_BIT only conjuncts
          whose used tables (ignoring OUTER_REF_TABLE_BIT) are exactly
          RAND_TABLE_BIT are processed; the others have already been pushed.
        */
        if (is_top_and_level && used_table == rand_bit &&
            (arg->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_bit)
          continue;

        Item *part= make_cond_for_table_from_pred(thd, root_cond, arg,
                                                  tables, used_table,
                                                  join_tab_idx_arg,
                                                  exclude_expensive_cond,
                                                  retain_ref_cond, false);
        if (part)
          and_cond->argument_list()->push_back(part, thd->mem_root);
        else if (thd->is_error())
          return ((COND*) 0);
      }

      const uint n_parts= and_cond->argument_list()->elements;
      if (n_parts == 0)
        return (COND*) 0;                       // Always true
      if (n_parts == 1)
        return and_cond->argument_list()->head();

      /*
        Call fix_fields to propagate all properties of the children to
        the new parent Item. This should not be expensive because all
        children of Item_cond_and should be fixed by now.
      */
      if (and_cond->fix_fields(thd, 0))
        return (COND*) 0;
      and_cond->used_tables_cache= src_cond->used_tables_cache & tables;
      return and_cond;
    }

    /* OR list */
    if (is_top_and_level && used_table == rand_bit &&
        (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_bit)
    {
      /* This top level formula with RAND_TABLE_BIT has been already pushed */
      return (COND*) 0;
    }

    Item_cond_or *or_cond= new (thd->mem_root) Item_cond_or(thd);
    if (!or_cond)
      return (COND*) 0;                         // OOM /* purecov: inspected */

    List_iterator<Item> arg_it(*src_cond->argument_list());
    Item *arg;
    while ((arg= arg_it++))
    {
      /* Inside OR every disjunct must be checkable: used_table is 0 here */
      Item *part= make_cond_for_table_from_pred(thd, root_cond, arg,
                                                tables, 0L,
                                                join_tab_idx_arg,
                                                exclude_expensive_cond,
                                                retain_ref_cond, false);
      if (!part)
        return (COND*) 0;                       // Always true or error
      or_cond->argument_list()->push_back(part, thd->mem_root);
    }

    /*
      Call fix_fields to propagate all properties of the children to
      the new parent Item. This should not be expensive because all
      children of the new Item_cond_or should be fixed by now.
    */
    if (or_cond->fix_fields(thd, 0))
      return (COND*) 0;
    or_cond->used_tables_cache= src_cond->used_tables_cache;
    or_cond->top_level_item();
    return or_cond;
  }

  if (cond->basic_const_item())
    return cond;

  if (is_top_and_level && used_table == rand_bit &&
      (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_bit)
  {
    /* This top level formula with RAND_TABLE_BIT has been already pushed */
    return (COND*) 0;
  }

  /*
    Because the following test takes a while and it can be done
    table_count times, we mark each item that we have examined with the
    result of the test.
  */
  if ((cond->marker == MARKER_CHECK_ON_READ && !retain_ref_cond) ||
      (cond->used_tables() & ~tables))
    return (COND*) 0;                           // Can't check this yet

  if (cond->marker == MARKER_PROCESSED ||
      cond->eq_cmp_result() == Item::COND_OK)
  {
    cond->set_join_tab_idx((uint8) join_tab_idx_arg);
    return cond;                                // Not boolean op
  }

  /* Drop "field = item" equalities that the ref access already guarantees */
  if (cond->type() == Item::FUNC_ITEM &&
      ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
  {
    Item *lhs= ((Item_func*) cond)->arguments()[0]->real_item();
    Item *rhs= ((Item_func*) cond)->arguments()[1]->real_item();

    if ((lhs->type() == Item::FIELD_ITEM && !retain_ref_cond &&
         test_if_ref(root_cond, (Item_field*) lhs, rhs)) ||
        (rhs->type() == Item::FIELD_ITEM && !retain_ref_cond &&
         test_if_ref(root_cond, (Item_field*) rhs, lhs)))
    {
      cond->marker= MARKER_CHECK_ON_READ;       // Checked when read
      return (COND*) 0;
    }
  }

  cond->marker= MARKER_PROCESSED;
  cond->set_join_tab_idx((uint8) join_tab_idx_arg);
  return cond;
}
|
|
|
|
|
|
/*
|
|
The difference of this from make_cond_for_table() is that we're in the
|
|
following state:
|
|
1. conditions referring to 'tables' have been checked
|
|
2. conditions referring to sjm_tables have been checked, too
|
|
3. We need condition that couldn't be checked in #1 or #2 but
|
|
can be checked when we get both (tables | sjm_tables).
|
|
|
|
*/
|
|
static COND *
|
|
make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables,
|
|
table_map sjm_tables, bool inside_or_clause)
|
|
{
|
|
/*
|
|
We assume that conditions that refer to only join prefix tables or
|
|
sjm_tables have already been checked.
|
|
*/
|
|
if (!inside_or_clause)
|
|
{
|
|
table_map cond_used_tables= cond->used_tables();
|
|
if((!(cond_used_tables & ~tables) ||
|
|
!(cond_used_tables & ~sjm_tables)))
|
|
return (COND*) 0; // Already checked
|
|
}
|
|
|
|
/* AND/OR recursive descent */
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
|
|
{
|
|
/* Create new top level AND item */
|
|
Item_cond_and *new_cond= new (thd->mem_root) Item_cond_and(thd);
|
|
if (!new_cond)
|
|
return (COND*) 0; // OOM /* purecov: inspected */
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
Item *fix=make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables,
|
|
inside_or_clause);
|
|
if (fix)
|
|
new_cond->argument_list()->push_back(fix, thd->mem_root);
|
|
}
|
|
switch (new_cond->argument_list()->elements) {
|
|
case 0:
|
|
return (COND*) 0; // Always true
|
|
case 1:
|
|
return new_cond->argument_list()->head();
|
|
default:
|
|
/*
|
|
Item_cond_and do not need fix_fields for execution, its parameters
|
|
are fixed or do not need fix_fields, too
|
|
*/
|
|
new_cond->quick_fix_field();
|
|
new_cond->used_tables_cache=
|
|
((Item_cond_and*) cond)->used_tables_cache &
|
|
tables;
|
|
return new_cond;
|
|
}
|
|
}
|
|
else
|
|
{ // Or list
|
|
Item_cond_or *new_cond= new (thd->mem_root) Item_cond_or(thd);
|
|
if (!new_cond)
|
|
return (COND*) 0; // OOM /* purecov: inspected */
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
Item *fix= make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables,
|
|
/*inside_or_clause= */TRUE);
|
|
if (!fix)
|
|
return (COND*) 0; // Always true
|
|
new_cond->argument_list()->push_back(fix, thd->mem_root);
|
|
}
|
|
/*
|
|
Item_cond_or do not need fix_fields for execution, its parameters
|
|
are fixed or do not need fix_fields, too
|
|
*/
|
|
new_cond->quick_fix_field();
|
|
new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
|
|
new_cond->top_level_item();
|
|
return new_cond;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Because the following test takes a while and it can be done
|
|
table_count times, we mark each item that we have examined with the result
|
|
of the test
|
|
*/
|
|
|
|
if (cond->marker == MARKER_CHECK_ON_READ ||
|
|
(cond->used_tables() & ~(tables | sjm_tables)))
|
|
return (COND*) 0; // Can't check this yet
|
|
if (cond->marker == MARKER_PROCESSED || cond->eq_cmp_result() == Item::COND_OK)
|
|
return cond; // Not boolean op
|
|
|
|
/*
|
|
Remove equalities that are guaranteed to be true by use of 'ref' access
|
|
method
|
|
*/
|
|
if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
|
|
{
|
|
Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
|
|
Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
|
|
if (left_item->type() == Item::FIELD_ITEM &&
|
|
test_if_ref(root_cond, (Item_field*) left_item,right_item))
|
|
{
|
|
cond->marker= MARKER_CHECK_ON_READ;
|
|
return (COND*) 0;
|
|
}
|
|
if (right_item->type() == Item::FIELD_ITEM &&
|
|
test_if_ref(root_cond, (Item_field*) right_item,left_item))
|
|
{
|
|
cond->marker= MARKER_CHECK_ON_READ;
|
|
return (COND*) 0;
|
|
}
|
|
}
|
|
cond->marker= MARKER_PROCESSED;
|
|
return cond;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
|
|
Check if
|
|
- @table uses "ref"-like access
|
|
- it is based on "@field=certain_item" equality
|
|
- the equality will be true for any record returned by the access method
|
|
and return the certain_item if yes.
|
|
|
|
@detail
|
|
|
|
Equality won't necessarily hold if:
|
|
- the used index covers only part of the @field.
|
|
Suppose, we have a CHAR(5) field and INDEX(field(3)). if you make a lookup
|
|
for 'abc', you will get both record with 'abc' and with 'abcde'.
|
|
- The type of access is actually ref_or_null, and so @field can be either
|
|
a value or NULL.
|
|
|
|
@return
|
|
Item that the field will be equal to
|
|
NULL if no such item
|
|
*/
|
|
|
|
static Item *
|
|
part_of_refkey(TABLE *table,Field *field)
|
|
{
|
|
JOIN_TAB *join_tab= table->reginfo.join_tab;
|
|
if (!join_tab)
|
|
return (Item*) 0; // field from outer non-select (UPDATE,...)
|
|
|
|
uint ref_parts= join_tab->ref.key_parts;
|
|
if (ref_parts) /* if it's ref/eq_ref/ref_or_null */
|
|
{
|
|
uint key= join_tab->ref.key;
|
|
KEY *key_info= join_tab->get_keyinfo_by_key_no(key);
|
|
KEY_PART_INFO *key_part= key_info->key_part;
|
|
|
|
for (uint part=0 ; part < ref_parts ; part++,key_part++)
|
|
{
|
|
if (field->eq(key_part->field))
|
|
{
|
|
/*
|
|
Found the field in the key. Check that
|
|
1. ref_or_null doesn't alternate this component between a value and
|
|
a NULL
|
|
2. index fully covers the key
|
|
*/
|
|
if (part != join_tab->ref.null_ref_part && // (1)
|
|
!(key_part->key_part_flag & HA_PART_KEY_SEG)) // (2)
|
|
{
|
|
return join_tab->ref.items[part];
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return (Item*) 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Test if one can use the key to resolve ORDER BY.
|
|
|
|
@param join if not NULL, can use the join's top-level
|
|
multiple-equalities.
|
|
@param order Sort order
|
|
@param table Table to sort
|
|
@param idx Index to check
|
|
@param used_key_parts [out] NULL by default, otherwise return value for
|
|
used key parts.
|
|
|
|
|
|
@note
|
|
used_key_parts is set to correct key parts used if return value != 0
|
|
(On other cases, used_key_part may be changed)
|
|
Note that the value may actually be greater than the number of index
|
|
key parts. This can happen for storage engines that have the primary
|
|
key parts as a suffix for every secondary key.
|
|
|
|
@retval
|
|
1 key is ok.
|
|
@retval
|
|
0 Key can't be used
|
|
@retval
|
|
-1 Reverse key can be used
|
|
*/
|
|
|
|
static int test_if_order_by_key(JOIN *join,
|
|
ORDER *order, TABLE *table, uint idx,
|
|
uint *used_key_parts)
|
|
{
|
|
KEY_PART_INFO *key_part,*key_part_end;
|
|
key_part=table->key_info[idx].key_part;
|
|
key_part_end=key_part + table->key_info[idx].ext_key_parts;
|
|
key_part_map const_key_parts=table->const_key_parts[idx];
|
|
uint user_defined_kp= table->key_info[idx].user_defined_key_parts;
|
|
int reverse=0;
|
|
uint key_parts;
|
|
bool have_pk_suffix= false;
|
|
uint pk= table->s->primary_key;
|
|
ORDER::enum_order keypart_order;
|
|
DBUG_ENTER("test_if_order_by_key");
|
|
|
|
if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
|
|
table->key_info[idx].ext_key_part_map &&
|
|
pk != MAX_KEY && pk != idx)
|
|
{
|
|
have_pk_suffix= true;
|
|
}
|
|
|
|
for (; order ; order=order->next, const_key_parts>>=1)
|
|
{
|
|
Item_field *item_field= ((Item_field*) (*order->item)->real_item());
|
|
int flag;
|
|
|
|
/*
|
|
Skip key parts that are constants in the WHERE clause.
|
|
These are already skipped in the ORDER BY by const_expression_in_where()
|
|
for top level queries.
|
|
*/
|
|
for (; const_key_parts & 1 ; const_key_parts>>= 1)
|
|
{
|
|
if (item_field->contains(key_part->field))
|
|
{
|
|
/* Subquery with ORDER BY, continue with next field */
|
|
goto next_order_field;
|
|
}
|
|
key_part++;
|
|
}
|
|
|
|
/*
|
|
This check was in this function historically (although I think it's
|
|
better to check it outside of this function):
|
|
|
|
"Test if the primary key parts were all const (i.e. there's one row).
|
|
The sorting doesn't matter"
|
|
|
|
So, we're checking that
|
|
(1) this is an extended key
|
|
(2) we've reached its end
|
|
*/
|
|
key_parts= (uint)(key_part - table->key_info[idx].key_part);
|
|
if (have_pk_suffix &&
|
|
reverse == 0 && // all were =const so far
|
|
key_parts == table->key_info[idx].ext_key_parts &&
|
|
table->const_key_parts[pk] == PREV_BITS(uint,
|
|
table->key_info[pk].
|
|
user_defined_key_parts))
|
|
{
|
|
key_parts= 0;
|
|
reverse= 1; // Key is ok to use
|
|
goto ok;
|
|
}
|
|
|
|
if (key_part == key_part_end ||
|
|
!key_part->field->part_of_sortkey.is_set(idx))
|
|
{
|
|
/*
|
|
There are some items left in ORDER BY that we don't have in the key
|
|
*/
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
if (!item_field->contains(key_part->field))
|
|
DBUG_RETURN(0);
|
|
|
|
keypart_order= ((key_part->key_part_flag & HA_REVERSE_SORT) ?
|
|
ORDER::ORDER_DESC : ORDER::ORDER_ASC);
|
|
/* set flag to 1 if we can use read-next on key, else to -1 */
|
|
flag= (order->direction == keypart_order) ? 1 : -1;
|
|
if (reverse && flag != reverse)
|
|
DBUG_RETURN(0);
|
|
reverse=flag; // Remember if reverse
|
|
|
|
next_order_field:
|
|
if (key_part < key_part_end)
|
|
key_part++;
|
|
}
|
|
|
|
key_parts= (uint) (key_part - table->key_info[idx].key_part);
|
|
|
|
if (reverse == -1 &&
|
|
!(table->file->index_flags(idx, user_defined_kp-1, 1) & HA_READ_PREV))
|
|
reverse= 0; // Index can't be used
|
|
|
|
if (have_pk_suffix && reverse == -1)
|
|
{
|
|
uint pk_parts= table->key_info[pk].user_defined_key_parts;
|
|
if (!(table->file->index_flags(pk, pk_parts-1, 1) & HA_READ_PREV))
|
|
reverse= 0; // Index can't be used
|
|
}
|
|
|
|
ok:
|
|
*used_key_parts= key_parts;
|
|
DBUG_RETURN(reverse);
|
|
}
|
|
|
|
|
|
/**
|
|
Find shortest key suitable for full table scan.
|
|
|
|
@param table Table to scan
|
|
@param usable_keys Allowed keys
|
|
|
|
@return
|
|
MAX_KEY no suitable key found
|
|
key index otherwise
|
|
|
|
@notes
|
|
We should not use keyread_time() as in the case of disk_read_cost= 0
|
|
all keys would be regarded equal.
|
|
*/
|
|
|
|
uint find_shortest_key(TABLE *table, const key_map *usable_keys)
|
|
{
|
|
size_t min_length= INT_MAX32;
|
|
uint best= MAX_KEY;
|
|
uint possible_keys= usable_keys->bits_set();
|
|
|
|
if (possible_keys)
|
|
{
|
|
if (possible_keys == 1)
|
|
return usable_keys->find_first_bit();
|
|
|
|
for (uint nr=0; nr < table->s->keys ; nr++)
|
|
{
|
|
if (usable_keys->is_set(nr))
|
|
{
|
|
size_t length= table->key_storage_length(nr);
|
|
if (length < min_length)
|
|
{
|
|
min_length= length;
|
|
best= nr;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return best;
|
|
}
|
|
|
|
|
|
/**
|
|
Test if a second key is the subkey of the first one.
|
|
|
|
@param key_part First key parts
|
|
@param ref_key_part Second key parts
|
|
@param ref_key_part_end Last+1 part of the second key
|
|
|
|
@note
|
|
Second key MUST be shorter than the first one.
|
|
|
|
@retval
|
|
1 is a subkey
|
|
@retval
|
|
0 no sub key
|
|
*/
|
|
|
|
inline bool
|
|
is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part,
|
|
KEY_PART_INFO *ref_key_part_end)
|
|
{
|
|
for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++)
|
|
if (!key_part->field->eq(ref_key_part->field))
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
/**
|
|
Test if we can use one of the 'usable_keys' instead of 'ref' key
|
|
for sorting.
|
|
|
|
@param ref Number of key, used for WHERE clause
|
|
@param usable_keys Keys for testing
|
|
|
|
@return
|
|
- MAX_KEY If we can't use other key
|
|
- the number of found key Otherwise
|
|
*/
|
|
|
|
static uint
|
|
test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
|
|
const key_map *usable_keys)
|
|
{
|
|
uint nr;
|
|
uint min_length= (uint) ~0;
|
|
uint best= MAX_KEY;
|
|
KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part;
|
|
KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts;
|
|
|
|
/*
|
|
Find the shortest key that
|
|
- produces the required ordering
|
|
- has key #ref (up to ref_key_parts) as its subkey.
|
|
*/
|
|
for (nr= 0 ; nr < table->s->keys ; nr++)
|
|
{
|
|
uint not_used;
|
|
if (usable_keys->is_set(nr) &&
|
|
table->key_info[nr].key_length < min_length &&
|
|
table->key_info[nr].user_defined_key_parts >= ref_key_parts &&
|
|
is_subkey(table->key_info[nr].key_part, ref_key_part,
|
|
ref_key_part_end) &&
|
|
test_if_order_by_key(NULL, order, table, nr, ¬_used))
|
|
{
|
|
min_length= table->key_info[nr].key_length;
|
|
best= nr;
|
|
}
|
|
}
|
|
return best;
|
|
}
|
|
|
|
|
|
/**
|
|
Check if GROUP BY/DISTINCT can be optimized away because the set is
|
|
already known to be distinct.
|
|
|
|
Used in removing the GROUP BY/DISTINCT of the following types of
|
|
statements:
|
|
@code
|
|
SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref>
|
|
[GROUP BY <unique_key_cols>,...]
|
|
@endcode
|
|
|
|
If (a,b,c is distinct)
|
|
then <any combination of a,b,c>,{whatever} is also distinct
|
|
|
|
This function checks if all the key parts of any of the unique keys
|
|
of the table are referenced by a list : either the select list
|
|
through find_field_in_item_list or GROUP BY list through
|
|
find_field_in_order_list.
|
|
If the above holds and the key parts cannot contain NULLs then we
|
|
can safely remove the GROUP BY/DISTINCT,
|
|
as no result set can be more distinct than an unique key.
|
|
|
|
@param table The table to operate on.
|
|
@param find_func function to iterate over the list and search
|
|
for a field
|
|
|
|
@retval
|
|
1 found
|
|
@retval
|
|
0 not found.
|
|
*/
|
|
|
|
static bool
|
|
list_contains_unique_index(TABLE *table,
|
|
bool (*find_func) (Field *, void *), void *data)
|
|
{
|
|
for (uint keynr= 0; keynr < table->s->keys; keynr++)
|
|
{
|
|
if (keynr == table->s->primary_key ||
|
|
(table->key_info[keynr].flags & HA_NOSAME))
|
|
{
|
|
KEY *keyinfo= table->key_info + keynr;
|
|
KEY_PART_INFO *key_part, *key_part_end;
|
|
|
|
for (key_part=keyinfo->key_part,
|
|
key_part_end=key_part+ keyinfo->user_defined_key_parts;
|
|
key_part < key_part_end;
|
|
key_part++)
|
|
{
|
|
if (key_part->field->maybe_null() ||
|
|
!find_func(key_part->field, data))
|
|
break;
|
|
}
|
|
if (key_part == key_part_end)
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Helper function for list_contains_unique_index.
|
|
Find a field reference in a list of ORDER structures.
|
|
Finds a direct reference of the Field in the list.
|
|
|
|
@param field The field to search for.
|
|
@param data ORDER *.The list to search in
|
|
|
|
@retval
|
|
1 found
|
|
@retval
|
|
0 not found.
|
|
*/
|
|
|
|
static bool
|
|
find_field_in_order_list (Field *field, void *data)
|
|
{
|
|
ORDER *group= (ORDER *) data;
|
|
bool part_found= 0;
|
|
for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
|
|
{
|
|
Item *item= (*tmp_group->item)->real_item();
|
|
if (item->type() == Item::FIELD_ITEM &&
|
|
((Item_field*) item)->field->eq(field))
|
|
{
|
|
part_found= 1;
|
|
break;
|
|
}
|
|
}
|
|
return part_found;
|
|
}
|
|
|
|
|
|
/**
|
|
Helper function for list_contains_unique_index.
|
|
Find a field reference in a dynamic list of Items.
|
|
Finds a direct reference of the Field in the list.
|
|
|
|
@param[in] field The field to search for.
|
|
@param[in] data List<Item> *.The list to search in
|
|
|
|
@retval
|
|
1 found
|
|
@retval
|
|
0 not found.
|
|
*/
|
|
|
|
static bool
|
|
find_field_in_item_list (Field *field, void *data)
|
|
{
|
|
List<Item> *fields= (List<Item> *) data;
|
|
bool part_found= 0;
|
|
List_iterator<Item> li(*fields);
|
|
Item *item;
|
|
|
|
while ((item= li++))
|
|
{
|
|
if (item->real_item()->type() == Item::FIELD_ITEM &&
|
|
((Item_field*) (item->real_item()))->field->eq(field))
|
|
{
|
|
part_found= 1;
|
|
break;
|
|
}
|
|
}
|
|
return part_found;
|
|
}
|
|
|
|
|
|
/*
|
|
Fill *col_keys with a union of Field::part_of_sortkey of all fields
|
|
that belong to 'table' and are equal to 'item_field'.
|
|
*/
|
|
|
|
static
|
|
void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table,
|
|
Item_field *item_field,
|
|
key_map *col_keys)
|
|
{
|
|
col_keys->clear_all();
|
|
col_keys->merge(item_field->field->part_of_sortkey);
|
|
|
|
if (!optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP))
|
|
return;
|
|
|
|
Item_equal *item_eq= NULL;
|
|
|
|
if (item_field->item_equal)
|
|
{
|
|
/*
|
|
The item_field is from ORDER structure, but it already has an item_equal
|
|
pointer set (UseMultipleEqualitiesToRemoveTempTable code have set it)
|
|
*/
|
|
item_eq= item_field->item_equal;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Walk through join's muliple equalities and find the one that contains
|
|
item_field.
|
|
*/
|
|
if (!join->cond_equal)
|
|
return;
|
|
table_map needed_tbl_map= item_field->used_tables() | table->map;
|
|
List_iterator<Item_equal> li(join->cond_equal->current_level);
|
|
Item_equal *cur_item_eq;
|
|
while ((cur_item_eq= li++))
|
|
{
|
|
if ((cur_item_eq->used_tables() & needed_tbl_map) &&
|
|
cur_item_eq->contains(item_field->field))
|
|
{
|
|
item_eq= cur_item_eq;
|
|
item_field->item_equal= item_eq; // Save the pointer to our Item_equal.
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (item_eq)
|
|
{
|
|
Item_equal_fields_iterator it(*item_eq);
|
|
Item *item;
|
|
/* Loop through other members that belong to table table */
|
|
while ((item= it++))
|
|
{
|
|
if (item->type() == Item::FIELD_ITEM &&
|
|
((Item_field*)item)->field->table == table)
|
|
{
|
|
col_keys->merge(((Item_field*)item)->field->part_of_sortkey);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
This is called when switching table access to produce records
|
|
in reverse order.
|
|
|
|
@detail
|
|
- Disable "Range checked for each record" (Is this strictly necessary
|
|
here?)
|
|
- Disable Index Condition Pushdown and Rowid Filtering.
|
|
|
|
IndexConditionPushdownAndReverseScans, RowidFilteringAndReverseScans:
|
|
Suppose we're computing
|
|
|
|
select * from t1
|
|
where
|
|
key1 between 10 and 20 and extra_condition
|
|
order by key1 desc
|
|
|
|
here the range access uses a reverse-ordered scan on (1 <= key1 <= 10) and
|
|
extra_condition is checked by either ICP or Rowid Filtering.
|
|
|
|
Also suppose that extra_condition happens to be false for rows of t1 that
|
|
do not satisfy the "10 <= key1 <= 20" condition.
|
|
|
|
For forward ordered range scan, the SQL layer will make these calls:
|
|
|
|
h->read_range_first(RANGE(10 <= key1 <= 20));
|
|
while (h->read_range_next()) { ... }
|
|
|
|
The storage engine sees the end endpoint of "key1<=20" and can stop scanning
|
|
as soon as it encounters a row with key1>20.
|
|
|
|
For backward-ordered range scan, the SQL layer will make these calls:
|
|
|
|
h->index_read_map(key1=20, HA_READ_PREFIX_LAST_OR_PREV);
|
|
while (h->index_prev()) {
|
|
if (cmp_key(h->record, "key1=10" )<0)
|
|
break; // end of range
|
|
...
|
|
}
|
|
|
|
Note that the check whether we've walked beyond the key=10 endpoint is
|
|
made at the SQL layer. The storage engine has no information about the left
|
|
endpoint of the interval we're scanning. If all rows before that endpoint
|
|
do not satisfy ICP condition or do not pass the Rowid Filter, the storage
|
|
engine will enumerate the records until the table start.
|
|
|
|
In MySQL, the API is extended with set_end_range() call so that the storage
|
|
engine "knows" when to stop scanning.
|
|
*/
|
|
|
|
static void prepare_for_reverse_ordered_access(JOIN_TAB *tab)
|
|
{
|
|
/* Cancel "Range checked for each record" */
|
|
if (tab->use_quick == 2)
|
|
{
|
|
tab->use_quick= 1;
|
|
tab->read_first_record= join_init_read_record;
|
|
}
|
|
/*
|
|
Cancel Pushed Index Condition, as it doesn't work for reverse scans.
|
|
*/
|
|
if (tab->select && tab->select->pre_idx_push_select_cond)
|
|
{
|
|
tab->set_cond(tab->select->pre_idx_push_select_cond);
|
|
tab->table->file->cancel_pushed_idx_cond();
|
|
}
|
|
/*
|
|
The same with Rowid Filter: it doesn't work with reverse scans so cancel
|
|
it, too.
|
|
*/
|
|
{
|
|
/*
|
|
Rowid Filter is initialized at a later stage. It is not pushed to
|
|
the storage engine yet:
|
|
*/
|
|
DBUG_ASSERT(!tab->table->file->pushed_rowid_filter);
|
|
tab->range_rowid_filter_info= NULL;
|
|
delete tab->rowid_filter;
|
|
tab->rowid_filter= NULL;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Given a table and order, find indexes that produce rows in the order
|
|
|
|
@param usable_keys IN Bitmap of keys we can use
|
|
OUT Bitmap of indexes that produce rows in order.
|
|
|
|
@return
|
|
false Some indexes were found
|
|
true No indexes found
|
|
*/
|
|
|
|
static
|
|
bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order,
|
|
key_map *usable_keys)
|
|
{
|
|
/* Find indexes that cover all ORDER/GROUP BY fields */
|
|
for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
|
|
{
|
|
Item *item= (*tmp_order->item)->real_item();
|
|
if (item->type() != Item::FIELD_ITEM)
|
|
{
|
|
usable_keys->clear_all();
|
|
return true; /* No suitable keys */
|
|
}
|
|
|
|
/*
|
|
Take multiple-equalities into account. Suppose we have
|
|
ORDER BY col1, col10
|
|
and there are
|
|
multiple-equal(col1, col2, col3),
|
|
multiple-equal(col10, col11).
|
|
|
|
Then,
|
|
- when item=col1, we find the set of indexes that cover one of {col1,
|
|
col2, col3}
|
|
- when item=col10, we find the set of indexes that cover one of {col10,
|
|
col11}
|
|
|
|
And we compute an intersection of these sets to find set of indexes that
|
|
cover all ORDER BY components.
|
|
*/
|
|
key_map col_keys;
|
|
compute_part_of_sort_key_for_equals(join, table, (Item_field*)item,
|
|
&col_keys);
|
|
usable_keys->intersect(col_keys);
|
|
if (usable_keys->is_clear_all())
|
|
return true; // No usable keys
|
|
}
|
|
return false;
|
|
|
|
}
|
|
|
|
/**
  Test if we can skip the ORDER BY by using an index.

  If we can use an index, the JOIN_TAB / tab->select struct
  is changed to use the index.

  The index must cover all fields in <order>, or it will not be considered.

  The function works in four steps (see the "Step #N" comments in the body):
    1. Find the indexes that produce the required ordering.
    2. Note the index used by the current access method (ref or quick select).
    3. Check whether that index, or another index it is a prefix of,
       produces the required ordering.
    4. Otherwise ask test_if_cheaper_ordering() for the best ordering index.

  @param tab           Join tab to check. Asserted to be the first non-const
                       table of tab->join.
  @param order         The ORDER BY list that should be satisfied.
  @param select_limit  Row limit handed to test_if_cheaper_ordering(), which
                       may update the local copy.
  @param no_changes    No changes will be made to the query plan.
  @param map           Initial set of candidate keys. Keys disabled by
                       ALTER TABLE ... DISABLE KEYS are expected to be
                       excluded from it already.
  @param fatal_error   OUT A fatal error occurred. Reset to false on entry
                       and set to true when SQL_SELECT::test_quick_select()
                       returns SQL_SELECT::ERROR.

  @todo
    - sergeyp: Results of all index merge selects actually are ordered
      by clustered PK values.

  @retval
    0    We have to use filesort to do the sorting
  @retval
    1    We can use an index.
*/
|
|
|
|
static bool
|
|
test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
|
bool no_changes, const key_map *map, bool *fatal_error)
|
|
{
|
|
int ref_key;
|
|
uint UNINIT_VAR(ref_key_parts);
|
|
int order_direction= 0;
|
|
uint used_key_parts= 0;
|
|
TABLE *table=tab->table;
|
|
SQL_SELECT *select=tab->select;
|
|
key_map usable_keys;
|
|
QUICK_SELECT_I *save_quick= select ? select->quick : 0;
|
|
Item *orig_cond= 0;
|
|
bool orig_cond_saved= false;
|
|
int best_key= -1;
|
|
bool changed_key= false;
|
|
THD *thd= tab->join->thd;
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_array trace_arr(thd, "test_if_skip_sort_order");
|
|
DBUG_ENTER("test_if_skip_sort_order");
|
|
|
|
|
|
*fatal_error= false;
|
|
/* Check that we are always called with first non-const table */
|
|
DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables);
|
|
|
|
|
|
/* Sorting a single row can always be skipped */
|
|
if (tab->type == JT_EQ_REF ||
|
|
tab->type == JT_CONST ||
|
|
tab->type == JT_SYSTEM)
|
|
{
|
|
Json_writer_object trace_skip(thd);
|
|
trace_skip.add("skipped", "single row access method");
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
|
/*
  Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
  been taken into account.
*/
|
|
usable_keys= *map;
|
|
|
|
|
// Step #1: Find indexes that produce the required ordering.
|
|
if (find_indexes_matching_order(tab->join, table, order, &usable_keys))
|
|
{
|
|
DBUG_RETURN(false); // Cannot skip sorting
|
|
}
|
|
|
|
|
/*
  Step #2: Analyze the current access method. Note the used index as ref_key
  and #used keyparts in ref_key_parts.
*/
|
|
ref_key= -1;
|
|
/* Test if constant range in WHERE */
|
|
if (tab->ref.key >= 0 && tab->ref.key_parts)
|
|
{
|
|
ref_key= tab->ref.key;
|
|
ref_key_parts= tab->ref.key_parts;
|
|
/*
  todo: why does JT_REF_OR_NULL mean filesort? We could find another index
  that satisfies the ordering. I would just set ref_key=MAX_KEY here...
*/
|
|
if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT ||
|
|
tab->ref.uses_splitting)
|
|
goto use_filesort;
|
|
}
|
|
else if (select && select->quick) // Range found by opt_range
|
|
{
|
|
int quick_type= select->quick->get_type();
|
|
/*
  assume results are not ordered when index merge is used
  TODO: sergeyp: Results of all index merge selects actually are ordered
  by clustered PK values.
*/
|
|
|
|
|
if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
|
|
{
|
|
/*
  we set ref_key=MAX_KEY instead of -1, because test_if_cheaper_ordering()
  assumes that "ref_key==-1" means doing full index scan.
  (This is not very straightforward and we got into this situation for
  historical reasons. Should be fixed at some point).
*/
|
|
ref_key= MAX_KEY;
|
|
}
|
|
else
|
|
{
|
|
ref_key= select->quick->index;
|
|
ref_key_parts= select->quick->used_key_parts;
|
|
}
|
|
}
|
|
|
|
|
/*
  Step #3: Check if index ref_key that we're using produces the required
  ordering or if there is another index new_ref_key such that
    - ref_key is a prefix of new_ref_key (so, access method can be reused)
    - new_ref_key produces the required ordering
*/
|
|
if (ref_key >= 0 && ref_key != MAX_KEY)
|
|
{
|
|
/* Current access method uses index ref_key with ref_key_parts parts */
|
|
if (!usable_keys.is_set(ref_key))
|
|
{
|
|
/* However, ref_key doesn't match the needed ordering */
|
|
uint new_ref_key;
|
|
|
|
|
/*
  If using index only read, only consider other possible index only
  keys
*/
|
|
if (table->covering_keys.is_set(ref_key))
|
|
usable_keys.intersect(table->covering_keys);
|
|
if (tab->pre_idx_push_select_cond)
|
|
{
|
|
orig_cond= tab->set_cond(tab->pre_idx_push_select_cond);
|
|
orig_cond_saved= true;
|
|
}
|
|
|
|
|
if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts,
|
|
&usable_keys)) < MAX_KEY)
|
|
{
|
|
/*
  Index new_ref_key
   - produces the required ordering,
   - also has the same columns as ref_key for #ref_key_parts (this
     means we will read the same number of rows as with ref_key).
*/
|
|
|
|
/*
  If new_ref_key allows to construct a quick select which uses more key
  parts than ref(new_ref_key) would, do that.

  Otherwise, construct a ref access (todo: it's not clear what is the
  win in using ref access when we could use quick select also?)
*/
|
|
if ((table->opt_range_keys.is_set(new_ref_key) &&
|
|
table->opt_range[new_ref_key].key_parts > ref_key_parts) ||
|
|
!(tab->ref.key >= 0))
|
|
{
|
|
/*
  The range optimizer constructed QUICK_RANGE for ref_key, and
  we want to use instead new_ref_key as the index. We can't
  just change the index of the quick select, because this may
  result in an inconsistent QUICK_SELECT object. Below we
  create a new QUICK_SELECT from scratch so that all its
  parameters are set correctly by the range optimizer.
*/
|
|
key_map new_ref_key_map;
|
|
COND *save_cond;
|
|
quick_select_return res;
|
|
new_ref_key_map.clear_all(); // Force the creation of quick select
|
|
new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.
|
|
|
|
|
/* Reset quick; This will be restored in 'use_filesort' if needed */
|
|
select->quick= 0;
|
|
save_cond= select->cond;
|
|
if (select->pre_idx_push_select_cond)
|
|
select->cond= select->pre_idx_push_select_cond;
|
|
res= select->test_quick_select(tab->join->thd, new_ref_key_map, 0,
|
|
(tab->join->select_options &
|
|
OPTION_FOUND_ROWS) ?
|
|
HA_POS_ERROR :
|
|
tab->join->unit->
|
|
lim.get_select_limit(),
|
|
TRUE, TRUE, FALSE, FALSE,
|
|
Item_func::BITMAP_ALL);
|
|
// if we cannot use quick select
|
|
if (res != SQL_SELECT::OK || !tab->select->quick)
|
|
{
|
|
if (res == SQL_SELECT::ERROR)
|
|
*fatal_error= true;
|
|
select->cond= save_cond;
|
|
goto use_filesort;
|
|
}
|
|
tab->type= JT_RANGE;
|
|
tab->ref.key= -1;
|
|
tab->ref.key_parts= 0;
|
|
tab->use_quick= 1;
|
|
best_key= new_ref_key;
|
|
/*
  We don't restore select->cond as we want to use the
  original condition as index condition pushdown is not
  active for the new index.
  todo: why not perform index condition pushdown for the new index?
*/
|
|
}
|
|
else
|
|
{
|
|
/*
  We'll use ref access method on key new_ref_key. In general case
  the index search tuple for new_ref_key will be different (e.g.
  when one index is defined as (part1, part2, ...) and another as
  (part1, part2(N), ...) and the WHERE clause contains
  "part1 = const1 AND part2=const2").
  So we build tab->ref from scratch here.
*/
|
|
KEYUSE *keyuse= tab->keyuse;
|
|
/* Advance to the first KEYUSE entry of this table that is for new_ref_key */
while (keyuse->key != new_ref_key && keyuse->table == tab->table)
|
|
keyuse++;
|
|
if (create_ref_for_key(tab->join, tab, keyuse, FALSE,
|
|
(tab->join->const_table_map |
|
|
OUTER_REF_TABLE_BIT)))
|
|
goto use_filesort;
|
|
|
|
|
pick_table_access_method(tab);
|
|
}
|
|
|
|
|
ref_key= new_ref_key;
|
|
changed_key= true;
|
|
}
|
|
}
|
|
/* Check if we get the rows in requested sorted order by using the key */
|
|
if (usable_keys.is_set(ref_key) &&
|
|
(order_direction= test_if_order_by_key(tab->join, order,table,ref_key,
|
|
&used_key_parts)))
|
|
goto check_reverse_order;
|
|
}
|
|
|
|
|
/*
  Step #4: Go through all indexes that produce required ordering (in
  usable_keys) and check if any of them is cheaper than ref_key
*/
|
|
{
|
|
uint UNINIT_VAR(best_key_parts);
|
|
uint saved_best_key_parts= 0;
|
|
int best_key_direction= 0;
|
|
JOIN *join= tab->join;
|
|
ha_rows table_records= table->stat_records();
|
|
double new_read_time_dummy;
|
|
|
|
|
test_if_cheaper_ordering(FALSE, tab, order, table, usable_keys,
|
|
ref_key, select_limit,
|
|
&best_key, &best_key_direction,
|
|
&select_limit, &new_read_time_dummy,
|
|
&best_key_parts,
|
|
&saved_best_key_parts);
|
|
|
|
|
/*
  filesort() and join cache are usually faster than reading in
  index order and not using join cache, except in case that chosen
  index is clustered key.
*/
|
|
if (best_key < 0 ||
|
|
((select_limit >= table_records) &&
|
|
((tab->type == JT_ALL || tab->type == JT_RANGE) &&
|
|
tab->join->table_count > tab->join->const_tables + 1) &&
|
|
!table->is_clustering_key(best_key)))
|
|
goto use_filesort;
|
|
|
|
|
if (select && table->opt_range_keys.is_set(best_key) && best_key != ref_key)
|
|
{
|
|
key_map tmp_map;
|
|
tmp_map.clear_all(); // Force the creation of quick select
|
|
tmp_map.set_bit(best_key); // only best_key.
|
|
select->quick= 0;
|
|
|
|
|
bool cond_saved= false;
|
|
Item *saved_cond;
|
|
|
|
|
/*
  Index Condition Pushdown may have removed parts of the condition for
  this table. Temporarily put them back because we want the whole
  condition for the range analysis.
*/
|
|
if (select->pre_idx_push_select_cond)
|
|
{
|
|
saved_cond= select->cond;
|
|
select->cond= select->pre_idx_push_select_cond;
|
|
cond_saved= true;
|
|
}
|
|
|
|
|
quick_select_return res;
|
|
res = select->test_quick_select(join->thd, tmp_map, 0,
|
|
join->select_options & OPTION_FOUND_ROWS ?
|
|
HA_POS_ERROR :
|
|
join->unit->lim.get_select_limit(),
|
|
TRUE, FALSE, FALSE, FALSE,
|
|
Item_func::BITMAP_ALL);
|
|
if (res == SQL_SELECT::ERROR)
|
|
{
|
|
*fatal_error= true;
|
|
goto use_filesort;
|
|
}
|
|
|
|
|
if (cond_saved)
|
|
select->cond= saved_cond;
|
|
}
|
|
order_direction= best_key_direction;
|
|
/*
  saved_best_key_parts is actual number of used keyparts found by
  the test_if_order_by_key function. It could differ from
  keyinfo->user_defined_key_parts, thus we have to restore it in
  case of desc order as it affects QUICK_SELECT_DESC behaviour.
*/
|
|
used_key_parts= (order_direction == -1) ?
|
|
saved_best_key_parts : best_key_parts;
|
|
changed_key= true;
|
|
}
|
|
|
|
|
/*
  Reached either by the jump from Step #3 (order_direction returned by
  test_if_order_by_key()) or by falling through from Step #4
  (order_direction taken from best_key_direction).
*/
check_reverse_order:
|
|
DBUG_ASSERT(order_direction != 0);
|
|
|
|
|
if (order_direction == -1) // If ORDER BY ... DESC
|
|
{
|
|
int quick_type;
|
|
if (select && select->quick)
|
|
{
|
|
/*
  Don't reverse the sort order, if it's already done.
  (In some cases test_if_order_by_key() can be called multiple times)
*/
|
|
if (select->quick->reverse_sorted())
|
|
goto skipped_filesort;
|
|
|
|
|
quick_type= select->quick->get_type();
|
|
if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
|
|
{
|
|
tab->limit= 0;
|
|
goto use_filesort; // Use filesort
|
|
}
|
|
}
|
|
}
|
|
|
|
|
/*
  Update query plan with access pattern for doing ordered access
  according to what we have decided above.
*/
|
|
if (!no_changes) // We are allowed to update QEP
|
|
{
|
|
if (best_key >= 0)
|
|
{
|
|
bool quick_created=
|
|
(select && select->quick && select->quick!=save_quick);
|
|
|
|
|
if (!quick_created)
|
|
{
|
|
if (select) // Throw any existing quick select
|
|
select->quick= 0; // Cleanup either reset to save_quick,
|
|
// or 'delete save_quick'
|
|
tab->index= best_key;
|
|
tab->read_first_record= (order_direction > 0 ?
|
|
join_read_first:
|
|
join_read_last);
|
|
tab->type=JT_NEXT; // Read with index_first(), index_next()
|
|
|
|
|
/*
  Currently usage of rowid filters is not supported in InnoDB
  if the table is accessed by the primary key
*/
|
|
if (tab->rowid_filter &&
|
|
(table->file->is_clustering_key(tab->index) ||
|
|
table->covering_keys.is_set(best_key)))
|
|
tab->clear_range_rowid_filter();
|
|
|
|
|
if (tab->pre_idx_push_select_cond)
|
|
{
|
|
tab->set_cond(tab->pre_idx_push_select_cond);
|
|
/*
  orig_cond is a part of pre_idx_push_cond,
  no need to restore it.
*/
|
|
orig_cond= 0;
|
|
orig_cond_saved= false;
|
|
}
|
|
|
|
|
table->file->ha_index_or_rnd_end();
|
|
if (tab->join->select_options & SELECT_DESCRIBE)
|
|
{
|
|
tab->ref.key= -1;
|
|
tab->ref.key_parts= 0;
|
|
if (select_limit < table->stat_records())
|
|
tab->limit= select_limit;
|
|
}
|
|
}
|
|
else if (tab->type != JT_ALL || tab->select->quick)
|
|
{
|
|
/*
  We're about to use a quick access to the table.
  We need to change the access method so as the quick access
  method is actually used.
*/
|
|
DBUG_ASSERT(tab->select->quick);
|
|
tab->type= JT_RANGE;
|
|
tab->use_quick=1;
|
|
tab->ref.key= -1;
|
|
tab->ref.key_parts=0; // Don't use ref key.
|
|
if (tab->rowid_filter)
|
|
tab->clear_range_rowid_filter();
|
|
tab->read_first_record= join_init_read_record;
|
|
if (tab->is_using_loose_index_scan())
|
|
tab->join->tmp_table_param.precomputed_group_by= TRUE;
|
|
|
|
|
/*
  Restore the original condition as changes done by pushdown
  condition are not relevant anymore
*/
|
|
if (tab->select && tab->select->pre_idx_push_select_cond)
|
|
{
|
|
tab->set_cond(tab->select->pre_idx_push_select_cond);
|
|
tab->table->file->cancel_pushed_idx_cond();
|
|
}
|
|
/*
  TODO: update the number of records in join->best_positions[tablenr]
*/
|
|
}
|
|
} // best_key >= 0
|
|
|
|
|
if (order_direction == -1) // If ORDER BY ... DESC
|
|
{
|
|
if (select && select->quick)
|
|
{
|
|
/* ORDER BY range_key DESC */
|
|
QUICK_SELECT_I *tmp= select->quick->make_reverse(used_key_parts);
|
|
if (!tmp)
|
|
{
|
|
tab->limit= 0;
|
|
goto use_filesort; // Reverse sort failed -> filesort
|
|
}
|
|
prepare_for_reverse_ordered_access(tab);
|
|
|
|
|
if (select->quick == save_quick)
|
|
save_quick= 0; // make_reverse() consumed it
|
|
select->set_quick(tmp);
|
|
}
|
|
else if (tab->type != JT_NEXT && tab->type != JT_REF_OR_NULL &&
|
|
tab->ref.key >= 0 && tab->ref.key_parts <= used_key_parts)
|
|
{
|
|
/*
  SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC

  Use a traversal function that starts by reading the last row
  with key part (A) and then traverse the index backwards.
*/
|
|
tab->read_first_record= join_read_last_key;
|
|
tab->read_record.read_record_func= join_read_prev_same;
|
|
prepare_for_reverse_ordered_access(tab);
|
|
}
|
|
}
|
|
else if (select && select->quick)
|
|
{
|
|
/* Cancel "Range checked for each record" */
|
|
if (tab->use_quick == 2)
|
|
{
|
|
tab->use_quick= 1;
|
|
tab->read_first_record= join_init_read_record;
|
|
}
|
|
select->quick->need_sorted_output();
|
|
}
|
|
|
|
|
if (tab->type == JT_EQ_REF)
|
|
tab->read_record.unlock_row= join_read_key_unlock_row;
|
|
else if (tab->type == JT_CONST)
|
|
tab->read_record.unlock_row= join_const_unlock_row;
|
|
else
|
|
tab->read_record.unlock_row= rr_unlock_row;
|
|
|
|
|
} // QEP has been modified
|
|
|
|
|
/*
  Cleanup:
  We may have both a 'select->quick' and 'save_quick' (original)
  at this point. Delete the one that we won't use.
*/
|
|
|
|
|
/* Success exit: an index delivers the rows in the required order */
skipped_filesort:
|
|
// Keep current (ordered) select->quick
|
|
if (select && save_quick != select->quick)
|
|
{
|
|
delete save_quick;
|
|
save_quick= NULL;
|
|
}
|
|
if (orig_cond_saved && !changed_key)
|
|
tab->set_cond(orig_cond);
|
|
if (!no_changes && changed_key && table->file->pushed_idx_cond)
|
|
table->file->cancel_pushed_idx_cond();
|
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
|
/*
  Failure exit: ordered index access was rejected. Put back the quick
  select and the table condition that were in place on entry.
*/
use_filesort:
|
|
// Restore original save_quick
|
|
if (select && select->quick != save_quick)
|
|
{
|
|
delete select->quick;
|
|
select->quick= save_quick;
|
|
}
|
|
if (orig_cond_saved)
|
|
tab->set_cond(orig_cond);
|
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
If not selecting by given key, create an index how records should be read
|
|
|
|
SYNOPSIS
|
|
create_sort_index()
|
|
thd Thread handler
|
|
join Join with table to sort
|
|
join_tab What table to sort
|
|
fsort Filesort object. NULL means "use tab->filesort".
|
|
|
|
IMPLEMENTATION
|
|
- If there is an index that can be used, the first non-const join_tab in
|
|
'join' is modified to use this index.
|
|
- If no index, create with filesort() an index file that can be used to
|
|
retrieve rows in order (should be done with 'read_record').
|
|
The sorted data is stored in tab->filesort
|
|
|
|
RETURN VALUES
|
|
0 ok
|
|
-1 Some fatal error
|
|
1 No records
|
|
*/
|
|
|
|
int
|
|
create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort)
|
|
{
|
|
TABLE *table;
|
|
SQL_SELECT *select;
|
|
bool quick_created= FALSE;
|
|
SORT_INFO *file_sort= 0;
|
|
DBUG_ENTER("create_sort_index");
|
|
|
|
if (fsort == NULL)
|
|
fsort= tab->filesort;
|
|
|
|
table= tab->table;
|
|
select= fsort->select;
|
|
|
|
table->status=0; // May be wrong if quick_select
|
|
|
|
if (!tab->preread_init_done && tab->preread_init())
|
|
goto err;
|
|
|
|
// If table has a range, move it to select
|
|
if (select && tab->ref.key >= 0)
|
|
{
|
|
if (!select->quick)
|
|
{
|
|
if (tab->quick)
|
|
{
|
|
select->quick= tab->quick;
|
|
tab->quick= NULL;
|
|
/*
|
|
We can only use 'Only index' if quick key is same as ref_key
|
|
and in index_merge 'Only index' cannot be used
|
|
*/
|
|
if (((uint) tab->ref.key != select->quick->index))
|
|
table->file->ha_end_keyread();
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We have a ref on a const; Change this to a range that filesort
|
|
can use.
|
|
For impossible ranges (like when doing a lookup on NULL on a NOT NULL
|
|
field, quick will contain an empty record set.
|
|
*/
|
|
if (!(select->quick= (tab->type == JT_FT ?
|
|
get_ft_select(thd, table, tab->ref.key) :
|
|
get_quick_select_for_ref(thd, table, &tab->ref,
|
|
tab->found_records))))
|
|
goto err;
|
|
quick_created= TRUE;
|
|
}
|
|
fsort->own_select= true;
|
|
}
|
|
else
|
|
{
|
|
fsort->own_select= false;
|
|
DBUG_ASSERT(tab->type == JT_REF || tab->type == JT_EQ_REF);
|
|
// Update ref value
|
|
if (unlikely(cp_buffer_from_ref(thd, table, &tab->ref) &&
|
|
thd->is_error()))
|
|
goto err; // out of memory
|
|
}
|
|
}
|
|
|
|
|
|
/* Fill schema tables with data before filesort if it's necessary */
|
|
if ((join->select_lex->options & OPTION_SCHEMA_TABLE) &&
|
|
unlikely(get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX)))
|
|
goto err;
|
|
|
|
if (table->s->tmp_table)
|
|
table->file->info(HA_STATUS_VARIABLE); // Get record count
|
|
fsort->accepted_rows= &join->accepted_rows; // For ROWNUM
|
|
file_sort= filesort(thd, table, fsort, fsort->tracker, join, tab->table->map);
|
|
DBUG_ASSERT(tab->filesort_result == 0);
|
|
tab->filesort_result= file_sort;
|
|
tab->records= 0;
|
|
if (file_sort)
|
|
{
|
|
tab->records= join->select_options & OPTION_FOUND_ROWS ?
|
|
file_sort->found_rows : file_sort->return_rows;
|
|
}
|
|
|
|
if (quick_created)
|
|
{
|
|
/* This will delete the quick select. */
|
|
select->cleanup();
|
|
}
|
|
|
|
table->file->ha_end_keyread();
|
|
if (tab->type == JT_FT)
|
|
table->file->ha_ft_end();
|
|
else
|
|
table->file->ha_index_or_rnd_end();
|
|
|
|
DBUG_RETURN(file_sort == 0);
|
|
err:
|
|
DBUG_RETURN(-1);
|
|
}
|
|
|
|
|
|
/**
|
|
Compare fields from table->record[0] and table->record[1],
|
|
possibly skipping few first fields.
|
|
|
|
@param table
|
|
@param ptr field to start the comparison from,
|
|
somewhere in the table->field[] array
|
|
|
|
@retval 1 different
|
|
@retval 0 identical
|
|
*/
|
|
static bool compare_record(TABLE *table, Field **ptr)
|
|
{
|
|
for (; *ptr ; ptr++)
|
|
{
|
|
Field *f= *ptr;
|
|
if (f->is_null() != f->is_null(table->s->rec_buff_length) ||
|
|
(!f->is_null() && f->cmp_offset(table->s->rec_buff_length)))
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
  Call Field_blob::copy() on every field flagged BLOB_FLAG in a
  NULL-terminated field array.

  Returns 1 as soon as one copy() call fails, 0 otherwise.
*/
static bool copy_blobs(Field **ptr)
{
  for (Field *fld; (fld= *ptr); ptr++)
  {
    if (!(fld->flags & BLOB_FLAG))
      continue;
    if (((Field_blob *) fld)->copy())
      return true;                              // Error
  }
  return false;
}
|
|
|
|
/*
  Call Field_blob::free() on every field flagged BLOB_FLAG in a
  NULL-terminated field array.
*/
static void free_blobs(Field **ptr)
{
  for (Field *fld; (fld= *ptr); ptr++)
  {
    if (!(fld->flags & BLOB_FLAG))
      continue;
    ((Field_blob *) fld)->free();
  }
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Remove duplicates from a temporary table.
|
|
|
|
@detail
|
|
Remove duplicate rows from a temporary table. This is used for e.g. queries
|
|
like
|
|
|
|
select distinct count(*) as CNT from tbl group by col
|
|
|
|
Here, we get a group table with count(*) values. It is not possible to
|
|
prevent duplicates from appearing in the table (as we don't know the values
|
|
before we've done the grouping). Because of that, we have this function to
|
|
scan the temptable (maybe, multiple times) and remove the duplicate rows
|
|
|
|
Rows that do not satisfy 'having' condition are also removed.
|
|
*/
|
|
|
|
bool
|
|
JOIN_TAB::remove_duplicates()
|
|
|
|
{
|
|
bool error;
|
|
ulong keylength= 0, sort_field_keylength= 0;
|
|
uint field_count, item_count;
|
|
List<Item> *fields= (this-1)->fields;
|
|
Item *item;
|
|
THD *thd= join->thd;
|
|
SORT_FIELD *sortorder, *sorder;
|
|
DBUG_ENTER("remove_duplicates");
|
|
|
|
DBUG_ASSERT(join->aggr_tables > 0 && table->s->tmp_table != NO_TMP_TABLE);
|
|
THD_STAGE_INFO(join->thd, stage_removing_duplicates);
|
|
|
|
if (!(sortorder= (SORT_FIELD*) my_malloc(PSI_INSTRUMENT_ME,
|
|
(fields->elements+1) *
|
|
sizeof(SORT_FIELD),
|
|
MYF(MY_WME | MY_ZEROFILL))))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
/* Calculate how many saved fields there is in list */
|
|
field_count= item_count= 0;
|
|
|
|
List_iterator<Item> it(*fields);
|
|
for (sorder= sortorder ; (item=it++) ;)
|
|
{
|
|
if (!item->const_item())
|
|
{
|
|
if (item->get_tmp_table_field())
|
|
{
|
|
/* Field is stored in temporary table, skipp */
|
|
field_count++;
|
|
}
|
|
else
|
|
{
|
|
/* Item is not stored in temporary table, remember it */
|
|
sorder->item= item;
|
|
sorder->type= sorder->item->type_handler()->is_packable() ?
|
|
SORT_FIELD_ATTR::VARIABLE_SIZE :
|
|
SORT_FIELD_ATTR::FIXED_SIZE;
|
|
/* Calculate sorder->length */
|
|
item->type_handler()->sort_length(thd, item, sorder);
|
|
sorder++;
|
|
item_count++;
|
|
}
|
|
}
|
|
}
|
|
sorder->item= 0; // End marker
|
|
|
|
if ((field_count + item_count == 0) && ! having &&
|
|
!(join->select_options & OPTION_FOUND_ROWS))
|
|
{
|
|
// only const items with no OPTION_FOUND_ROWS
|
|
join->unit->lim.send_first_row(); // Only send first row
|
|
my_free(sortorder);
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
/*
|
|
The table contains first fields that will be in the output, then
|
|
temporary results pointed to by the fields list.
|
|
Example: SELECT DISTINCT sum(a), sum(d) > 2 FROM ...
|
|
In this case the temporary table contains sum(a), sum(d).
|
|
*/
|
|
|
|
Field **first_field=table->field+table->s->fields - field_count;
|
|
for (Field **ptr=first_field; *ptr; ptr++)
|
|
keylength+= (*ptr)->sort_length() + (*ptr)->maybe_null();
|
|
for (SORT_FIELD *ptr= sortorder ; ptr->item ; ptr++)
|
|
sort_field_keylength+= ptr->length + (ptr->item->maybe_null() ? 1 : 0);
|
|
|
|
/*
|
|
Disable LIMIT ROWS EXAMINED in order to avoid interrupting prematurely
|
|
duplicate removal, and produce a possibly incomplete query result.
|
|
*/
|
|
thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX;
|
|
if (thd->killed == ABORT_QUERY)
|
|
thd->reset_killed();
|
|
|
|
table->file->info(HA_STATUS_VARIABLE);
|
|
table->reginfo.lock_type=TL_WRITE;
|
|
|
|
if (table->s->db_type() == heap_hton ||
|
|
(!table->s->blob_fields &&
|
|
((ALIGN_SIZE(keylength) + HASH_OVERHEAD) * table->file->stats.records <
|
|
thd->variables.sortbuff_size)))
|
|
error= remove_dup_with_hash_index(join->thd, table, field_count,
|
|
first_field, sortorder,
|
|
keylength + sort_field_keylength, having);
|
|
else
|
|
error=remove_dup_with_compare(join->thd, table, first_field, sortorder,
|
|
sort_field_keylength, having);
|
|
|
|
if (join->select_lex != join->select_lex->master_unit()->fake_select_lex)
|
|
thd->lex->set_limit_rows_examined();
|
|
free_blobs(first_field);
|
|
my_free(sortorder);
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
|
|
/*
|
|
Create a sort/compare key from items
|
|
|
|
Key is of fixed length and binary comparable
|
|
*/
|
|
|
|
static uchar *make_sort_key(SORT_FIELD *sortorder, uchar *key_buffer,
|
|
String *tmp_value)
|
|
{
|
|
for (SORT_FIELD *ptr= sortorder ; ptr->item ; ptr++)
|
|
{
|
|
ptr->item->type_handler()->make_sort_key_part(key_buffer,
|
|
ptr->item,
|
|
ptr, tmp_value);
|
|
key_buffer+= (ptr->item->maybe_null() ? 1 : 0) + ptr->length;
|
|
}
|
|
return key_buffer;
|
|
}
|
|
|
|
|
|
/*
|
|
Remove duplicates by comparing all rows with all other rows
|
|
|
|
@param thd THD
|
|
@param table Temporary table
|
|
@param first_field Pointer to fields in temporary table that are part of
|
|
distinct, ends with null pointer
|
|
@param sortorder An array of Items part of distsinct. Terminated with an
|
|
element N with sortorder[N]->item=NULL.
|
|
@param keylength Length of key produced by sortorder
|
|
@param having Having expression (NULL if no having)
|
|
*/
|
|
|
|
static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
|
|
SORT_FIELD *sortorder, ulong keylength,
|
|
Item *having)
|
|
{
|
|
handler *file=table->file;
|
|
uchar *record=table->record[0], *key_buffer, *key_buffer2;
|
|
char *tmp_buffer;
|
|
int error;
|
|
String tmp_value;
|
|
DBUG_ENTER("remove_dup_with_compare");
|
|
|
|
if (unlikely(!my_multi_malloc(PSI_INSTRUMENT_ME,
|
|
MYF(MY_WME),
|
|
&key_buffer, keylength,
|
|
&key_buffer2, keylength,
|
|
&tmp_buffer, keylength+1,
|
|
NullS)))
|
|
DBUG_RETURN(1);
|
|
tmp_value.set(tmp_buffer, keylength, &my_charset_bin);
|
|
|
|
if (unlikely(file->ha_rnd_init_with_error(1)))
|
|
DBUG_RETURN(1);
|
|
|
|
error= file->ha_rnd_next(record);
|
|
for (;;)
|
|
{
|
|
if (unlikely(thd->check_killed()))
|
|
{
|
|
error= 1;
|
|
goto end;
|
|
}
|
|
if (unlikely(error))
|
|
{
|
|
if (error == HA_ERR_END_OF_FILE)
|
|
break;
|
|
goto err;
|
|
}
|
|
if (having && !having->val_int())
|
|
{
|
|
if (unlikely((error= file->ha_delete_row(record))))
|
|
goto err;
|
|
error= file->ha_rnd_next(record);
|
|
continue;
|
|
}
|
|
if (unlikely(copy_blobs(first_field)))
|
|
{
|
|
my_message(ER_OUTOFMEMORY, ER_THD(thd,ER_OUTOFMEMORY),
|
|
MYF(ME_FATAL));
|
|
error= 1;
|
|
goto end;
|
|
}
|
|
make_sort_key(sortorder, key_buffer, &tmp_value);
|
|
store_record(table,record[1]);
|
|
|
|
/* Read through rest of file and mark duplicated rows deleted */
|
|
bool found=0;
|
|
for (;;)
|
|
{
|
|
if (unlikely((error= file->ha_rnd_next(record))))
|
|
{
|
|
if (error == HA_ERR_END_OF_FILE)
|
|
break;
|
|
goto err;
|
|
}
|
|
make_sort_key(sortorder, key_buffer2, &tmp_value);
|
|
if (compare_record(table, first_field) == 0 &&
|
|
(!keylength ||
|
|
memcmp(key_buffer, key_buffer2, keylength) == 0))
|
|
{
|
|
if (unlikely((error= file->ha_delete_row(record))))
|
|
goto err;
|
|
}
|
|
else if (!found)
|
|
{
|
|
found=1;
|
|
if (unlikely((error= file->remember_rnd_pos())))
|
|
goto err;
|
|
}
|
|
}
|
|
if (!found)
|
|
break; // End of file
|
|
/* Restart search on saved row */
|
|
if (unlikely((error= file->restart_rnd_next(record))))
|
|
goto err;
|
|
}
|
|
|
|
error= 0;
|
|
end:
|
|
my_free(key_buffer);
|
|
file->extra(HA_EXTRA_NO_CACHE);
|
|
(void) file->ha_rnd_end();
|
|
DBUG_RETURN(error);
|
|
|
|
err:
|
|
DBUG_ASSERT(error);
|
|
file->print_error(error,MYF(0));
|
|
goto end;
|
|
}
|
|
|
|
|
|
/**
|
|
Generate a hash index for each row to quickly find duplicate rows.
|
|
|
|
@param thd THD
|
|
@param table Temporary table
|
|
@param field_count Number of fields part of distinct
|
|
@param first_field Pointer to fields in temporary table that are part of
|
|
distinct, ends with null pointer
|
|
@param sortorder An array of Items part of distsinct. Terminated with an
|
|
element N with sortorder[N]->item=NULL.
|
|
@param keylength Length of hash key
|
|
@param having Having expression (NULL if no having)
|
|
|
|
@note
|
|
Note that this will not work on tables with blobs!
|
|
*/
|
|
|
|
static int remove_dup_with_hash_index(THD *thd, TABLE *table,
|
|
uint field_count,
|
|
Field **first_field,
|
|
SORT_FIELD *sortorder,
|
|
ulong key_length,
|
|
Item *having)
|
|
{
|
|
uchar *key_buffer, *key_pos, *record=table->record[0];
|
|
char *tmp_buffer;
|
|
int error;
|
|
handler *file= table->file;
|
|
ulong extra_length= ALIGN_SIZE(key_length)-key_length;
|
|
uint *field_lengths, *field_length;
|
|
HASH hash;
|
|
String tmp_value;
|
|
DBUG_ENTER("remove_dup_with_hash_index");
|
|
|
|
if (!my_multi_malloc(key_memory_hash_index_key_buffer, MYF(MY_WME),
|
|
&key_buffer,
|
|
(uint) ((key_length + extra_length) *
|
|
(long) file->stats.records),
|
|
&field_lengths,
|
|
(uint) (field_count*sizeof(*field_lengths)),
|
|
&tmp_buffer, key_length+1,
|
|
NullS))
|
|
DBUG_RETURN(1);
|
|
|
|
tmp_value.set(tmp_buffer, key_length, &my_charset_bin);
|
|
field_length= field_lengths;
|
|
for (Field **ptr= first_field ; *ptr ; ptr++)
|
|
(*field_length++)= (*ptr)->sort_length();
|
|
|
|
if (my_hash_init(key_memory_hash_index_key_buffer, &hash, &my_charset_bin,
|
|
(uint) file->stats.records, 0, key_length,
|
|
(my_hash_get_key) 0, 0, 0))
|
|
{
|
|
my_free(key_buffer);
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
if (unlikely((error= file->ha_rnd_init(1))))
|
|
goto err;
|
|
|
|
key_pos= key_buffer;
|
|
for (;;)
|
|
{
|
|
uchar *org_key_pos;
|
|
if (unlikely(thd->check_killed()))
|
|
{
|
|
error=0;
|
|
goto err;
|
|
}
|
|
if (unlikely((error= file->ha_rnd_next(record))))
|
|
{
|
|
if (error == HA_ERR_END_OF_FILE)
|
|
break;
|
|
goto err;
|
|
}
|
|
if (having && !having->val_int())
|
|
{
|
|
if (unlikely((error= file->ha_delete_row(record))))
|
|
goto err;
|
|
continue;
|
|
}
|
|
|
|
/* copy fields to key buffer */
|
|
org_key_pos= key_pos;
|
|
field_length=field_lengths;
|
|
for (Field **ptr= first_field ; *ptr ; ptr++)
|
|
{
|
|
(*ptr)->make_sort_key_part(key_pos, *field_length);
|
|
key_pos+= (*ptr)->maybe_null() + *field_length++;
|
|
}
|
|
/* Copy result fields not stored in table to key buffer */
|
|
key_pos= make_sort_key(sortorder, key_pos, &tmp_value);
|
|
|
|
/* Check if it exists before */
|
|
if (my_hash_search(&hash, org_key_pos, key_length))
|
|
{
|
|
/* Duplicated found ; Remove the row */
|
|
if (unlikely((error= file->ha_delete_row(record))))
|
|
goto err;
|
|
}
|
|
else
|
|
{
|
|
if (my_hash_insert(&hash, org_key_pos))
|
|
goto err;
|
|
}
|
|
key_pos+=extra_length;
|
|
}
|
|
my_free(key_buffer);
|
|
my_hash_free(&hash);
|
|
file->extra(HA_EXTRA_NO_CACHE);
|
|
(void) file->ha_rnd_end();
|
|
DBUG_RETURN(0);
|
|
|
|
err:
|
|
my_free(key_buffer);
|
|
my_hash_free(&hash);
|
|
file->extra(HA_EXTRA_NO_CACHE);
|
|
(void) file->ha_rnd_end();
|
|
if (unlikely(error))
|
|
file->print_error(error,MYF(0));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
|
|
/*
|
|
eq_ref: Create the lookup key and check if it is the same as saved key
|
|
|
|
SYNOPSIS
|
|
cmp_buffer_with_ref()
|
|
tab Join tab of the accessed table
|
|
table The table to read. This is usually tab->table, except for
|
|
semi-join when we might need to make a lookup in a temptable
|
|
instead.
|
|
tab_ref The structure with methods to collect index lookup tuple.
|
|
This is usually table->ref, except for the case of when we're
|
|
doing lookup into semi-join materialization table.
|
|
|
|
DESCRIPTION
|
|
Used by eq_ref access method: create the index lookup key and check if
|
|
we've used this key at previous lookup (If yes, we don't need to repeat
|
|
the lookup - the record has been already fetched)
|
|
|
|
RETURN
|
|
TRUE No cached record for the key, or failed to create the key (due to
|
|
out-of-domain error)
|
|
FALSE The created key is the same as the previous one (and the record
|
|
is already in table->record)
|
|
*/
|
|
|
|
static bool
|
|
cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
|
|
{
|
|
bool no_prev_key;
|
|
if (!tab_ref->disable_cache)
|
|
{
|
|
if (!(no_prev_key= tab_ref->key_err))
|
|
{
|
|
/* Previous access found a row. Copy its key */
|
|
memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
|
|
}
|
|
}
|
|
else
|
|
no_prev_key= TRUE;
|
|
if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
|
|
no_prev_key)
|
|
return 1;
|
|
return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
|
|
!= 0;
|
|
}
|
|
|
|
|
|
bool
|
|
cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref)
|
|
{
|
|
enum_check_fields org_count_cuted_fields= thd->count_cuted_fields;
|
|
MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->write_set);
|
|
bool result= 0;
|
|
key_part_map map= 1;
|
|
|
|
thd->count_cuted_fields= CHECK_FIELD_IGNORE;
|
|
for (store_key **copy=ref->key_copy ; *copy ; copy++, map <<= 1)
|
|
{
|
|
while (map & ref->const_ref_part_map) // skip const ref parts
|
|
map <<= 1; // no store_key objects for them
|
|
if ((*copy)->copy(thd) & 1 ||
|
|
((ref->null_rejecting & map) && (*copy)->null_key))
|
|
{
|
|
result= 1;
|
|
break;
|
|
}
|
|
}
|
|
thd->count_cuted_fields= org_count_cuted_fields;
|
|
dbug_tmp_restore_column_map(&table->write_set, old_map);
|
|
return result;
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Group and order functions
|
|
*****************************************************************************/
|
|
|
|
/**
|
|
Resolve an ORDER BY or GROUP BY column reference.
|
|
|
|
Given a column reference (represented by 'order') from a GROUP BY or ORDER
|
|
BY clause, find the actual column it represents. If the column being
|
|
resolved is from the GROUP BY clause, the procedure searches the SELECT
|
|
list 'fields' and the columns in the FROM list 'tables'. If 'order' is from
|
|
the ORDER BY clause, only the SELECT list is being searched.
|
|
|
|
If 'order' is resolved to an Item, then order->item is set to the found
|
|
Item. If there is no item for the found column (that is, it was resolved
|
|
into a table field), order->item is 'fixed' and is added to all_fields and
|
|
ref_pointer_array.
|
|
|
|
ref_pointer_array and all_fields are updated.
|
|
|
|
@param[in] thd Pointer to current thread structure
|
|
@param[in,out] ref_pointer_array All select, group and order by fields
|
|
@param[in] tables List of tables to search in (usually
|
|
FROM clause)
|
|
@param[in] order Column reference to be resolved
|
|
@param[in] fields List of fields to search in (usually
|
|
SELECT list)
|
|
@param[in,out] all_fields All select, group and order by fields
|
|
@param[in] is_group_field True if order is a GROUP field, false if
|
|
ORDER by field
|
|
@param[in] add_to_all_fields If the item is to be added to all_fields and
|
|
ref_pointer_array, this flag can be set to
|
|
false to stop the automatic insertion.
|
|
@param[in] from_window_spec If true then order is from a window spec
|
|
|
|
@retval
|
|
FALSE if OK
|
|
@retval
|
|
TRUE if error occurred
|
|
*/
|
|
|
|
static bool
|
|
find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
TABLE_LIST *tables,
|
|
ORDER *order, List<Item> &fields, List<Item> &all_fields,
|
|
bool is_group_field, bool add_to_all_fields,
|
|
bool from_window_spec)
|
|
{
|
|
Item *order_item= *order->item; /* The item from the GROUP/ORDER caluse. */
|
|
Item::Type order_item_type;
|
|
Item **select_item; /* The corresponding item from the SELECT clause. */
|
|
Field *from_field; /* The corresponding field from the FROM clause. */
|
|
uint counter;
|
|
enum_resolution_type resolution;
|
|
|
|
if (order_item->is_order_clause_position() && !from_window_spec)
|
|
{ /* Order by position */
|
|
uint count;
|
|
if (order->counter_used)
|
|
count= order->counter; // counter was once resolved
|
|
else
|
|
count= (uint) order_item->val_int();
|
|
if (!count || count > fields.elements)
|
|
{
|
|
my_error(ER_BAD_FIELD_ERROR, MYF(0),
|
|
order_item->full_name(), thd_where(thd));
|
|
return TRUE;
|
|
}
|
|
thd->change_item_tree((Item **)&order->item, (Item *)&ref_pointer_array[count - 1]);
|
|
order->in_field_list= 1;
|
|
order->counter= count;
|
|
order->counter_used= 1;
|
|
return FALSE;
|
|
}
|
|
/* Lookup the current GROUP/ORDER field in the SELECT clause. */
|
|
select_item= find_item_in_list(order_item, fields, &counter,
|
|
REPORT_EXCEPT_NOT_FOUND, &resolution);
|
|
if (!select_item)
|
|
return TRUE; /* The item is not unique, or some other error occurred. */
|
|
|
|
|
|
/* Check whether the resolved field is not ambiguos. */
|
|
if (select_item != not_found_item)
|
|
{
|
|
Item *view_ref= NULL;
|
|
/*
|
|
If we have found field not by its alias in select list but by its
|
|
original field name, we should additionally check if we have conflict
|
|
for this name (in case if we would perform lookup in all tables).
|
|
*/
|
|
if (resolution == RESOLVED_BEHIND_ALIAS &&
|
|
order_item->fix_fields_if_needed_for_order_by(thd, order->item))
|
|
return TRUE;
|
|
|
|
/* Lookup the current GROUP field in the FROM clause. */
|
|
order_item_type= order_item->type();
|
|
from_field= (Field*) not_found_field;
|
|
if ((is_group_field && order_item_type == Item::FIELD_ITEM) ||
|
|
order_item_type == Item::REF_ITEM)
|
|
{
|
|
from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
|
|
NULL, ignored_tables_list_t(NULL),
|
|
&view_ref, IGNORE_ERRORS, FALSE, FALSE);
|
|
if (!from_field)
|
|
from_field= (Field*) not_found_field;
|
|
}
|
|
|
|
if (from_field == not_found_field ||
|
|
(from_field != view_ref_found ?
|
|
/* it is field of base table => check that fields are same */
|
|
((*select_item)->type() == Item::FIELD_ITEM &&
|
|
((Item_field*) (*select_item))->field->eq(from_field)) :
|
|
/*
|
|
in is field of view table => check that references on translation
|
|
table are same
|
|
*/
|
|
((*select_item)->type() == Item::REF_ITEM &&
|
|
view_ref->type() == Item::REF_ITEM &&
|
|
((Item_ref *) (*select_item))->ref ==
|
|
((Item_ref *) view_ref)->ref)))
|
|
{
|
|
/*
|
|
If there is no such field in the FROM clause, or it is the same field
|
|
as the one found in the SELECT clause, then use the Item created for
|
|
the SELECT field. As a result if there was a derived field that
|
|
'shadowed' a table field with the same name, the table field will be
|
|
chosen over the derived field.
|
|
*/
|
|
order->item= &ref_pointer_array[counter];
|
|
order->in_field_list=1;
|
|
return FALSE;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
There is a field with the same name in the FROM clause. This
|
|
is the field that will be chosen. In this case we issue a
|
|
warning so the user knows that the field from the FROM clause
|
|
overshadows the column reference from the SELECT list.
|
|
*/
|
|
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
|
|
ER_NON_UNIQ_ERROR,
|
|
ER_THD(thd, ER_NON_UNIQ_ERROR),
|
|
((Item_ident*) order_item)->field_name.str,
|
|
thd_where(thd));
|
|
}
|
|
}
|
|
else if (from_window_spec)
|
|
{
|
|
Item **found_item= find_item_in_list(order_item, all_fields, &counter,
|
|
REPORT_EXCEPT_NOT_FOUND, &resolution,
|
|
all_fields.elements - fields.elements);
|
|
if (found_item != not_found_item)
|
|
{
|
|
order->item= &ref_pointer_array[all_fields.elements-1-counter];
|
|
order->in_field_list= 0;
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
order->in_field_list=0;
|
|
/*
|
|
The call to order_item->fix_fields() means that here we resolve
|
|
'order_item' to a column from a table in the list 'tables', or to
|
|
a column in some outer query. Exactly because of the second case
|
|
we come to this point even if (select_item == not_found_item),
|
|
inspite of that fix_fields() calls find_item_in_list() one more
|
|
time.
|
|
|
|
We check order_item->fixed() because Item_func_group_concat can put
|
|
arguments for which fix_fields already was called.
|
|
*/
|
|
if (order_item->fix_fields_if_needed_for_order_by(thd, order->item) ||
|
|
thd->is_error())
|
|
return TRUE; /* Wrong field. */
|
|
order_item= *order->item; // Item can change during fix_fields()
|
|
|
|
if (!add_to_all_fields)
|
|
return FALSE;
|
|
|
|
uint el= all_fields.elements;
|
|
/* Add new field to field list. */
|
|
all_fields.push_front(order_item, thd->mem_root);
|
|
ref_pointer_array[el]= order_item;
|
|
/*
|
|
If the order_item is a SUM_FUNC_ITEM, when fix_fields is called
|
|
ref_by is set to order->item which is the address of order_item.
|
|
But this needs to be address of order_item in the all_fields list.
|
|
As a result, when it gets replaced with Item_aggregate_ref
|
|
object in Item::split_sum_func2, we will be able to retrieve the
|
|
newly created object.
|
|
*/
|
|
if (order_item->type() == Item::SUM_FUNC_ITEM)
|
|
((Item_sum *)order_item)->ref_by= all_fields.head_ref();
|
|
|
|
order->item= &ref_pointer_array[el];
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Change order to point at item in select list.
|
|
|
|
If item isn't a number and doesn't exits in the select list, add it the
|
|
the field list.
|
|
*/
|
|
|
|
int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
|
|
List<Item> &fields, List<Item> &all_fields, ORDER *order,
|
|
bool from_window_spec)
|
|
{
|
|
SELECT_LEX *select = thd->lex->current_select;
|
|
enum_parsing_place context_analysis_place=
|
|
thd->lex->current_select->context_analysis_place;
|
|
thd->where= THD_WHERE::ORDER_CLAUSE;
|
|
const bool for_union= select->master_unit()->is_unit_op() &&
|
|
select == select->master_unit()->fake_select_lex;
|
|
for (uint number = 1; order; order=order->next, number++)
|
|
{
|
|
if (find_order_in_list(thd, ref_pointer_array, tables, order, fields,
|
|
all_fields, false, true, from_window_spec))
|
|
return 1;
|
|
Item * const item= *order->item;
|
|
if (item->with_window_func() && context_analysis_place != IN_ORDER_BY)
|
|
{
|
|
my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0));
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
UNION queries cannot be used with an aggregate function in
|
|
an ORDER BY clause
|
|
*/
|
|
|
|
if (for_union && (item->with_sum_func() || item->with_window_func()))
|
|
{
|
|
my_error(ER_AGGREGATE_ORDER_FOR_UNION, MYF(0), number);
|
|
return 1;
|
|
}
|
|
|
|
if ((from_window_spec && item->with_sum_func() &&
|
|
item->type() != Item::SUM_FUNC_ITEM) || item->with_window_func())
|
|
{
|
|
item->split_sum_func(thd, ref_pointer_array,
|
|
all_fields, SPLIT_SUM_SELECT);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Intitialize the GROUP BY list.
|
|
|
|
@param thd Thread handler
|
|
@param ref_pointer_array We store references to all fields that was
|
|
not in 'fields' here.
|
|
@param fields All fields in the select part. Any item in
|
|
'order' that is part of these list is replaced
|
|
by a pointer to this fields.
|
|
@param all_fields Total list of all unique fields used by the
|
|
select. All items in 'order' that was not part
|
|
of fields will be added first to this list.
|
|
@param order The fields we should do GROUP/PARTITION BY on
|
|
@param hidden_group_fields Pointer to flag that is set to 1 if we added
|
|
any fields to all_fields.
|
|
@param from_window_spec If true then list is from a window spec
|
|
|
|
@todo
|
|
change ER_WRONG_FIELD_WITH_GROUP to more detailed
|
|
ER_NON_GROUPING_FIELD_USED
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 error (probably out of memory)
|
|
*/
|
|
|
|
int
|
|
setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
|
|
List<Item> &fields, List<Item> &all_fields, ORDER *order,
|
|
bool *hidden_group_fields, bool from_window_spec)
|
|
{
|
|
enum_parsing_place context_analysis_place=
|
|
thd->lex->current_select->context_analysis_place;
|
|
*hidden_group_fields=0;
|
|
ORDER *ord;
|
|
|
|
if (!order)
|
|
return 0; /* Everything is ok */
|
|
|
|
uint org_fields=all_fields.elements;
|
|
|
|
thd->where= THD_WHERE::GROUP_STATEMENT;
|
|
for (ord= order; ord; ord= ord->next)
|
|
{
|
|
if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields,
|
|
all_fields, true, true, from_window_spec))
|
|
return 1;
|
|
(*ord->item)->marker= MARKER_UNDEF_POS; /* Mark found */
|
|
if ((*ord->item)->with_sum_func() && context_analysis_place == IN_GROUP_BY)
|
|
{
|
|
my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*ord->item)->full_name());
|
|
return 1;
|
|
}
|
|
if ((*ord->item)->with_window_func())
|
|
{
|
|
if (context_analysis_place == IN_GROUP_BY)
|
|
my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0));
|
|
else
|
|
my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0));
|
|
return 1;
|
|
}
|
|
if (from_window_spec && (*ord->item)->with_sum_func() &&
|
|
(*ord->item)->type() != Item::SUM_FUNC_ITEM)
|
|
(*ord->item)->split_sum_func(thd, ref_pointer_array,
|
|
all_fields, SPLIT_SUM_SELECT);
|
|
}
|
|
if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY &&
|
|
context_analysis_place == IN_GROUP_BY)
|
|
{
|
|
/*
|
|
Don't allow one to use fields that is not used in GROUP BY
|
|
For each select a list of field references that aren't under an
|
|
aggregate function is created. Each field in this list keeps the
|
|
position of the select list expression which it belongs to.
|
|
|
|
First we check an expression from the select list against the GROUP BY
|
|
list. If it's found there then it's ok. It's also ok if this expression
|
|
is a constant or an aggregate function. Otherwise we scan the list
|
|
of non-aggregated fields and if we'll find at least one field reference
|
|
that belongs to this expression and doesn't occur in the GROUP BY list
|
|
we throw an error. If there are no fields in the created list for a
|
|
select list expression this means that all fields in it are used under
|
|
aggregate functions.
|
|
|
|
Note that for items in the select list (fields), Item_field->markers
|
|
contains the position of the field in the select list.
|
|
*/
|
|
Item *item;
|
|
Item_field *field;
|
|
int cur_pos_in_select_list= 0;
|
|
List_iterator<Item> li(fields);
|
|
List_iterator<Item_field> naf_it(thd->lex->current_select->join->non_agg_fields);
|
|
|
|
field= naf_it++;
|
|
while (field && (item=li++))
|
|
{
|
|
if (item->type() != Item::SUM_FUNC_ITEM &&
|
|
item->marker != MARKER_UNDEF_POS &&
|
|
!item->const_item() &&
|
|
!(item->real_item()->type() == Item::FIELD_ITEM &&
|
|
item->used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
while (field)
|
|
{
|
|
/* Skip fields from previous expressions. */
|
|
if (field->marker < cur_pos_in_select_list)
|
|
goto next_field;
|
|
/* Found a field from the next expression. */
|
|
if (field->marker > cur_pos_in_select_list)
|
|
break;
|
|
/*
|
|
Check whether the field occur in the GROUP BY list.
|
|
Throw the error later if the field isn't found.
|
|
*/
|
|
for (ord= order; ord; ord= ord->next)
|
|
if ((*ord->item)->eq((Item*)field, 0))
|
|
goto next_field;
|
|
/*
|
|
TODO: change ER_WRONG_FIELD_WITH_GROUP to more detailed
|
|
ER_NON_GROUPING_FIELD_USED
|
|
*/
|
|
my_error(ER_WRONG_FIELD_WITH_GROUP, MYF(0), field->full_name());
|
|
return 1;
|
|
next_field:
|
|
field= naf_it++;
|
|
}
|
|
}
|
|
cur_pos_in_select_list++;
|
|
}
|
|
}
|
|
if (org_fields != all_fields.elements)
|
|
*hidden_group_fields=1; // group fields is not used
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Add fields with aren't used at start of field list.
|
|
|
|
@return
|
|
FALSE if ok
|
|
*/
|
|
|
|
static bool
|
|
setup_new_fields(THD *thd, List<Item> &fields,
|
|
List<Item> &all_fields, ORDER *new_field)
|
|
{
|
|
Item **item;
|
|
uint counter;
|
|
enum_resolution_type not_used;
|
|
DBUG_ENTER("setup_new_fields");
|
|
|
|
thd->column_usage= MARK_COLUMNS_READ; // Not really needed, but...
|
|
for (; new_field ; new_field= new_field->next)
|
|
{
|
|
if ((item= find_item_in_list(*new_field->item, fields, &counter,
|
|
IGNORE_ERRORS, ¬_used)))
|
|
new_field->item=item; /* Change to shared Item */
|
|
else
|
|
{
|
|
thd->where= THD_WHERE::PROCEDURE_LIST;
|
|
if ((*new_field->item)->fix_fields(thd, new_field->item))
|
|
DBUG_RETURN(1); /* purecov: inspected */
|
|
all_fields.push_front(*new_field->item, thd->mem_root);
|
|
new_field->item=all_fields.head_ref();
|
|
}
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/**
|
|
Create a group by that consist of all non const fields.
|
|
|
|
Try to use the fields in the order given by 'order' to allow one to
|
|
optimize away 'order by'.
|
|
|
|
@retval
|
|
0 OOM error if thd->is_fatal_error is set. Otherwise group was eliminated
|
|
# Pointer to new group
|
|
*/
|
|
|
|
ORDER *
|
|
create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
ORDER *order_list, List<Item> &fields,
|
|
List<Item> &all_fields,
|
|
bool *all_order_by_fields_used)
|
|
{
|
|
List_iterator<Item> li(fields);
|
|
Item *item;
|
|
Ref_ptr_array orig_ref_pointer_array= ref_pointer_array;
|
|
ORDER *order,*group,**prev;
|
|
uint idx= 0;
|
|
|
|
*all_order_by_fields_used= 1;
|
|
while ((item=li++))
|
|
item->marker= MARKER_UNUSED; /* Marker that field is not used */
|
|
|
|
prev= &group; group=0;
|
|
for (order=order_list ; order; order=order->next)
|
|
{
|
|
if (order->in_field_list)
|
|
{
|
|
ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER));
|
|
if (!ord)
|
|
return 0;
|
|
*prev=ord;
|
|
prev= &ord->next;
|
|
(*ord->item)->marker= MARKER_FOUND_IN_ORDER;
|
|
}
|
|
else
|
|
*all_order_by_fields_used= 0;
|
|
}
|
|
|
|
li.rewind();
|
|
while ((item=li++))
|
|
{
|
|
if (!item->const_item() && !item->with_sum_func() &&
|
|
item->marker == MARKER_UNUSED)
|
|
{
|
|
/*
|
|
Don't put duplicate columns from the SELECT list into the
|
|
GROUP BY list.
|
|
*/
|
|
ORDER *ord_iter;
|
|
for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
|
|
if ((*ord_iter->item)->eq(item, 1))
|
|
goto next_item;
|
|
|
|
ORDER *ord=(ORDER*) thd->calloc(sizeof(ORDER));
|
|
if (!ord)
|
|
return 0;
|
|
|
|
if (item->type() == Item::FIELD_ITEM &&
|
|
item->field_type() == MYSQL_TYPE_BIT)
|
|
{
|
|
/*
|
|
Because HEAP tables can't index BIT fields we need to use an
|
|
additional hidden field for grouping because later it will be
|
|
converted to a LONG field. Original field will remain of the
|
|
BIT type and will be returned [el]client.
|
|
*/
|
|
Item_field *new_item= new (thd->mem_root) Item_field(thd, (Item_field*)item);
|
|
if (!new_item)
|
|
return 0;
|
|
int el= all_fields.elements;
|
|
orig_ref_pointer_array[el]= new_item;
|
|
all_fields.push_front(new_item, thd->mem_root);
|
|
ord->item=&orig_ref_pointer_array[el];
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We have here only field_list (not all_field_list), so we can use
|
|
simple indexing of ref_pointer_array (order in the array and in the
|
|
list are same)
|
|
*/
|
|
ord->item= &ref_pointer_array[idx];
|
|
}
|
|
ord->direction= ORDER::ORDER_ASC;
|
|
*prev=ord;
|
|
prev= &ord->next;
|
|
}
|
|
next_item:
|
|
idx++;
|
|
}
|
|
*prev=0;
|
|
return group;
|
|
}
|
|
|
|
|
|
/**
|
|
Update join with count of the different type of fields.
|
|
*/
|
|
|
|
void
|
|
count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
|
|
List<Item> &fields, bool reset_with_sum_func)
|
|
{
|
|
List_iterator<Item> li(fields);
|
|
Item *field;
|
|
|
|
param->field_count=param->sum_func_count=param->func_count=
|
|
param->hidden_field_count=0;
|
|
param->quick_group=1;
|
|
while ((field=li++))
|
|
{
|
|
Item::Type real_type= field->real_item()->type();
|
|
if (real_type == Item::FIELD_ITEM)
|
|
param->field_count++;
|
|
else if (real_type == Item::SUM_FUNC_ITEM)
|
|
{
|
|
if (! field->const_item())
|
|
{
|
|
Item_sum *sum_item=(Item_sum*) field->real_item();
|
|
if (!sum_item->depended_from() ||
|
|
sum_item->depended_from() == select_lex)
|
|
{
|
|
if (!sum_item->quick_group)
|
|
param->quick_group=0; // UDF SUM function
|
|
param->sum_func_count++;
|
|
|
|
for (uint i=0 ; i < sum_item->get_arg_count() ; i++)
|
|
{
|
|
if (sum_item->get_arg(i)->real_item()->type() == Item::FIELD_ITEM)
|
|
param->field_count++;
|
|
else
|
|
param->func_count++;
|
|
}
|
|
}
|
|
param->func_count++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
param->func_count++;
|
|
if (reset_with_sum_func)
|
|
field->with_flags&= ~item_with_t::SUM_FUNC;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Return 1 if second is a subpart of first argument.
|
|
|
|
SIDE EFFECT:
|
|
For all the first items in the group by list that match, the sort
|
|
direction of the GROUP BY items are set to the same as those given by the
|
|
ORDER BY.
|
|
The direction of the group does not matter if the ORDER BY clause overrides
|
|
it anyway.
|
|
*/
|
|
|
|
static bool
|
|
test_if_subpart(ORDER *group_by, ORDER *order_by)
|
|
{
|
|
while (group_by && order_by)
|
|
{
|
|
if ((*group_by->item)->eq(*order_by->item, 1))
|
|
group_by->direction= order_by->direction;
|
|
else
|
|
return 0;
|
|
group_by= group_by->next;
|
|
order_by= order_by->next;
|
|
}
|
|
return MY_TEST(!order_by);
|
|
}
|
|
|
|
/**
|
|
Return table number if there is only one table in sort order
|
|
and group and order is compatible, else return 0.
|
|
*/
|
|
|
|
static TABLE *
|
|
get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables,
|
|
table_map const_tables)
|
|
{
|
|
TABLE_LIST *table;
|
|
List_iterator<TABLE_LIST> ti(tables);
|
|
table_map map= (table_map) 0;
|
|
DBUG_ENTER("get_sort_by_table");
|
|
|
|
if (!a)
|
|
a=b; // Only one need to be given
|
|
else if (!b)
|
|
b=a;
|
|
|
|
for (; a && b; a=a->next,b=b->next)
|
|
{
|
|
/* Skip elements of a that are constant */
|
|
while (!((*a->item)->used_tables() & ~const_tables))
|
|
{
|
|
if (!(a= a->next))
|
|
break;
|
|
}
|
|
|
|
/* Skip elements of b that are constant */
|
|
while (!((*b->item)->used_tables() & ~const_tables))
|
|
{
|
|
if (!(b= b->next))
|
|
break;
|
|
}
|
|
|
|
if (!a || !b)
|
|
break;
|
|
|
|
if (!(*a->item)->eq(*b->item,1))
|
|
DBUG_RETURN(0);
|
|
map|=a->item[0]->used_tables();
|
|
}
|
|
if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
|
|
DBUG_RETURN(0);
|
|
|
|
map&= ~const_tables;
|
|
while ((table= ti++) && !(map & table->table->map)) ;
|
|
if (map != table->table->map)
|
|
DBUG_RETURN(0); // More than one table
|
|
DBUG_PRINT("exit",("sort by table: %d",table->table->tablenr));
|
|
DBUG_RETURN(table->table);
|
|
}
|
|
|
|
|
|
/**
|
|
calc how big buffer we need for comparing group entries.
|
|
*/
|
|
|
|
void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group)
|
|
{
|
|
uint key_length=0, parts=0, null_parts=0;
|
|
|
|
for (; group ; group=group->next)
|
|
{
|
|
Item *group_item= *group->item;
|
|
Field *field= group_item->get_tmp_table_field();
|
|
if (field)
|
|
{
|
|
enum_field_types type;
|
|
if ((type= field->type()) == MYSQL_TYPE_BLOB)
|
|
key_length+=MAX_BLOB_WIDTH; // Can't be used as a key
|
|
else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING)
|
|
key_length+= field->field_length + HA_KEY_BLOB_LENGTH;
|
|
else if (type == MYSQL_TYPE_BIT)
|
|
{
|
|
/* Bit is usually stored as a longlong key for group fields */
|
|
key_length+= 8; // Big enough
|
|
}
|
|
else
|
|
key_length+= field->pack_length();
|
|
}
|
|
else
|
|
{
|
|
switch (group_item->cmp_type()) {
|
|
case REAL_RESULT:
|
|
key_length+= sizeof(double);
|
|
break;
|
|
case INT_RESULT:
|
|
key_length+= sizeof(longlong);
|
|
break;
|
|
case DECIMAL_RESULT:
|
|
key_length+= my_decimal_get_binary_size(group_item->max_length -
|
|
(group_item->decimals ? 1 : 0),
|
|
group_item->decimals);
|
|
break;
|
|
case TIME_RESULT:
|
|
{
|
|
/*
|
|
As items represented as DATE/TIME fields in the group buffer
|
|
have STRING_RESULT result type, we increase the length
|
|
by 8 as maximum pack length of such fields.
|
|
*/
|
|
key_length+= 8;
|
|
break;
|
|
}
|
|
case STRING_RESULT:
|
|
{
|
|
enum enum_field_types type= group_item->field_type();
|
|
if (type == MYSQL_TYPE_BLOB)
|
|
key_length+= MAX_BLOB_WIDTH; // Can't be used as a key
|
|
else
|
|
{
|
|
/*
|
|
Group strings are taken as varstrings and require an length field.
|
|
A field is not yet created by create_tmp_field_ex()
|
|
and the sizes should match up.
|
|
*/
|
|
key_length+= group_item->max_length + HA_KEY_BLOB_LENGTH;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
/* This case should never be chosen */
|
|
DBUG_ASSERT(0);
|
|
my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL));
|
|
}
|
|
}
|
|
parts++;
|
|
if (group_item->maybe_null())
|
|
null_parts++;
|
|
}
|
|
param->group_length= key_length + null_parts;
|
|
param->group_parts= parts;
|
|
param->group_null_parts= null_parts;
|
|
}
|
|
|
|
/*
  Calculate the group buffer size for a join.

  Sets join->group if a group list is given, then stores the buffer
  parameters in join->tmp_table_param.
*/

static void calc_group_buffer(JOIN *join, ORDER *group)
{
  if (group != NULL)
    join->group= 1;

  TMP_TABLE_PARAM *tmp_param= &join->tmp_table_param;
  calc_group_buffer(tmp_param, group);
}
|
|
|
|
|
|
/**
|
|
allocate group fields or take prepared (cached).
|
|
|
|
@param main_join join of current select
|
|
@param curr_join current join (join of current select or temporary copy
|
|
of it)
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 failed
|
|
*/
|
|
|
|
static bool
|
|
make_group_fields(JOIN *main_join, JOIN *curr_join)
|
|
{
|
|
if (main_join->group_fields_cache.elements)
|
|
{
|
|
curr_join->group_fields= main_join->group_fields_cache;
|
|
curr_join->sort_and_group= 1;
|
|
}
|
|
else
|
|
{
|
|
if (alloc_group_fields(curr_join, curr_join->group_list))
|
|
return (1);
|
|
main_join->group_fields_cache= curr_join->group_fields;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static bool
|
|
fill_cached_item_list(THD *thd, List<Cached_item> *list, ORDER *order,
|
|
uint max_number_of_elements = UINT_MAX)
|
|
{
|
|
for (; order && max_number_of_elements ;
|
|
order= order->next, max_number_of_elements--)
|
|
{
|
|
Cached_item *tmp= new_Cached_item(thd, *order->item, true);
|
|
if (!tmp || list->push_front(tmp))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
Get a list of buffers for saving last group.
|
|
|
|
Groups are saved in reverse order for easier check loop.
|
|
*/
|
|
|
|
static bool
|
|
alloc_group_fields(JOIN *join, ORDER *group)
|
|
{
|
|
if (fill_cached_item_list(join->thd, &join->group_fields, group))
|
|
return true;
|
|
join->sort_and_group=1; /* Mark for do_select */
|
|
return false;
|
|
}
|
|
|
|
static bool
|
|
alloc_order_fields(JOIN *join, ORDER *order, uint max_number_of_elements)
|
|
{
|
|
return fill_cached_item_list(join->thd, &join->order_fields, order,
|
|
max_number_of_elements);
|
|
}
|
|
|
|
|
|
/*
|
|
Test if a single-row cache of items changed, and update the cache.
|
|
|
|
@details Test if a list of items that typically represents a result
|
|
row has changed. If the value of some item changed, update the cached
|
|
value for this item.
|
|
|
|
@param list list of <item, cached_value> pairs stored as Cached_item.
|
|
|
|
@return -1 if no item changed
|
|
@return index of the first item that changed
|
|
*/
|
|
|
|
int test_if_item_cache_changed(List<Cached_item> &list)
|
|
{
|
|
DBUG_ENTER("test_if_item_cache_changed");
|
|
List_iterator<Cached_item> li(list);
|
|
int idx= -1,i;
|
|
Cached_item *buff;
|
|
|
|
for (i=(int) list.elements-1 ; (buff=li++) ; i--)
|
|
{
|
|
if (buff->cmp())
|
|
idx=i;
|
|
}
|
|
DBUG_PRINT("info", ("idx: %d", idx));
|
|
DBUG_RETURN(idx);
|
|
}
|
|
|
|
|
|
/*
|
|
@return
|
|
-1 - Group not changed
|
|
value>=0 - Number of the component where the group changed
|
|
*/
|
|
|
|
int
|
|
test_if_group_changed(List<Cached_item> &list)
|
|
{
|
|
DBUG_ENTER("test_if_group_changed");
|
|
List_iterator<Cached_item> li(list);
|
|
int idx= -1,i;
|
|
Cached_item *buff;
|
|
|
|
for (i=(int) list.elements-1 ; (buff=li++) ; i--)
|
|
{
|
|
if (buff->cmp())
|
|
idx=i;
|
|
}
|
|
DBUG_PRINT("info", ("idx: %d", idx));
|
|
DBUG_RETURN(idx);
|
|
}
|
|
|
|
|
|
/**
|
|
Setup copy_fields to save fields at start of new group.
|
|
|
|
Setup copy_fields to save fields at start of new group
|
|
|
|
Only FIELD_ITEM:s and FUNC_ITEM:s needs to be saved between groups.
|
|
Change old item_field to use a new field with points at saved fieldvalue
|
|
This function is only called before use of send_result_set_metadata.
|
|
|
|
@param thd THD pointer
|
|
@param param temporary table parameters
|
|
@param ref_pointer_array array of pointers to top elements of filed list
|
|
@param res_selected_fields new list of items of select item list
|
|
@param res_all_fields new list of all items
|
|
@param elements number of elements in select item list
|
|
@param all_fields all fields list
|
|
|
|
@todo
|
|
In most cases this result will be sent to the user.
|
|
This should be changed to use copy_int or copy_real depending
|
|
on how the value is to be used: In some cases this may be an
|
|
argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
!=0 error
|
|
*/
|
|
|
|
bool
|
|
setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
|
|
Ref_ptr_array ref_pointer_array,
|
|
List<Item> &res_selected_fields, List<Item> &res_all_fields,
|
|
uint elements, List<Item> &all_fields)
|
|
{
|
|
Item *pos;
|
|
List_iterator_fast<Item> li(all_fields);
|
|
Copy_field *copy= NULL;
|
|
Copy_field *copy_start __attribute__((unused));
|
|
res_selected_fields.empty();
|
|
res_all_fields.empty();
|
|
List_iterator_fast<Item> itr(res_all_fields);
|
|
List<Item> extra_funcs;
|
|
uint i, border= all_fields.elements - elements;
|
|
DBUG_ENTER("setup_copy_fields");
|
|
|
|
if (param->field_count &&
|
|
!(copy=param->copy_field= new (thd->mem_root) Copy_field[param->field_count]))
|
|
goto err2;
|
|
|
|
param->copy_funcs.empty();
|
|
copy_start= copy;
|
|
for (i= 0; (pos= li++); i++)
|
|
{
|
|
Field *field;
|
|
uchar *tmp;
|
|
Item *real_pos= pos->real_item();
|
|
/*
|
|
Aggregate functions can be substituted for fields (by e.g. temp tables).
|
|
We need to filter those substituted fields out.
|
|
*/
|
|
if (real_pos->type() == Item::FIELD_ITEM &&
|
|
!(real_pos != pos &&
|
|
((Item_ref *)pos)->ref_type() == Item_ref::AGGREGATE_REF))
|
|
{
|
|
Item_field *item;
|
|
if (!(item= new (thd->mem_root) Item_field(thd, ((Item_field*) real_pos))))
|
|
goto err;
|
|
if (pos->type() == Item::REF_ITEM)
|
|
{
|
|
/* preserve the names of the ref when dereferncing */
|
|
Item_ref *ref= (Item_ref *) pos;
|
|
item->db_name= ref->db_name;
|
|
item->table_name= ref->table_name;
|
|
item->name= ref->name;
|
|
}
|
|
pos= item;
|
|
if (item->field->flags & BLOB_FLAG)
|
|
{
|
|
if (!(pos= new (thd->mem_root) Item_copy_string(thd, pos)))
|
|
goto err;
|
|
/*
|
|
Item_copy_string::copy for function can call
|
|
Item_copy_string::val_int for blob via Item_ref.
|
|
But if Item_copy_string::copy for blob isn't called before,
|
|
it's value will be wrong
|
|
so let's insert Item_copy_string for blobs in the beginning of
|
|
copy_funcs
|
|
(to see full test case look at having.test, BUG #4358)
|
|
*/
|
|
if (param->copy_funcs.push_front(pos, thd->mem_root))
|
|
goto err;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
set up save buffer and change result_field to point at
|
|
saved value
|
|
*/
|
|
field= item->field;
|
|
item->result_field=field->make_new_field(thd->mem_root,
|
|
field->table, 1);
|
|
/*
|
|
We need to allocate one extra byte for null handling and
|
|
another extra byte to not get warnings from purify in
|
|
Field_string::val_int
|
|
*/
|
|
if (!(tmp= (uchar*) thd->alloc(field->pack_length()+2)))
|
|
goto err;
|
|
if (copy)
|
|
{
|
|
DBUG_ASSERT (param->field_count > (uint) (copy - copy_start));
|
|
copy->set(tmp, item->result_field);
|
|
item->result_field->move_field(copy->to_ptr,copy->to_null_ptr,1);
|
|
#ifdef HAVE_valgrind
|
|
copy->to_ptr[copy->from_length]= 0;
|
|
#endif
|
|
copy++;
|
|
}
|
|
}
|
|
}
|
|
else if ((real_pos->type() == Item::FUNC_ITEM ||
|
|
real_pos->real_type() == Item::SUBSELECT_ITEM ||
|
|
real_pos->type() == Item::CACHE_ITEM ||
|
|
real_pos->type() == Item::COND_ITEM) &&
|
|
!real_pos->with_sum_func())
|
|
{ // Save for send fields
|
|
const Lex_ident_column real_name= pos->name;
|
|
pos= real_pos;
|
|
pos->name= real_name;
|
|
/* TODO:
|
|
In most cases this result will be sent to the user.
|
|
This should be changed to use copy_int or copy_real depending
|
|
on how the value is to be used: In some cases this may be an
|
|
argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
|
|
*/
|
|
if (!(pos= pos->type_handler()->create_item_copy(thd, pos)))
|
|
goto err;
|
|
if (i < border) // HAVING, ORDER and GROUP BY
|
|
{
|
|
if (extra_funcs.push_back(pos, thd->mem_root))
|
|
goto err;
|
|
}
|
|
else if (param->copy_funcs.push_back(pos, thd->mem_root))
|
|
goto err;
|
|
}
|
|
res_all_fields.push_back(pos, thd->mem_root);
|
|
ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
|
|
pos;
|
|
}
|
|
param->copy_field_end= copy;
|
|
|
|
for (i= 0; i < border; i++)
|
|
itr++;
|
|
itr.sublist(res_selected_fields, elements);
|
|
/*
|
|
Put elements from HAVING, ORDER BY and GROUP BY last to ensure that any
|
|
reference used in these will resolve to a item that is already calculated
|
|
*/
|
|
param->copy_funcs.append(&extra_funcs);
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
err:
|
|
if (copy)
|
|
delete [] param->copy_field; // This is never 0
|
|
param->copy_field= 0;
|
|
err2:
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
|
|
/**
|
|
Make a copy of all simple SELECT'ed items.
|
|
|
|
This is done at the start of a new group so that we can retrieve
|
|
these later when the group changes.
|
|
*/
|
|
|
|
void
|
|
copy_fields(TMP_TABLE_PARAM *param)
|
|
{
|
|
Copy_field *ptr=param->copy_field;
|
|
Copy_field *end=param->copy_field_end;
|
|
|
|
DBUG_ASSERT((ptr != NULL && end >= ptr) || (ptr == NULL && end == NULL));
|
|
|
|
for (; ptr != end; ptr++)
|
|
(*ptr->do_copy)(ptr);
|
|
|
|
List_iterator_fast<Item> it(param->copy_funcs);
|
|
Item_copy *item;
|
|
while ((item= (Item_copy*) it++))
|
|
item->copy();
|
|
}
|
|
|
|
|
|
/**
|
|
Make an array of pointers to sum_functions to speed up
|
|
sum_func calculation.
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 Error
|
|
*/
|
|
|
|
bool JOIN::alloc_func_list()
|
|
{
|
|
uint func_count, group_parts;
|
|
DBUG_ENTER("alloc_func_list");
|
|
|
|
func_count= tmp_table_param.sum_func_count;
|
|
/*
|
|
If we are using rollup, we need a copy of the summary functions for
|
|
each level
|
|
*/
|
|
if (rollup.state != ROLLUP::STATE_NONE)
|
|
func_count*= (send_group_parts+1);
|
|
|
|
group_parts= send_group_parts;
|
|
/*
|
|
If distinct, reserve memory for possible
|
|
disctinct->group_by optimization
|
|
*/
|
|
if (select_distinct)
|
|
{
|
|
group_parts+= fields_list.elements;
|
|
/*
|
|
If the ORDER clause is specified then it's possible that
|
|
it also will be optimized, so reserve space for it too
|
|
*/
|
|
if (order)
|
|
{
|
|
ORDER *ord;
|
|
for (ord= order; ord; ord= ord->next)
|
|
group_parts++;
|
|
}
|
|
}
|
|
|
|
/* This must use calloc() as rollup_make_fields depends on this */
|
|
sum_funcs= (Item_sum**) thd->calloc(sizeof(Item_sum**) * (func_count+1) +
|
|
sizeof(Item_sum***) * (group_parts+1));
|
|
sum_funcs_end= (Item_sum***) (sum_funcs+func_count+1);
|
|
DBUG_RETURN(sum_funcs == 0);
|
|
}
|
|
|
|
|
|
/**
|
|
Initialize 'sum_funcs' array with all Item_sum objects.
|
|
|
|
@param field_list All items
|
|
@param send_result_set_metadata Items in select list
|
|
@param before_group_by Set to 1 if this is called before GROUP BY handling
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 error
|
|
*/
|
|
|
|
bool JOIN::make_sum_func_list(List<Item> &field_list,
|
|
List<Item> &send_result_set_metadata,
|
|
bool before_group_by)
|
|
{
|
|
List_iterator_fast<Item> it(field_list);
|
|
Item_sum **func;
|
|
Item *item;
|
|
DBUG_ENTER("make_sum_func_list");
|
|
|
|
func= sum_funcs;
|
|
while ((item=it++))
|
|
{
|
|
if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
|
|
(!((Item_sum*) item)->depended_from() ||
|
|
((Item_sum *)item)->depended_from() == select_lex))
|
|
*func++= (Item_sum*) item;
|
|
}
|
|
if (before_group_by && rollup.state == ROLLUP::STATE_INITED)
|
|
{
|
|
rollup.state= ROLLUP::STATE_READY;
|
|
if (rollup_make_fields(field_list, send_result_set_metadata, &func))
|
|
DBUG_RETURN(TRUE); // Should never happen
|
|
}
|
|
else if (rollup.state == ROLLUP::STATE_NONE)
|
|
{
|
|
for (uint i=0 ; i <= send_group_parts ;i++)
|
|
sum_funcs_end[i]= func;
|
|
}
|
|
else if (rollup.state == ROLLUP::STATE_READY)
|
|
DBUG_RETURN(FALSE); // Don't put end marker
|
|
*func=0; // End marker
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/**
|
|
Change all funcs and sum_funcs to fields in tmp table, and create
|
|
new list of all items.
|
|
|
|
@param thd THD pointer
|
|
@param ref_pointer_array array of pointers to top elements of filed list
|
|
@param res_selected_fields new list of items of select item list
|
|
@param res_all_fields new list of all items
|
|
@param elements number of elements in select item list
|
|
@param all_fields all fields list
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
!=0 error
|
|
*/
|
|
|
|
static bool
|
|
change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
List<Item> &res_selected_fields,
|
|
List<Item> &res_all_fields,
|
|
uint elements, List<Item> &all_fields)
|
|
{
|
|
List_iterator_fast<Item> it(all_fields);
|
|
Item *item_field,*item;
|
|
DBUG_ENTER("change_to_use_tmp_fields");
|
|
|
|
res_selected_fields.empty();
|
|
res_all_fields.empty();
|
|
|
|
uint border= all_fields.elements - elements;
|
|
for (uint i= 0; (item= it++); i++)
|
|
{
|
|
Field *field;
|
|
/*
|
|
SUM_FUNC_ITEM will be replaced by the calculated value which is
|
|
stored in the temporary table.
|
|
The first part of the following test is for items that are expressions
|
|
with SUM_FUNC_ITEMS, like 'sum(a)+1'. In this case we keep the original
|
|
item, which contain an Item_ref that points to the SUM_FUNC_ITEM that
|
|
will be replaced with a pointer to the calculated value.
|
|
The second test is for window functions. Window functions contains
|
|
only pointers to Item_refs, which will be adjusted to point to the
|
|
temporary table.
|
|
*/
|
|
enum Item::Type item_type= item->type();
|
|
if ((item->with_sum_func() && item_type != Item::SUM_FUNC_ITEM) ||
|
|
item->with_window_func())
|
|
item_field= item;
|
|
else if (item_type == Item::FIELD_ITEM ||
|
|
item_type == Item::DEFAULT_VALUE_ITEM)
|
|
{
|
|
if (!(item_field= item->get_tmp_table_item(thd)))
|
|
DBUG_RETURN(true);
|
|
}
|
|
else if (item_type == Item::FUNC_ITEM &&
|
|
((Item_func*)item)->functype() == Item_func::SUSERVAR_FUNC)
|
|
{
|
|
field= item->get_tmp_table_field();
|
|
if (field != NULL)
|
|
{
|
|
/*
|
|
Replace "@:=<expression>" with "@:=<tmp table
|
|
column>". Otherwise, we would re-evaluate <expression>, and
|
|
if expression were a subquery, this would access
|
|
already-unlocked tables.
|
|
*/
|
|
Item_func_set_user_var* suv=
|
|
new (thd->mem_root) Item_func_set_user_var(thd, (Item_func_set_user_var*) item);
|
|
Item_field *new_field= new (thd->mem_root) Item_field(thd, field);
|
|
if (!suv || !new_field)
|
|
DBUG_RETURN(true); // Fatal error
|
|
new_field->set_refers_to_temp_table();
|
|
List<Item> list;
|
|
list.push_back(new_field, thd->mem_root);
|
|
suv->set_arguments(thd, list);
|
|
item_field= suv;
|
|
}
|
|
else
|
|
item_field= item;
|
|
}
|
|
else if ((field= item->get_tmp_table_field()))
|
|
{
|
|
if (item->type() == Item::SUM_FUNC_ITEM && field->table->group)
|
|
{
|
|
item_field= ((Item_sum*) item)->result_item(thd, field);
|
|
}
|
|
else
|
|
{
|
|
item_field= (Item*) new (thd->mem_root) Item_field(thd, field);
|
|
if (item_field)
|
|
((Item_field*) item_field)->set_refers_to_temp_table();
|
|
}
|
|
if (!item_field)
|
|
DBUG_RETURN(true); // Fatal error
|
|
|
|
if (item->real_item()->type() != Item::FIELD_ITEM)
|
|
field->orig_table= 0;
|
|
item_field->name= item->name;
|
|
if (item->type() == Item::REF_ITEM)
|
|
{
|
|
Item_field *ifield= (Item_field *) item_field;
|
|
Item_ref *iref= (Item_ref *) item;
|
|
ifield->table_name= iref->table_name;
|
|
ifield->db_name= iref->db_name;
|
|
}
|
|
#ifndef DBUG_OFF
|
|
if (!item_field->name.str)
|
|
{
|
|
char buff[256];
|
|
String str(buff,sizeof(buff),&my_charset_bin);
|
|
str.length(0);
|
|
str.extra_allocation(1024);
|
|
item->print(&str, QT_ORDINARY);
|
|
item_field->name.str= thd->strmake(str.ptr(), str.length());
|
|
item_field->name.length= str.length();
|
|
}
|
|
#endif
|
|
}
|
|
else
|
|
item_field= item;
|
|
|
|
res_all_fields.push_back(item_field, thd->mem_root);
|
|
ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
|
|
item_field;
|
|
}
|
|
|
|
List_iterator_fast<Item> itr(res_all_fields);
|
|
for (uint i= 0; i < border; i++)
|
|
itr++;
|
|
itr.sublist(res_selected_fields, elements);
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
|
|
/**
|
|
Change all sum_func refs to fields to point at fields in tmp table.
|
|
Change all funcs to be fields in tmp table.
|
|
|
|
@param thd THD pointer
|
|
@param ref_pointer_array array of pointers to top elements of field list
|
|
@param res_selected_fields new list of items of select item list
|
|
@param res_all_fields new list of all items
|
|
@param elements number of elements in select item list
|
|
@param all_fields all fields list
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 error
|
|
*/
|
|
|
|
static bool
|
|
change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
List<Item> &res_selected_fields,
|
|
List<Item> &res_all_fields, uint elements,
|
|
List<Item> &all_fields)
|
|
{
|
|
List_iterator_fast<Item> it(all_fields);
|
|
Item *item, *new_item;
|
|
res_selected_fields.empty();
|
|
res_all_fields.empty();
|
|
|
|
uint i, border= all_fields.elements - elements;
|
|
for (i= 0; (item= it++); i++)
|
|
{
|
|
if (item->type() == Item::SUM_FUNC_ITEM && item->const_item())
|
|
new_item= item;
|
|
else
|
|
{
|
|
if (!(new_item= item->get_tmp_table_item(thd)))
|
|
return 1;
|
|
}
|
|
|
|
if (res_all_fields.push_back(new_item, thd->mem_root))
|
|
return 1;
|
|
ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
|
|
new_item;
|
|
}
|
|
|
|
List_iterator_fast<Item> itr(res_all_fields);
|
|
for (i= 0; i < border; i++)
|
|
itr++;
|
|
itr.sublist(res_selected_fields, elements);
|
|
|
|
return thd->is_error();
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************************
|
|
Code for calculating functions
|
|
******************************************************************************/
|
|
|
|
|
|
/**
|
|
Call ::setup for all sum functions.
|
|
|
|
@param thd thread handler
|
|
@param func_ptr sum function list
|
|
|
|
@retval
|
|
FALSE ok
|
|
@retval
|
|
TRUE error
|
|
*/
|
|
|
|
static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
|
|
{
|
|
Item_sum *func;
|
|
DBUG_ENTER("setup_sum_funcs");
|
|
while ((func= *(func_ptr++)))
|
|
{
|
|
if (func->aggregator_setup(thd))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Setup aggregate functions.
|
|
|
|
@param thd Thread descriptor
|
|
@param func_ptr Array of pointers to aggregate functions
|
|
@param need_distinct FALSE means that the table access method already
|
|
guarantees that arguments of all aggregate functions
|
|
will be unique. (This is the case for Loose Scan)
|
|
TRUE - Otherwise.
|
|
@return
|
|
false Ok
|
|
true Error
|
|
*/
|
|
|
|
bool JOIN::prepare_sum_aggregators(THD *thd, Item_sum **func_ptr,
|
|
bool need_distinct)
|
|
{
|
|
Item_sum *func;
|
|
DBUG_ENTER("prepare_sum_aggregators");
|
|
while ((func= *(func_ptr++)))
|
|
{
|
|
bool need_distinct_aggregator= need_distinct && func->has_with_distinct();
|
|
if (need_distinct_aggregator && table_count - const_tables == 1)
|
|
{
|
|
/*
|
|
We are doing setup for an aggregate with DISTINCT, like
|
|
|
|
SELECT agg_func(DISTINCT col1, col2 ...) FROM ...
|
|
|
|
In general case, agg_func will need to use Aggregator_distinct to
|
|
remove duplicates from its arguments.
|
|
We won't have to remove duplicates if we know the arguments are already
|
|
unique. This is true when
|
|
1. the join operation has only one non-const table (checked above)
|
|
2. the argument list covers a PRIMARY or a UNIQUE index.
|
|
|
|
Example: here the values of t1.pk are unique:
|
|
|
|
SELECT agg_func(DISTINCT t1.pk, ...) FROM t1
|
|
|
|
and so the whole argument of agg_func is unique.
|
|
*/
|
|
List<Item> arg_fields;
|
|
for (uint i= 0; i < func->argument_count(); i++)
|
|
{
|
|
if (func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
|
|
arg_fields.push_back(func->arguments()[i]);
|
|
}
|
|
|
|
/*
|
|
If the query has a GROUP BY, then it's sufficient that a unique
|
|
key is covered by a concatenation of {argument_list, group_by_list}.
|
|
|
|
Example: Suppose t1 has PRIMARY KEY(pk1, pk2). Then:
|
|
|
|
SELECT agg_func(DISTINCT t1.pk1, ...) FROM t1 GROUP BY t1.pk2
|
|
|
|
Each GROUP BY group will have t1.pk2 fixed. Then, the values of t1.pk1
|
|
will be unique, and no de-duplication will be needed.
|
|
*/
|
|
for (ORDER *group= group_list; group ; group= group->next)
|
|
{
|
|
if ((*group->item)->real_item()->type() == Item::FIELD_ITEM)
|
|
arg_fields.push_back(*group->item);
|
|
}
|
|
|
|
if (list_contains_unique_index(join_tab[const_tables].table,
|
|
find_field_in_item_list,
|
|
(void *) &arg_fields))
|
|
need_distinct_aggregator= false;
|
|
}
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_aggr(thd, "prepare_sum_aggregators");
|
|
trace_aggr.add("function", func);
|
|
trace_aggr.add("aggregator_type",
|
|
(need_distinct_aggregator ||
|
|
func->uses_non_standard_aggregator_for_distinct()) ?
|
|
"distinct" : "simple");
|
|
if (func->set_aggregator(thd, need_distinct_aggregator ?
|
|
Aggregator::DISTINCT_AGGREGATOR :
|
|
Aggregator::SIMPLE_AGGREGATOR))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
static void
|
|
init_tmptable_sum_functions(Item_sum **func_ptr)
|
|
{
|
|
Item_sum *func;
|
|
while ((func= *(func_ptr++)))
|
|
func->reset_field();
|
|
}
|
|
|
|
|
|
/** Update record 0 in tmp_table from record 1. */
|
|
|
|
static void
|
|
update_tmptable_sum_func(Item_sum **func_ptr,
|
|
TABLE *tmp_table __attribute__((unused)))
|
|
{
|
|
Item_sum *func;
|
|
while ((func= *(func_ptr++)))
|
|
func->update_field();
|
|
}
|
|
|
|
|
|
/** Copy result of sum functions to record in tmp_table. */
|
|
|
|
static void
|
|
copy_sum_funcs(Item_sum **func_ptr, Item_sum **end_ptr)
|
|
{
|
|
for (; func_ptr != end_ptr ; func_ptr++)
|
|
(void) (*func_ptr)->save_in_result_field(1);
|
|
return;
|
|
}
|
|
|
|
|
|
static bool
|
|
init_sum_functions(Item_sum **func_ptr, Item_sum **end_ptr)
|
|
{
|
|
for (; func_ptr != end_ptr ;func_ptr++)
|
|
{
|
|
if ((*func_ptr)->reset_and_add())
|
|
return 1;
|
|
}
|
|
/* If rollup, calculate the upper sum levels */
|
|
for ( ; *func_ptr ; func_ptr++)
|
|
{
|
|
if ((*func_ptr)->aggregator_add())
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
static bool
|
|
update_sum_func(Item_sum **func_ptr)
|
|
{
|
|
Item_sum *func;
|
|
for (; (func= (Item_sum*) *func_ptr) ; func_ptr++)
|
|
if (func->aggregator_add())
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Copy result of functions to record in tmp_table.
|
|
|
|
Uses the thread pointer to check for errors in
|
|
some of the val_xxx() methods called by the
|
|
save_in_result_field() function.
|
|
TODO: make the Item::val_xxx() return error code
|
|
|
|
@param func_ptr array of the function Items to copy to the tmp table
|
|
@param thd pointer to the current thread for error checking
|
|
@retval
|
|
FALSE if OK
|
|
@retval
|
|
TRUE on error
|
|
*/
|
|
|
|
bool
|
|
copy_funcs(Item **func_ptr, const THD *thd)
|
|
{
|
|
Item *func;
|
|
for (; (func = *func_ptr) ; func_ptr++)
|
|
{
|
|
if (func->type() == Item::FUNC_ITEM &&
|
|
((Item_func *) func)->with_window_func())
|
|
continue;
|
|
func->save_in_result_field(1);
|
|
/*
|
|
Need to check the THD error state because Item::val_xxx() don't
|
|
return error code, but can generate errors
|
|
TODO: change it for a real status check when Item::val_xxx()
|
|
are extended to return status code.
|
|
*/
|
|
if (unlikely(thd->is_error()))
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Create a condition for a const reference and add this to the
|
|
currenct select for the table.
|
|
*/
|
|
|
|
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
|
|
{
|
|
DBUG_ENTER("add_ref_to_table_cond");
|
|
if (!join_tab->ref.key_parts)
|
|
DBUG_RETURN(FALSE);
|
|
|
|
Item_cond_and *cond= new (thd->mem_root) Item_cond_and(thd);
|
|
TABLE *table=join_tab->table;
|
|
int error= 0;
|
|
if (!cond)
|
|
DBUG_RETURN(TRUE);
|
|
|
|
for (uint i=0 ; i < join_tab->ref.key_parts ; i++)
|
|
{
|
|
Field *field=table->field[table->key_info[join_tab->ref.key].key_part[i].
|
|
fieldnr-1];
|
|
Item *value=join_tab->ref.items[i];
|
|
cond->add(new (thd->mem_root)
|
|
Item_func_equal(thd, new (thd->mem_root) Item_field(thd, field),
|
|
value),
|
|
thd->mem_root);
|
|
}
|
|
if (unlikely(thd->is_error()))
|
|
DBUG_RETURN(TRUE);
|
|
if (!cond->fixed())
|
|
{
|
|
Item *tmp_item= (Item*) cond;
|
|
cond->fix_fields(thd, &tmp_item);
|
|
DBUG_ASSERT(cond == tmp_item);
|
|
}
|
|
if (join_tab->select)
|
|
{
|
|
Item *UNINIT_VAR(cond_copy);
|
|
if (join_tab->select->pre_idx_push_select_cond)
|
|
cond_copy= cond->copy_andor_structure(thd);
|
|
if (join_tab->select->cond)
|
|
error=(int) cond->add(join_tab->select->cond, thd->mem_root);
|
|
join_tab->select->cond= cond;
|
|
if (join_tab->select->pre_idx_push_select_cond)
|
|
{
|
|
Item *new_cond= and_conds(thd, cond_copy,
|
|
join_tab->select->pre_idx_push_select_cond);
|
|
if (new_cond->fix_fields_if_needed(thd, &new_cond))
|
|
error= 1;
|
|
join_tab->pre_idx_push_select_cond=
|
|
join_tab->select->pre_idx_push_select_cond= new_cond;
|
|
}
|
|
join_tab->set_select_cond(cond, __LINE__);
|
|
}
|
|
else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond,
|
|
(SORT_INFO*) 0, 0, &error)))
|
|
join_tab->set_select_cond(cond, __LINE__);
|
|
|
|
DBUG_RETURN(error ? TRUE : FALSE);
|
|
}
|
|
|
|
|
|
/**
|
|
Free joins of subselect of this select.
|
|
|
|
@param thd THD pointer
|
|
@param select pointer to st_select_lex which subselects joins we will free
|
|
*/
|
|
|
|
void free_underlaid_joins(THD *thd, SELECT_LEX *select)
|
|
{
|
|
for (SELECT_LEX_UNIT *unit= select->first_inner_unit();
|
|
unit;
|
|
unit= unit->next_unit())
|
|
unit->cleanup();
|
|
}
|
|
|
|
/****************************************************************************
|
|
ROLLUP handling
|
|
****************************************************************************/
|
|
|
|
/**
|
|
Replace occurrences of group by fields in an expression by ref items.
|
|
|
|
The function replaces occurrences of group by fields in expr
|
|
by ref objects for these fields unless they are under aggregate
|
|
functions.
|
|
The function also corrects value of the the maybe_null attribute
|
|
for the items of all subexpressions containing group by fields.
|
|
|
|
@b EXAMPLES
|
|
@code
|
|
SELECT a+1 FROM t1 GROUP BY a WITH ROLLUP
|
|
SELECT SUM(a)+a FROM t1 GROUP BY a WITH ROLLUP
|
|
@endcode
|
|
|
|
@b IMPLEMENTATION
|
|
|
|
The function recursively traverses the tree of the expr expression,
|
|
looks for occurrences of the group by fields that are not under
|
|
aggregate functions and replaces them for the corresponding ref items.
|
|
|
|
@note
|
|
This substitution is needed GROUP BY queries with ROLLUP if
|
|
SELECT list contains expressions over group by attributes.
|
|
|
|
@param thd reference to the context
|
|
@param expr expression to make replacement
|
|
@param group_list list of references to group by items
|
|
@param changed out: returns 1 if item contains a replaced field item
|
|
|
|
@todo
|
|
- TODO: Some functions are not null-preserving. For those functions
|
|
updating of the maybe_null attribute is an overkill.
|
|
|
|
@retval
|
|
0 if ok
|
|
@retval
|
|
1 on error
|
|
*/
|
|
|
|
static bool change_group_ref(THD *thd, Item_func *expr, ORDER *group_list,
|
|
bool *changed)
|
|
{
|
|
if (expr->argument_count())
|
|
{
|
|
Name_resolution_context *context= &thd->lex->current_select->context;
|
|
Item **arg,**arg_end;
|
|
bool arg_changed= FALSE;
|
|
for (arg= expr->arguments(),
|
|
arg_end= expr->arguments() + expr->argument_count();
|
|
arg != arg_end; arg++)
|
|
{
|
|
Item *item= *arg;
|
|
if (item->type() == Item::FIELD_ITEM || item->type() == Item::REF_ITEM)
|
|
{
|
|
ORDER *group_tmp;
|
|
for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
|
|
{
|
|
if (item->eq(*group_tmp->item,0))
|
|
{
|
|
Item *new_item;
|
|
if (!(new_item= new (thd->mem_root) Item_ref(thd, context,
|
|
group_tmp->item,
|
|
null_clex_str,
|
|
item->name)))
|
|
return 1; // fatal_error is set
|
|
thd->change_item_tree(arg, new_item);
|
|
arg_changed= TRUE;
|
|
}
|
|
}
|
|
}
|
|
else if (item->type() == Item::FUNC_ITEM)
|
|
{
|
|
if (change_group_ref(thd, (Item_func *) item, group_list, &arg_changed))
|
|
return 1;
|
|
}
|
|
}
|
|
if (arg_changed)
|
|
{
|
|
expr->base_flags|= item_base_t::MAYBE_NULL | item_base_t::IN_ROLLUP;
|
|
*changed= TRUE;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/** Allocate memory needed for other rollup functions. */
|
|
|
|
bool JOIN::rollup_init()
|
|
{
|
|
uint i,j;
|
|
Item **ref_array;
|
|
|
|
tmp_table_param.quick_group= 0; // Can't create groups in tmp table
|
|
/*
|
|
Each group can potentially be replaced with Item_func_rollup_const() which
|
|
needs a copy_func placeholder.
|
|
*/
|
|
tmp_table_param.func_count+= send_group_parts;
|
|
rollup.state= ROLLUP::STATE_INITED;
|
|
|
|
/*
|
|
Create pointers to the different sum function groups
|
|
These are updated by rollup_make_fields()
|
|
*/
|
|
tmp_table_param.group_parts= send_group_parts;
|
|
|
|
Item_null_result **null_items=
|
|
static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts));
|
|
|
|
rollup.null_items= Item_null_array(null_items, send_group_parts);
|
|
rollup.ref_pointer_arrays=
|
|
static_cast<Ref_ptr_array*>
|
|
(thd->alloc((sizeof(Ref_ptr_array) +
|
|
all_fields.elements * sizeof(Item*)) * send_group_parts));
|
|
rollup.fields=
|
|
static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts));
|
|
|
|
if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
|
|
return true;
|
|
|
|
ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
|
|
|
|
/*
|
|
Prepare space for field list for the different levels
|
|
These will be filled up in rollup_make_fields()
|
|
*/
|
|
for (i= 0 ; i < send_group_parts ; i++)
|
|
{
|
|
if (!(rollup.null_items[i]= new (thd->mem_root) Item_null_result(thd)))
|
|
return true;
|
|
|
|
List<Item> *rollup_fields= &rollup.fields[i];
|
|
rollup_fields->empty();
|
|
rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, all_fields.elements);
|
|
ref_array+= all_fields.elements;
|
|
}
|
|
for (i= 0 ; i < send_group_parts; i++)
|
|
{
|
|
for (j=0 ; j < fields_list.elements ; j++)
|
|
rollup.fields[i].push_back(rollup.null_items[i], thd->mem_root);
|
|
}
|
|
List_iterator<Item> it(all_fields);
|
|
Item *item;
|
|
while ((item= it++))
|
|
{
|
|
ORDER *group_tmp;
|
|
bool found_in_group= 0;
|
|
|
|
for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
|
|
{
|
|
if (*group_tmp->item == item)
|
|
{
|
|
item->base_flags|= item_base_t::MAYBE_NULL | item_base_t::IN_ROLLUP;
|
|
found_in_group= 1;
|
|
break;
|
|
}
|
|
}
|
|
if (item->type() == Item::FUNC_ITEM && !found_in_group)
|
|
{
|
|
bool changed= FALSE;
|
|
if (change_group_ref(thd, (Item_func *) item, group_list, &changed))
|
|
return 1;
|
|
/*
|
|
We have to prevent creation of a field in a temporary table for
|
|
an expression that contains GROUP BY attributes.
|
|
Marking the expression item as 'with_sum_func' will ensure this.
|
|
*/
|
|
if (changed)
|
|
item->with_flags|= item_with_t::SUM_FUNC;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Wrap all constant Items in GROUP BY list.
|
|
|
|
For ROLLUP queries each constant item referenced in GROUP BY list
|
|
is wrapped up into an Item_func object yielding the same value
|
|
as the constant item. The objects of the wrapper class are never
|
|
considered as constant items and besides they inherit all
|
|
properties of the Item_result_field class.
|
|
This wrapping allows us to ensure writing constant items
|
|
into temporary tables whenever the result of the ROLLUP
|
|
operation has to be written into a temporary table, e.g. when
|
|
ROLLUP is used together with DISTINCT in the SELECT list.
|
|
Usually when creating temporary tables for a intermidiate
|
|
result we do not include fields for constant expressions.
|
|
|
|
@retval
|
|
0 if ok
|
|
@retval
|
|
1 on error
|
|
*/
|
|
|
|
bool JOIN::rollup_process_const_fields()
|
|
{
|
|
ORDER *group_tmp;
|
|
Item *item;
|
|
List_iterator<Item> it(all_fields);
|
|
|
|
for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
|
|
{
|
|
if (!(*group_tmp->item)->const_item())
|
|
continue;
|
|
while ((item= it++))
|
|
{
|
|
if (*group_tmp->item == item)
|
|
{
|
|
Item* new_item= new (thd->mem_root) Item_func_rollup_const(thd, item);
|
|
if (!new_item)
|
|
return 1;
|
|
new_item->fix_fields(thd, (Item **) 0);
|
|
thd->change_item_tree(it.ref(), new_item);
|
|
for (ORDER *tmp= group_tmp; tmp; tmp= tmp->next)
|
|
{
|
|
if (*tmp->item == item)
|
|
thd->change_item_tree(tmp->item, new_item);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
it.rewind();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Fill up rollup structures with pointers to fields to use.
|
|
|
|
Creates copies of item_sum items for each sum level.
|
|
|
|
@param fields_arg List of all fields (hidden and real ones)
|
|
@param sel_fields Pointer to selected fields
|
|
@param func Store here a pointer to all fields
|
|
|
|
@retval
|
|
0 if ok;
|
|
In this case func is pointing to next not used element.
|
|
@retval
|
|
1 on error
|
|
*/
|
|
|
|
bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
|
|
Item_sum ***func)
|
|
{
|
|
List_iterator_fast<Item> it(fields_arg);
|
|
Item *first_field= sel_fields.head();
|
|
uint level;
|
|
|
|
/*
|
|
Create field lists for the different levels
|
|
|
|
The idea here is to have a separate field list for each rollup level to
|
|
avoid all runtime checks of which columns should be NULL.
|
|
|
|
The list is stored in reverse order to get sum function in such an order
|
|
in func that it makes it easy to reset them with init_sum_functions()
|
|
|
|
Assuming: SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP
|
|
|
|
rollup.fields[0] will contain list where a,b,c is NULL
|
|
rollup.fields[1] will contain list where b,c is NULL
|
|
...
|
|
rollup.ref_pointer_array[#] points to fields for rollup.fields[#]
|
|
...
|
|
sum_funcs_end[0] points to all sum functions
|
|
sum_funcs_end[1] points to all sum functions, except grand totals
|
|
...
|
|
*/
|
|
|
|
for (level=0 ; level < send_group_parts ; level++)
|
|
{
|
|
uint i;
|
|
uint pos= send_group_parts - level -1;
|
|
bool real_fields= 0;
|
|
Item *item;
|
|
List_iterator<Item> new_it(rollup.fields[pos]);
|
|
Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos];
|
|
ORDER *start_group;
|
|
|
|
/* Point to first hidden field */
|
|
uint ref_array_ix= fields_arg.elements-1;
|
|
|
|
/* Remember where the sum functions ends for the previous level */
|
|
sum_funcs_end[pos+1]= *func;
|
|
|
|
/* Find the start of the group for this level */
|
|
for (i= 0, start_group= group_list ;
|
|
i++ < pos ;
|
|
start_group= start_group->next)
|
|
;
|
|
|
|
it.rewind();
|
|
while ((item= it++))
|
|
{
|
|
if (item == first_field)
|
|
{
|
|
real_fields= 1; // End of hidden fields
|
|
ref_array_ix= 0;
|
|
}
|
|
|
|
if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
|
|
(!((Item_sum*) item)->depended_from() ||
|
|
((Item_sum *)item)->depended_from() == select_lex))
|
|
|
|
{
|
|
/*
|
|
This is a top level summary function that must be replaced with
|
|
a sum function that is reset for this level.
|
|
|
|
NOTE: This code creates an object which is not that nice in a
|
|
sub select. Fortunately it's not common to have rollup in
|
|
sub selects.
|
|
*/
|
|
item= item->copy_or_same(thd);
|
|
((Item_sum*) item)->make_unique();
|
|
*(*func)= (Item_sum*) item;
|
|
(*func)++;
|
|
}
|
|
else
|
|
{
|
|
/* Check if this is something that is part of this group by */
|
|
ORDER *group_tmp;
|
|
for (group_tmp= start_group, i= pos ;
|
|
group_tmp ; group_tmp= group_tmp->next, i++)
|
|
{
|
|
if (*group_tmp->item == item)
|
|
{
|
|
/*
|
|
This is an element that is used by the GROUP BY and should be
|
|
set to NULL in this level
|
|
*/
|
|
Item_null_result *null_item= new (thd->mem_root) Item_null_result(thd);
|
|
if (!null_item)
|
|
return 1;
|
|
// Value will be null sometimes
|
|
item->set_maybe_null();
|
|
null_item->result_field= item->get_tmp_table_field();
|
|
item= null_item;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
ref_array_start[ref_array_ix]= item;
|
|
if (real_fields)
|
|
{
|
|
(void) new_it++; // Point to next item
|
|
new_it.replace(item); // Replace previous
|
|
ref_array_ix++;
|
|
}
|
|
else
|
|
ref_array_ix--;
|
|
}
|
|
}
|
|
sum_funcs_end[0]= *func; // Point to last function
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Send all rollup levels higher than the current one to the client.
|
|
|
|
@b SAMPLE
|
|
@code
|
|
SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP
|
|
@endcode
|
|
|
|
@param idx Level we are on:
|
|
- 0 = Total sum level
|
|
- 1 = First group changed (a)
|
|
- 2 = Second group changed (a,b)
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 If send_data_failed()
|
|
*/
|
|
|
|
int JOIN::rollup_send_data(uint idx)
|
|
{
|
|
uint i;
|
|
for (i= send_group_parts ; i-- > idx ; )
|
|
{
|
|
int res= 0;
|
|
/* Get reference pointers to sum functions in place */
|
|
copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
|
|
if ((!having || having->val_int()))
|
|
{
|
|
if (send_records < unit->lim.get_select_limit() && do_send_rows &&
|
|
(res= result->send_data_with_check(rollup.fields[i],
|
|
unit, send_records)) > 0)
|
|
return 1;
|
|
if (!res)
|
|
send_records++;
|
|
}
|
|
}
|
|
/* Restore ref_pointer_array */
|
|
set_items_ref_array(current_ref_ptrs);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Write all rollup levels higher than the current one to a temp table.
|
|
|
|
@b SAMPLE
|
|
@code
|
|
SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP
|
|
@endcode
|
|
|
|
@param idx Level we are on:
|
|
- 0 = Total sum level
|
|
- 1 = First group changed (a)
|
|
- 2 = Second group changed (a,b)
|
|
@param table reference to temp table
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 if write_data_failed()
|
|
*/
|
|
|
|
int JOIN::rollup_write_data(uint idx, TMP_TABLE_PARAM *tmp_table_param_arg,
|
|
TABLE *table_arg)
|
|
{
|
|
uint i;
|
|
for (i= send_group_parts ; i-- > idx ; )
|
|
{
|
|
/* Get reference pointers to sum functions in place */
|
|
copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
|
|
if ((!having || having->val_int()))
|
|
{
|
|
int write_error;
|
|
Item *item;
|
|
List_iterator_fast<Item> it(rollup.fields[i]);
|
|
while ((item= it++))
|
|
{
|
|
if (item->type() == Item::NULL_ITEM && item->is_result_field())
|
|
item->save_in_result_field(1);
|
|
}
|
|
copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
|
|
if (unlikely((write_error=
|
|
table_arg->file->ha_write_tmp_row(table_arg->record[0]))))
|
|
{
|
|
if (create_internal_tmp_table_from_heap(thd, table_arg,
|
|
tmp_table_param_arg->start_recinfo,
|
|
&tmp_table_param_arg->recinfo,
|
|
write_error, 0, NULL))
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
/* Restore ref_pointer_array */
|
|
set_items_ref_array(current_ref_ptrs);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
clear results if there are not rows found for group
|
|
(end_send_group/end_write_group)
|
|
*/
|
|
|
|
void inline JOIN::clear_sum_funcs()
|
|
{
|
|
if (sum_funcs)
|
|
{
|
|
Item_sum *func, **func_ptr= sum_funcs;
|
|
while ((func= *(func_ptr++)))
|
|
func->clear();
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Prepare for returning 'empty row' when there is no matching row.
|
|
|
|
- Mark all tables with mark_as_null_row()
|
|
- Make a copy of of all simple SELECT items
|
|
- Reset all sum functions to NULL or 0.
|
|
*/
|
|
|
|
void JOIN::clear(table_map *cleared_tables)
|
|
{
|
|
clear_tables(this, cleared_tables);
|
|
copy_fields(&tmp_table_param);
|
|
clear_sum_funcs();
|
|
}
|
|
|
|
|
|
/**
|
|
Print an EXPLAIN line with all NULLs and given message in the 'Extra' column
|
|
|
|
@retval
|
|
0 ok
|
|
1 OOM error or error from send_data()
|
|
*/
|
|
|
|
int print_explain_message_line(select_result_sink *result,
|
|
uint8 options, bool is_analyze,
|
|
uint select_number,
|
|
const char *select_type,
|
|
ha_rows *rows,
|
|
const char *message)
|
|
{
|
|
/* Note: for SHOW EXPLAIN, this is caller thread's THD */
|
|
THD *thd= result->thd;
|
|
MEM_ROOT *mem_root= thd->mem_root;
|
|
Item *item_null= new (mem_root) Item_null(thd);
|
|
List<Item> item_list;
|
|
|
|
item_list.push_back(new (mem_root) Item_int(thd, (int32) select_number),
|
|
mem_root);
|
|
item_list.push_back(new (mem_root) Item_string_sys(thd, select_type),
|
|
mem_root);
|
|
/* `table` */
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `partitions` */
|
|
if (options & DESCRIBE_PARTITIONS)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* type, possible_keys, key, key_len, ref */
|
|
for (uint i=0 ; i < 5; i++)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `rows` */
|
|
StringBuffer<64> rows_str;
|
|
if (rows)
|
|
{
|
|
rows_str.append_ulonglong((ulonglong)(*rows));
|
|
item_list.push_back(new (mem_root)
|
|
Item_string_sys(thd, rows_str.ptr(),
|
|
rows_str.length()), mem_root);
|
|
}
|
|
else
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `r_rows` */
|
|
if (is_analyze)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `filtered` */
|
|
if (is_analyze || options & DESCRIBE_EXTENDED)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `r_filtered` */
|
|
if (is_analyze)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `Extra` */
|
|
if (message)
|
|
item_list.push_back(new (mem_root) Item_string_sys(thd, message),
|
|
mem_root);
|
|
else
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
if (unlikely(thd->is_error()) || unlikely(result->send_data(item_list)))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
Append MRR information from quick select to the given string
|
|
*/
|
|
|
|
void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res)
|
|
{
|
|
char mrr_str_buf[128];
|
|
mrr_str_buf[0]=0;
|
|
int len;
|
|
handler *h= quick->head->file;
|
|
len= h->multi_range_read_explain_info(quick->mrr_flags, mrr_str_buf,
|
|
sizeof(mrr_str_buf));
|
|
if (len > 0)
|
|
{
|
|
//res->append(STRING_WITH_LEN("; "));
|
|
res->append(mrr_str_buf, len);
|
|
}
|
|
}
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
/*
  Append to 'list' the name of every key of 'table' whose bit is set in
  'possible_keys', walking the keys in key-number order.

  @retval 0  ok
  @retval 1  list.append_str() reported a failure
*/
int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
                         key_map possible_keys)
{
  for (uint keyno= 0; keyno < table->s->keys; keyno++)
  {
    if (!possible_keys.is_set(keyno))
      continue;
    if (!list.append_str(alloc, table->key_info[keyno].name.str))
      return 1;
  }
  return 0;
}
|
|
|
|
|
|
/**
  Fill an Explain_table_access object with the access plan of this join tab.

  Records in 'eta' the table name, used partitions, access type,
  possible_keys / key / key_len / ref, the rows and filtered estimates and
  the "Extra" tags. It also points this JOIN_TAB's tracker members at the
  tracker objects stored inside 'eta'.

  @param eta            explain node to fill; also remembered in explain_plan
  @param prefix_tables  not read by this function
  @param distinct_arg   not read by this function
  @param first_top_tab  first_top_tab - 1 is compared with do_firstmatch to
                        decide whether a FirstMatch table name is recorded

  @retval 0  ok
  @retval 1  an allocation or a nested explain-saving call failed
*/
bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
                                 table_map prefix_tables,
                                 bool distinct_arg, JOIN_TAB *first_top_tab)
{
  int quick_type= -1;
  CHARSET_INFO *cs= system_charset_info;
  THD *thd= join->thd;
  TABLE_LIST *table_list= table->pos_in_table_list;
  QUICK_SELECT_I *cur_quick= NULL;
  my_bool key_read;
  char table_name_buffer[SAFE_NAME_LEN];
  KEY *key_info= 0;
  uint key_len= 0, used_index= MAX_KEY;

#ifdef NOT_YET
  /*
    Would be good to keep this condition up to date.
    Another alternative is to remove JOIN_TAB::cond_selectivity and use
    TABLE::cond_selectivity everywhere
  */
  DBUG_ASSERT(cond_selectivity == table->cond_selectivity);
#endif

  explain_plan= eta;
  eta->key.clear();
  eta->quick_info= NULL;
  eta->cost= join_read_time;
  eta->loops= join_loops;

  /*
    We assume that if this table does pre-sorting, then it doesn't do filtering
    with SQL_SELECT.
  */
  DBUG_ASSERT(!(select && filesort));
  const SQL_SELECT *tab_select= get_sql_select();

  if (filesort)
  {
    if (!(eta->pre_join_sort=
          new (thd->mem_root) Explain_aggr_filesort(thd->mem_root,
                                                    thd->lex->analyze_stmt,
                                                    filesort)))
      return 1;
  }
  // psergey-todo: data for filtering!
  tracker= &eta->tracker;
  jbuf_tracker= &eta->jbuf_tracker;
  jbuf_loops_tracker= &eta->jbuf_loops_tracker;
  jbuf_unpack_tracker= &eta->jbuf_unpack_tracker;

  /*
    Enable the table access time tracker for "ANALYZE stmt" and when
    log_slow_verbosity includes LOG_SLOW_VERBOSITY_ENGINE
  */
  if (unlikely(thd->lex->analyze_stmt ||
               thd->variables.log_slow_verbosity &
               LOG_SLOW_VERBOSITY_ENGINE))
  {
    table->file->set_time_tracker(&eta->op_tracker);

    /*
      Set handler_for_stats even if we are not running an ANALYZE command.
      There's no harm, and in case somebody runs a SHOW ANALYZE command we'll
      be able to print the engine statistics.
    */
    if (table->file->handler_stats &&
        table->s->tmp_table != INTERNAL_TMP_TABLE)
      eta->handler_for_stats= table->file;

    if (likely(thd->lex->analyze_stmt))
    {
      eta->op_tracker.set_gap_tracker(&eta->extra_time_tracker);
      eta->jbuf_unpack_tracker.set_gap_tracker(&eta->jbuf_extra_time_tracker);
    }
  }
  /* No need to save id and select_type here, they are kept in Explain_select */

  /* table */
  if (table->derived_select_number)
  {
    /* Derived table name generation */
    size_t len= my_snprintf(table_name_buffer, sizeof(table_name_buffer)-1,
                            "<derived%u>",
                            table->derived_select_number);
    eta->table_name.copy(table_name_buffer, len, cs);
  }
  else if (bush_children)
  {
    JOIN_TAB *ctab= bush_children->start;
    /* table */
    size_t len= my_snprintf(table_name_buffer,
                            sizeof(table_name_buffer)-1,
                            "<subquery%d>",
                            ctab->emb_sj_nest->sj_subq_pred->get_identifier());
    eta->table_name.copy(table_name_buffer, len, cs);
  }
  else
  {
    TABLE_LIST *real_table= table->pos_in_table_list;
    /*
      When multi-table UPDATE/DELETE does updates/deletes to a VIEW, the view
      is merged in a certain particular way (grep for DT_MERGE_FOR_INSERT).

      As a result, view's underlying tables have $tbl->pos_in_table_list={view}.
      We don't want to print view name in EXPLAIN, we want underlying table's
      alias (like specified in the view definition).
    */
    if (real_table->merged_for_insert)
    {
      TABLE_LIST *view_child=
        real_table->view->first_select_lex()->table_list.first;
      for (;view_child; view_child= view_child->next_local)
      {
        if (view_child->table == table)
        {
          real_table= view_child;
          break;
        }
      }
    }
    eta->table_name.copy(real_table->alias.str, real_table->alias.length, cs);
  }

  /* "partitions" column */
  {
#ifdef WITH_PARTITION_STORAGE_ENGINE
    partition_info *part_info;
    if (!table->derived_select_number &&
        (part_info= table->part_info))
    { //TODO: all thd->mem_root here should be fixed
      make_used_partitions_str(thd->mem_root, part_info, &eta->used_partitions,
                               eta->used_partitions_list);
      eta->used_partitions_set= true;
    }
    else
      eta->used_partitions_set= false;
#else
    /* just produce empty column if partitioning is not compiled in */
    eta->used_partitions_set= false;
#endif
  }

  /* "type" column */
  enum join_type tab_type= type;
  if ((type == JT_ALL || type == JT_RANGE || type == JT_HASH) &&
      tab_select && tab_select->quick && use_quick != 2)
  {
    cur_quick= tab_select->quick;
    quick_type= cur_quick->get_type();
    if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
        (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) ||
        (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
        (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
      tab_type= type == JT_HASH ? JT_HASH_INDEX_MERGE : JT_INDEX_MERGE;
    else
      tab_type= type == JT_HASH ? JT_HASH_RANGE : JT_RANGE;
  }
  eta->type= tab_type;

  /* Build "possible_keys" value */
  // psergey-todo: why does this use thd MEM_ROOT??? Doesn't this
  // break ANALYZE ? thd->mem_root will be freed, and after that we will
  // attempt to print the query plan?
  if (append_possible_keys(thd->mem_root, eta->possible_keys, table, keys))
    return 1;

  /* Build "key", "key_len", and "ref" */

  if (rowid_filter)
  {
    Range_rowid_filter *range_filter= (Range_rowid_filter *) rowid_filter;
    QUICK_SELECT_I *quick= range_filter->get_select()->quick;

    Explain_rowid_filter *erf= new (thd->mem_root) Explain_rowid_filter;
    /* Check the allocation before the first write through erf */
    if (!erf)
      return 1;
    erf->quick= quick->get_explain(thd->mem_root);
    erf->selectivity= range_rowid_filter_info->selectivity;
    erf->rows= quick->records;
    if (!(erf->tracker= new Rowid_filter_tracker(thd->lex->analyze_stmt)))
      return 1;
    rowid_filter->set_tracker(erf->tracker);
    eta->rowid_filter= erf;
  }

  if (tab_type == JT_NEXT)
  {
    used_index= index;
    key_info= table->key_info+index;
    key_len= key_info->key_length;
  }
  else if (ref.key_parts)
  {
    used_index= ref.key;
    key_info= get_keyinfo_by_key_no(ref.key);
    key_len= ref.key_length;
  }

  /*
    In STRAIGHT_JOIN queries, there can be join tabs with JT_CONST type
    that still have quick selects.
  */
  if (tab_select && tab_select->quick && tab_type != JT_CONST)
  {
    if (!(eta->quick_info= tab_select->quick->get_explain(thd->mem_root)))
      return 1;
  }

  if (key_info) /* 'index' or 'ref' access */
  {
    eta->key.set(thd->mem_root, key_info, key_len);

    if (ref.key_parts && tab_type != JT_FT)
    {
      store_key **key_ref= ref.key_copy;
      for (uint kp= 0; kp < ref.key_parts; kp++)
      {
        if ((key_part_map(1) << kp) & ref.const_ref_part_map)
        {
          if (!(eta->ref_list.append_str(thd->mem_root, "const")))
            return 1;
          /*
            create_ref_for_key() handles keypart=const equalities as follows:
              - non-EXPLAIN execution will copy the "const" to lookup tuple
                immediately and will not add an element to ref.key_copy
              - EXPLAIN will put an element into ref.key_copy. Since we've
                just printed "const" for it, we should skip it here
          */
          if (thd->lex->describe)
            key_ref++;
        }
        else
        {
          if (!(eta->ref_list.append_str(thd->mem_root, (*key_ref)->name())))
            return 1;
          key_ref++;
        }
      }
    }
  }

  if (tab_type == JT_HASH_NEXT) /* full index scan + hash join */
  {
    used_index= index;
    eta->hash_next_key.set(thd->mem_root,
                           & table->key_info[index],
                           table->key_info[index].key_length);
    // psergey-todo: ^ is the above correct? are we necessarily joining on all
    // columns?
  }

  if (!key_info)
  {
    if (table_list && /* SJM bushes don't have table_list */
        table_list->schema_table &&
        table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
    {
      IS_table_read_plan *is_table_read_plan= table_list->is_table_read_plan;
      StringBuffer<64> key_name_buf;
      if (is_table_read_plan->trivial_show_command ||
          is_table_read_plan->has_db_lookup_value())
      {
        /* The "key" has the name of the column referring to the database */
        int f_idx= table_list->schema_table->idx_field1;
        LEX_CSTRING tmp= table_list->schema_table->fields_info[f_idx].name();
        key_name_buf.append(tmp, cs);
      }
      if (is_table_read_plan->trivial_show_command ||
          is_table_read_plan->has_table_lookup_value())
      {
        if (is_table_read_plan->trivial_show_command ||
            is_table_read_plan->has_db_lookup_value())
          key_name_buf.append(',');

        int f_idx= table_list->schema_table->idx_field2;
        LEX_CSTRING tmp= table_list->schema_table->fields_info[f_idx].name();
        key_name_buf.append(tmp, cs);
      }

      if (key_name_buf.length())
        eta->key.set_pseudo_key(thd->mem_root, key_name_buf.c_ptr_safe());
    }
  }

  /* "rows" */
  if (table_list /* SJM bushes don't have table_list */ &&
      table_list->schema_table)
  {
    /* I_S tables have rows=extra=NULL */
    eta->rows_set= false;
    eta->filtered_set= false;
  }
  else
  {
    double examined_rows= get_examined_rows();

    eta->rows_set= true;
    eta->rows= double_to_rows(examined_rows);

    /* "filtered": percentage of examined rows, capped at 100 */
    float f= 0.0;
    if (examined_rows)
    {
      f= (float) (100.0 * records_out / examined_rows);
      set_if_smaller(f, 100.0);
    }
    eta->filtered_set= true;
    eta->filtered= f;
  }

  /* Build "Extra" field and save it */
  key_read= table->file->keyread_enabled();
  if ((tab_type == JT_NEXT || tab_type == JT_CONST) && used_index != MAX_KEY &&
      table->covering_keys.is_set(used_index))
    key_read=1;
  if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT &&
      !((QUICK_ROR_INTERSECT_SELECT*)cur_quick)->need_to_fetch_row)
    key_read=1;

  /*
    Guard against a NULL table_list here, as the other uses of table_list
    in this function do
  */
  if (table_list && table_list->table_function)
    eta->push_extra(ET_TABLE_FUNCTION);

  if (info)
  {
    eta->push_extra(info);
  }
  else if (packed_info & TAB_INFO_HAVE_VALUE)
  {
    if (packed_info & TAB_INFO_USING_INDEX)
      eta->push_extra(ET_USING_INDEX);
    if (packed_info & TAB_INFO_USING_WHERE)
      eta->push_extra(ET_USING_WHERE);
    if (packed_info & TAB_INFO_FULL_SCAN_ON_NULL)
      eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY);
  }
  else
  {
    uint keyno= MAX_KEY;
    if (ref.key_parts)
      keyno= ref.key;
    else if (tab_select && cur_quick)
      keyno = cur_quick->index;

    if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno &&
        table->file->pushed_idx_cond)
    {
      eta->push_extra(ET_USING_INDEX_CONDITION);
      eta->pushed_index_cond= table->file->pushed_idx_cond;
    }
    else if (cache_idx_cond)
    {
      eta->push_extra(ET_USING_INDEX_CONDITION_BKA);
      eta->pushed_index_cond= cache_idx_cond;
    }

    if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
        quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
        quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
    {
      eta->push_extra(ET_USING);
    }
    if (tab_select)
    {
      if (use_quick == 2)
      {
        eta->push_extra(ET_RANGE_CHECKED_FOR_EACH_RECORD);
        eta->range_checked_fer= new (thd->mem_root) Explain_range_checked_fer;
        if (eta->range_checked_fer)
          eta->range_checked_fer->
            append_possible_keys_stat(thd->mem_root, table, keys);
      }
      else if (tab_select->cond ||
               (cache_select && cache_select->cond))
      {
        const COND *pushed_cond= table->file->pushed_cond;

        if ((table->file->ha_table_flags() &
              HA_CAN_TABLE_CONDITION_PUSHDOWN) &&
            pushed_cond)
        {
          eta->push_extra(ET_USING_WHERE_WITH_PUSHED_CONDITION);
        }
        else
        {
          eta->where_cond= tab_select->cond;
          eta->cache_cond= cache_select? cache_select->cond : NULL;
          eta->push_extra(ET_USING_WHERE);
        }
      }
    }
    if (table_list /* SJM bushes don't have table_list */ &&
        table_list->schema_table &&
        table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
    {
      if (!table_list->table_open_method)
        eta->push_extra(ET_SKIP_OPEN_TABLE);
      else if (table_list->table_open_method == OPEN_FRM_ONLY)
        eta->push_extra(ET_OPEN_FRM_ONLY);
      else
        eta->push_extra(ET_OPEN_FULL_TABLE);
      /* psergey-note: the following has a bug.*/
      if (table_list->is_table_read_plan->trivial_show_command ||
          (table_list->is_table_read_plan->has_db_lookup_value() &&
           table_list->is_table_read_plan->has_table_lookup_value()))
        eta->push_extra(ET_SCANNED_0_DATABASES);
      else if (table_list->is_table_read_plan->has_db_lookup_value() ||
               table_list->is_table_read_plan->has_table_lookup_value())
        eta->push_extra(ET_SCANNED_1_DATABASE);
      else
        eta->push_extra(ET_SCANNED_ALL_DATABASES);
    }
    if (key_read)
    {
      if (quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
      {
        QUICK_GROUP_MIN_MAX_SELECT *qgs=
          (QUICK_GROUP_MIN_MAX_SELECT *) tab_select->quick;
        eta->push_extra(ET_USING_INDEX_FOR_GROUP_BY);
        eta->loose_scan_is_scanning= qgs->loose_scan_is_scanning();
      }
      else
        eta->push_extra(ET_USING_INDEX);
    }
    if (table->reginfo.not_exists_optimize)
      eta->push_extra(ET_NOT_EXISTS);

    if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE)
    {
      explain_append_mrr_info((QUICK_RANGE_SELECT*)(tab_select->quick),
                              &eta->mrr_type);
      if (eta->mrr_type.length() > 0)
        eta->push_extra(ET_USING_MRR);
    }

    if (shortcut_for_distinct)
      eta->push_extra(ET_DISTINCT);

    if (loosescan_match_tab)
    {
      eta->push_extra(ET_LOOSESCAN);
    }

    if (first_weedout_table)
    {
      eta->start_dups_weedout= true;
      eta->push_extra(ET_START_TEMPORARY);
    }
    if (check_weed_out_table)
    {
      eta->push_extra(ET_END_TEMPORARY);
      eta->end_dups_weedout= true;
    }

    else if (do_firstmatch)
    {
      if (do_firstmatch == /*join->join_tab*/ first_top_tab - 1)
        eta->push_extra(ET_FIRST_MATCH);
      else
      {
        eta->push_extra(ET_FIRST_MATCH);
        TABLE *prev_table=do_firstmatch->table;
        if (prev_table->derived_select_number)
        {
          char namebuf[NAME_LEN];
          /* Derived table name generation */
          size_t len= my_snprintf(namebuf, sizeof(namebuf)-1,
                                  "<derived%u>",
                                  prev_table->derived_select_number);
          eta->firstmatch_table_name.append(namebuf, len);
        }
        else
          eta->firstmatch_table_name.append(&prev_table->pos_in_table_list->alias);
      }
    }

    for (uint part= 0; part < ref.key_parts; part++)
    {
      if (ref.cond_guards[part])
      {
        eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY);
        eta->full_scan_on_null_key= true;
        break;
      }
    }

    if (cache)
    {
      eta->push_extra(ET_USING_JOIN_BUFFER);
      if (cache->save_explain_data(&eta->bka_type))
        return 1;
    }
  }

  /*
    In case this is a derived table, here we remember the number of
    subselect that used to produce it.
  */
  if (!(table_list && table_list->is_with_table_recursive_reference()))
    eta->derived_select_number= table->derived_select_number;

  /* The same for non-merged semi-joins */
  eta->non_merged_sjm_number = get_non_merged_semijoin_select();

  return 0;
}
|
|
|
|
|
|
/*
|
|
Walk through join->aggr_tables and save aggregation/grouping query plan into
|
|
an Explain_select object
|
|
|
|
@retval
|
|
0 ok
|
|
1 error
|
|
*/
|
|
|
|
bool save_agg_explain_data(JOIN *join, Explain_select *xpl_sel)
|
|
{
|
|
JOIN_TAB *join_tab=join->join_tab + join->exec_join_tab_cnt();
|
|
Explain_aggr_node *prev_node;
|
|
Explain_aggr_node *node= xpl_sel->aggr_tree;
|
|
bool is_analyze= join->thd->lex->analyze_stmt;
|
|
THD *thd= join->thd;
|
|
|
|
for (uint i= 0; i < join->aggr_tables; i++, join_tab++)
|
|
{
|
|
// Each aggregate means a temp.table
|
|
prev_node= node;
|
|
if (!(node= new (thd->mem_root) Explain_aggr_tmp_table))
|
|
return 1;
|
|
node->child= prev_node;
|
|
|
|
if (join_tab->window_funcs_step)
|
|
{
|
|
Explain_aggr_node *new_node=
|
|
join_tab->window_funcs_step->save_explain_plan(thd->mem_root,
|
|
is_analyze);
|
|
if (!new_node)
|
|
return 1;
|
|
|
|
prev_node=node;
|
|
node= new_node;
|
|
node->child= prev_node;
|
|
}
|
|
|
|
/* The below matches execution in join_init_read_record() */
|
|
if (join_tab->distinct)
|
|
{
|
|
prev_node= node;
|
|
if (!(node= new (thd->mem_root) Explain_aggr_remove_dups))
|
|
return 1;
|
|
node->child= prev_node;
|
|
}
|
|
|
|
if (join_tab->filesort)
|
|
{
|
|
Explain_aggr_filesort *eaf =
|
|
new (thd->mem_root) Explain_aggr_filesort(thd->mem_root, is_analyze, join_tab->filesort);
|
|
if (!eaf)
|
|
return 1;
|
|
prev_node= node;
|
|
node= eaf;
|
|
node->child= prev_node;
|
|
}
|
|
}
|
|
xpl_sel->aggr_tree= node;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Save Query Plan Footprint
|
|
|
|
@note
|
|
Currently, this function may be called multiple times
|
|
|
|
@retval
|
|
0 ok
|
|
1 error
|
|
*/
|
|
|
|
int JOIN::save_explain_data_intern(Explain_query *output,
|
|
bool need_tmp_table_arg,
|
|
bool need_order_arg, bool distinct_arg,
|
|
const char *message)
|
|
{
|
|
JOIN *join= this; /* Legacy: this code used to be a non-member function */
|
|
DBUG_ENTER("JOIN::save_explain_data_intern");
|
|
DBUG_PRINT("info", ("Select %p (%u), type %s, message %s",
|
|
join->select_lex, join->select_lex->select_number,
|
|
join->select_lex->type,
|
|
message ? message : "NULL"));
|
|
DBUG_ASSERT(have_query_plan == QEP_AVAILABLE);
|
|
/* fake_select_lex is created/printed by Explain_union */
|
|
DBUG_ASSERT(join->select_lex != join->unit->fake_select_lex);
|
|
|
|
/* There should be no attempts to save query plans for merged selects */
|
|
DBUG_ASSERT(!join->select_lex->master_unit()->derived ||
|
|
join->select_lex->master_unit()->derived->is_materialized_derived() ||
|
|
join->select_lex->master_unit()->derived->is_with_table());
|
|
|
|
/* Don't log this into the slow query log */
|
|
|
|
if (message)
|
|
{
|
|
if (!(explain= new (output->mem_root)
|
|
Explain_select(output->mem_root,
|
|
thd->lex->analyze_stmt)))
|
|
DBUG_RETURN(1);
|
|
#ifndef DBUG_OFF
|
|
explain->select_lex= select_lex;
|
|
#endif
|
|
join->select_lex->set_explain_type(true);
|
|
|
|
explain->select_id= join->select_lex->select_number;
|
|
explain->select_type= join->select_lex->type;
|
|
explain->linkage= select_lex->get_linkage();
|
|
explain->using_temporary= need_tmp;
|
|
explain->using_filesort= need_order_arg;
|
|
/* Setting explain->message means that all other members are invalid */
|
|
explain->message= message;
|
|
|
|
if (select_lex->master_unit()->derived)
|
|
explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
|
|
if (save_agg_explain_data(this, explain))
|
|
DBUG_RETURN(1);
|
|
|
|
output->add_node(explain);
|
|
}
|
|
else if (pushdown_query)
|
|
{
|
|
if (!(explain= new (output->mem_root)
|
|
Explain_select(output->mem_root,
|
|
thd->lex->analyze_stmt)))
|
|
DBUG_RETURN(1);
|
|
select_lex->set_explain_type(true);
|
|
|
|
explain->select_id= select_lex->select_number;
|
|
explain->select_type= select_lex->type;
|
|
explain->linkage= select_lex->get_linkage();
|
|
explain->using_temporary= need_tmp;
|
|
explain->using_filesort= need_order_arg;
|
|
explain->message= "Storage engine handles GROUP BY";
|
|
|
|
if (select_lex->master_unit()->derived)
|
|
explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
|
|
output->add_node(explain);
|
|
}
|
|
else
|
|
{
|
|
Explain_select *xpl_sel;
|
|
explain= xpl_sel=
|
|
new (output->mem_root) Explain_select(output->mem_root,
|
|
thd->lex->analyze_stmt);
|
|
if (!explain)
|
|
DBUG_RETURN(1);
|
|
|
|
table_map used_tables=0;
|
|
|
|
join->select_lex->set_explain_type(true);
|
|
xpl_sel->cost= best_read;
|
|
xpl_sel->select_id= join->select_lex->select_number;
|
|
xpl_sel->select_type= join->select_lex->type;
|
|
xpl_sel->linkage= select_lex->get_linkage();
|
|
xpl_sel->is_lateral= ((select_lex->get_linkage() == DERIVED_TABLE_TYPE) &&
|
|
(select_lex->uncacheable & UNCACHEABLE_DEPENDENT));
|
|
if (select_lex->master_unit()->derived)
|
|
xpl_sel->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
|
|
|
|
if (save_agg_explain_data(this, xpl_sel))
|
|
DBUG_RETURN(1);
|
|
|
|
xpl_sel->exec_const_cond= exec_const_cond;
|
|
xpl_sel->outer_ref_cond= outer_ref_cond;
|
|
xpl_sel->pseudo_bits_cond= pseudo_bits_cond;
|
|
if (tmp_having)
|
|
xpl_sel->having= tmp_having;
|
|
else
|
|
xpl_sel->having= having;
|
|
xpl_sel->having_value= having_value;
|
|
|
|
JOIN_TAB* const first_top_tab= join->first_breadth_first_tab();
|
|
JOIN_TAB* prev_bush_root_tab= NULL;
|
|
|
|
Explain_basic_join *cur_parent= xpl_sel;
|
|
|
|
for (JOIN_TAB *tab= first_explain_order_tab(join); tab;
|
|
tab= next_explain_order_tab(join, tab))
|
|
{
|
|
JOIN_TAB *saved_join_tab= NULL;
|
|
TABLE *cur_table= tab->table;
|
|
|
|
/* Don't show eliminated tables */
|
|
if (cur_table->map & join->eliminated_tables)
|
|
{
|
|
used_tables|= cur_table->map;
|
|
continue;
|
|
}
|
|
|
|
Explain_table_access *eta= (new (output->mem_root)
|
|
Explain_table_access(output->mem_root,
|
|
thd->lex->analyze_stmt));
|
|
|
|
if (!eta)
|
|
DBUG_RETURN(1);
|
|
if (tab->bush_root_tab != prev_bush_root_tab)
|
|
{
|
|
if (tab->bush_root_tab)
|
|
{
|
|
/*
|
|
We've entered an SJ-Materialization nest. Create an object for it.
|
|
*/
|
|
if (!(cur_parent=
|
|
new (output->mem_root) Explain_basic_join(output->mem_root)))
|
|
DBUG_RETURN(1);
|
|
|
|
JOIN_TAB *first_child= tab->bush_root_tab->bush_children->start;
|
|
cur_parent->select_id=
|
|
first_child->emb_sj_nest->sj_subq_pred->get_identifier();
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We've just left an SJ-Materialization nest. We are at the join tab
|
|
that 'embeds the nest'
|
|
*/
|
|
DBUG_ASSERT(tab->bush_children);
|
|
eta->sjm_nest= cur_parent;
|
|
cur_parent= xpl_sel;
|
|
}
|
|
}
|
|
prev_bush_root_tab= tab->bush_root_tab;
|
|
|
|
cur_parent->add_table(eta, output);
|
|
if (tab->save_explain_data(eta, used_tables, distinct_arg, first_top_tab))
|
|
DBUG_RETURN(1);
|
|
|
|
if (saved_join_tab)
|
|
tab= saved_join_tab;
|
|
|
|
// For next iteration
|
|
used_tables|= cur_table->map;
|
|
}
|
|
output->add_node(xpl_sel);
|
|
}
|
|
|
|
/*
|
|
Don't try to add query plans for child selects if this select was pushed
|
|
down into a Smart Storage Engine:
|
|
- the entire statement was pushed down ("PUSHED SELECT"), or
|
|
- this derived table was pushed down ("PUSHED DERIVED")
|
|
*/
|
|
if (!select_lex->pushdown_select && select_lex->type != pushed_derived_text)
|
|
for (SELECT_LEX_UNIT *tmp_unit= join->select_lex->first_inner_unit();
|
|
tmp_unit;
|
|
tmp_unit= tmp_unit->next_unit())
|
|
if (tmp_unit->explainable())
|
|
explain->add_child(tmp_unit->first_select()->select_number);
|
|
|
|
if (select_lex->is_top_level_node())
|
|
output->query_plan_ready();
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
This function serves as "shortcut point" for EXPLAIN queries.
|
|
|
|
The EXPLAIN statement executes just like its SELECT counterpart would
|
|
execute, except that JOIN::exec() will call select_describe() instead of
|
|
actually executing the query.
|
|
|
|
Inside select_describe():
|
|
- Query plan is updated with latest QEP choices made at the start of
|
|
JOIN::exec().
|
|
- the proces of "almost execution" is invoked for the children subqueries.
|
|
|
|
Overall, select_describe() is a legacy of old EXPLAIN implementation and
|
|
should be removed.
|
|
*/
|
|
|
|
static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
|
|
bool distinct,const char *message)
|
|
{
|
|
THD *thd=join->thd;
|
|
DBUG_ENTER("select_describe");
|
|
|
|
if (join->select_lex->pushdown_select)
|
|
{
|
|
/*
|
|
The whole statement was pushed down to a Smart Storage Engine. Do not
|
|
attempt to produce a query plan locally.
|
|
*/
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/* Update the QPF with latest values of using_temporary, using_filesort */
|
|
for (SELECT_LEX_UNIT *unit= join->select_lex->first_inner_unit();
|
|
unit;
|
|
unit= unit->next_unit())
|
|
{
|
|
/*
|
|
This fix_fields() call is to handle an edge case like this:
|
|
|
|
SELECT ... UNION SELECT ... ORDER BY (SELECT ...)
|
|
|
|
for such queries, we'll get here before having called
|
|
subquery_expr->fix_fields(), which will cause failure to
|
|
*/
|
|
if (unit->item && !unit->item->fixed())
|
|
{
|
|
Item *ref= unit->item;
|
|
if (unit->item->fix_fields(thd, &ref))
|
|
DBUG_VOID_RETURN;
|
|
DBUG_ASSERT(ref == unit->item);
|
|
}
|
|
|
|
if (unit->explainable())
|
|
{
|
|
if (mysql_explain_union(thd, unit, unit->result))
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
  Run the EXPLAIN flavour of a unit: mark every select of 'unit' with
  SELECT_DESCRIBE and then either prepare/execute the unit (when it is a
  unit operation or has a fake_select_lex) or run mysql_select() on its
  first select.

  @return true if any step reported failure or thd has an error set
*/
bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
{
  DBUG_ENTER("mysql_explain_union");
  bool res= 0;
  SELECT_LEX *const first= unit->first_select();

  SELECT_LEX *sel= first;
  while (sel)
  {
    sel->set_explain_type(FALSE);
    sel->options|= SELECT_DESCRIBE;
    sel= sel->next_select();
  }

  if (!(unit->is_unit_op() || unit->fake_select_lex))
  {
    /* A single select: explain it through mysql_select() */
    thd->lex->current_select= first;
    unit->set_limit(unit->global_parameters());
    res= mysql_select(thd, first->table_list.first, first->item_list,
                      first->where,
                      first->order_list.elements + first->group_list.elements,
                      first->order_list.first, first->group_list.first,
                      first->having, thd->lex->proc_list.first,
                      first->options | thd->variables.option_bits |
                      SELECT_DESCRIBE,
                      result, unit, first);
    DBUG_RETURN(res || thd->is_error());
  }

  ulonglong saved_fake_options= 0;

  if (unit->union_needs_tmp_table() && unit->fake_select_lex)
  {
    SELECT_LEX *fake= unit->fake_select_lex;
    saved_fake_options= fake->options;
    fake->select_number= FAKE_SELECT_LEX_ID; // just for initialization
    fake->type= unit_operation_text[unit->common_op()];
    fake->options|= SELECT_DESCRIBE;
  }

  res= unit->prepare(unit->derived, result,
                     SELECT_NO_UNLOCK | SELECT_DESCRIBE);
  if (!res)
  {
    const bool pushed_union=
      (unit->derived && unit->derived->pushdown_derived) ||
      unit->pushdown_unit;

    if (unit->pushdown_unit)
    {
      create_explain_query_if_not_exists(thd->lex, thd->mem_root);
      if (!unit->executed)
        unit->save_union_explain(thd->lex->explain);
      List<Item> items;
      result->prepare(items, unit);
    }

    /* A pushed-down union is not executed locally */
    if (!pushed_union)
      res= unit->exec();
  }

  /* Put back the options of fake_select_lex saved before prepare() */
  if (unit->union_needs_tmp_table() && unit->fake_select_lex)
    unit->fake_select_lex->options= saved_fake_options;

  DBUG_RETURN(res || thd->is_error());
}
|
|
|
|
|
|
/*
  Print the tables in [table, end) as a join expression into 'str'.

  The first element is printed as is; every following element is preceded
  by its join keyword and followed by " on(<expr>)" when it has an ON
  expression.
*/
static void print_table_array(THD *thd,
                              table_map eliminated_tables,
                              String *str, TABLE_LIST **table,
                              TABLE_LIST **end,
                              enum_query_type query_type)
{
  (*table)->print(thd, eliminated_tables, str, query_type);

  TABLE_LIST **pos= table;
  while (++pos < end)
  {
    TABLE_LIST *const cur= *pos;

    /*
      The "eliminated_tables &&" check guards against the case of
      printing the query for CREATE VIEW. We do that without having run
      JOIN::optimize() and so will have nested_join->used_tables==0.
    */
    if (eliminated_tables)
    {
      const bool table_eliminated=
        cur->table && (cur->table->map & eliminated_tables);
      const bool nest_eliminated=
        cur->nested_join &&
        !(cur->nested_join->used_tables & ~eliminated_tables);
      if (table_eliminated || nest_eliminated)
      {
        /* as of 5.5, print_join doesn't put eliminated elements into array */
        DBUG_ASSERT(0);
        continue;
      }
    }

    /*
      Join keyword. JOIN_TYPE_OUTER is just a marker unrelated to real join;
      right joins are printed as left joins.
    */
    if (curr_is_outer:
        0) {}
  }
}
|
|
|
|
|
|
/*
|
|
Check if the passed table is
|
|
- a base table which was eliminated, or
|
|
- a join nest which only contained eliminated tables (and so was eliminated,
|
|
too)
|
|
*/
|
|
|
|
bool is_eliminated_table(table_map eliminated_tables, TABLE_LIST *tbl)
|
|
{
|
|
return eliminated_tables &&
|
|
((tbl->table && (tbl->table->map & eliminated_tables)) ||
|
|
(tbl->nested_join && !(tbl->nested_join->used_tables &
|
|
~eliminated_tables)));
|
|
}
|
|
|
|
/**
|
|
Print joins from the FROM clause.
|
|
|
|
@param thd thread handler
|
|
@param str string where table should be printed
|
|
@param tables list of tables in join
|
|
@query_type type of the query is being generated
|
|
*/
|
|
|
|
static void print_join(THD *thd,
|
|
table_map eliminated_tables,
|
|
String *str,
|
|
List<TABLE_LIST> *tables,
|
|
enum_query_type query_type)
|
|
{
|
|
/* List is reversed => we should reverse it before using */
|
|
List_iterator_fast<TABLE_LIST> ti(*tables);
|
|
TABLE_LIST **table;
|
|
DBUG_ENTER("print_join");
|
|
|
|
/*
|
|
If the QT_NO_DATA_EXPANSION flag is specified, we print the
|
|
original table list, including constant tables that have been
|
|
optimized away, as the constant tables may be referenced in the
|
|
expression printed by Item_field::print() when this flag is given.
|
|
Otherwise, only non-const tables are printed.
|
|
|
|
Example:
|
|
|
|
Original SQL:
|
|
select * from (select 1) t
|
|
|
|
Printed without QT_NO_DATA_EXPANSION:
|
|
select '1' AS `1` from dual
|
|
|
|
Printed with QT_NO_DATA_EXPANSION:
|
|
select `t`.`1` from (select 1 AS `1`) `t`
|
|
*/
|
|
const bool print_const_tables= (query_type & QT_NO_DATA_EXPANSION);
|
|
size_t tables_to_print= 0;
|
|
|
|
for (TABLE_LIST *t= ti++; t ; t= ti++)
|
|
{
|
|
/* See comment in print_table_array() about the second condition */
|
|
if (print_const_tables || !t->optimized_away)
|
|
if (!is_eliminated_table(eliminated_tables, t))
|
|
tables_to_print++;
|
|
}
|
|
if (tables_to_print == 0)
|
|
{
|
|
str->append(STRING_WITH_LEN("dual"));
|
|
DBUG_VOID_RETURN; // all tables were optimized away
|
|
}
|
|
ti.rewind();
|
|
|
|
if (!(table= static_cast<TABLE_LIST **>(thd->alloc(sizeof(TABLE_LIST*) *
|
|
tables_to_print))))
|
|
DBUG_VOID_RETURN; // out of memory
|
|
|
|
TABLE_LIST *tmp, **t= table + (tables_to_print - 1);
|
|
while ((tmp= ti++))
|
|
{
|
|
if (tmp->optimized_away && !print_const_tables)
|
|
continue;
|
|
if (is_eliminated_table(eliminated_tables, tmp))
|
|
continue;
|
|
*t--= tmp;
|
|
}
|
|
|
|
DBUG_ASSERT(tables->elements >= 1);
|
|
/*
|
|
Assert that the first table in the list isn't eliminated. This comes from
|
|
the fact that the first table can't be inner table of an outer join.
|
|
*/
|
|
DBUG_ASSERT(!eliminated_tables ||
|
|
!(((*table)->table && ((*table)->table->map & eliminated_tables)) ||
|
|
((*table)->nested_join && !((*table)->nested_join->used_tables &
|
|
~eliminated_tables))));
|
|
/*
|
|
If the first table is a semi-join nest, swap it with something that is
|
|
not a semi-join nest.
|
|
*/
|
|
if ((*table)->sj_inner_tables)
|
|
{
|
|
TABLE_LIST **end= table + tables_to_print;
|
|
for (TABLE_LIST **t2= table; t2!=end; t2++)
|
|
{
|
|
if (!(*t2)->sj_inner_tables)
|
|
{
|
|
tmp= *t2;
|
|
*t2= *table;
|
|
*table= tmp;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
print_table_array(thd, eliminated_tables, str, table,
|
|
table + tables_to_print, query_type);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/**
|
|
@brief Print an index hint
|
|
|
|
@details Prints out the USE|FORCE|IGNORE index hint.
|
|
|
|
@param thd the current thread
|
|
@param[out] str appends the index hint here
|
|
@param hint what the hint is (as string : "USE INDEX"|
|
|
"FORCE INDEX"|"IGNORE INDEX")
|
|
@param hint_length the length of the string in 'hint'
|
|
@param indexes a list of index names for the hint
|
|
*/
|
|
|
|
void
|
|
Index_hint::print(THD *thd, String *str)
|
|
{
|
|
switch (type)
|
|
{
|
|
case INDEX_HINT_IGNORE: str->append(STRING_WITH_LEN("IGNORE INDEX")); break;
|
|
case INDEX_HINT_USE: str->append(STRING_WITH_LEN("USE INDEX")); break;
|
|
case INDEX_HINT_FORCE: str->append(STRING_WITH_LEN("FORCE INDEX")); break;
|
|
}
|
|
str->append(STRING_WITH_LEN(" ("));
|
|
if (key_name.length)
|
|
{
|
|
if (thd && key_name.streq(primary_key_name))
|
|
str->append(primary_key_name);
|
|
else
|
|
append_identifier(thd, str, &key_name);
|
|
}
|
|
str->append(')');
|
|
}
|
|
|
|
|
|
/**
|
|
Print table as it should be in join list.
|
|
|
|
@param str string where table should be printed
|
|
*/
|
|
|
|
void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str,
|
|
enum_query_type query_type)
|
|
{
|
|
if (nested_join)
|
|
{
|
|
str->append('(');
|
|
print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type);
|
|
str->append(')');
|
|
}
|
|
else if (jtbm_subselect)
|
|
{
|
|
if (jtbm_subselect->engine->engine_type() ==
|
|
subselect_engine::SINGLE_SELECT_ENGINE)
|
|
{
|
|
/*
|
|
We get here when conversion into materialization didn't finish (this
|
|
happens when
|
|
- The subquery is a degenerate case which produces 0 or 1 record
|
|
- subquery's optimization didn't finish because of @@max_join_size
|
|
limits
|
|
- ... maybe some other cases like this
|
|
*/
|
|
str->append(STRING_WITH_LEN(" <materialize> ("));
|
|
jtbm_subselect->engine->print(str, query_type);
|
|
str->append(')');
|
|
}
|
|
else
|
|
{
|
|
str->append(STRING_WITH_LEN(" <materialize> ("));
|
|
subselect_hash_sj_engine *hash_engine;
|
|
hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine;
|
|
hash_engine->materialize_engine->print(str, query_type);
|
|
str->append(')');
|
|
}
|
|
}
|
|
else
|
|
{
|
|
Lex_ident_table cmp_name(empty_clex_str); // Name to compare with alias
|
|
if (view_name.str)
|
|
{
|
|
// A view
|
|
|
|
if (!(belong_to_view &&
|
|
belong_to_view->compact_view_format) &&
|
|
!(query_type & QT_ITEM_IDENT_SKIP_DB_NAMES))
|
|
{
|
|
append_identifier(thd, str, &view_db);
|
|
str->append('.');
|
|
}
|
|
append_identifier(thd, str, &view_name);
|
|
cmp_name= view_name;
|
|
}
|
|
else if (derived)
|
|
{
|
|
if (!is_with_table())
|
|
{
|
|
// A derived table
|
|
str->append('(');
|
|
derived->print(str, query_type);
|
|
str->append(')');
|
|
cmp_name= Lex_ident_table(empty_clex_str); // Force printing of alias
|
|
}
|
|
else
|
|
{
|
|
append_identifier(thd, str, &table_name);
|
|
cmp_name= table_name;
|
|
}
|
|
}
|
|
else if (table_function)
|
|
{
|
|
/* A table function. */
|
|
(void) table_function->print(thd, this, str, query_type);
|
|
str->append(' ');
|
|
append_identifier(thd, str, &alias);
|
|
cmp_name= alias;
|
|
}
|
|
else
|
|
{
|
|
// A normal table
|
|
|
|
if (!(belong_to_view &&
|
|
belong_to_view->compact_view_format) &&
|
|
!(query_type & QT_ITEM_IDENT_SKIP_DB_NAMES))
|
|
{
|
|
append_identifier(thd, str, &db);
|
|
str->append('.');
|
|
}
|
|
if (schema_table)
|
|
{
|
|
append_identifier(thd, str, &schema_table_name);
|
|
cmp_name= Lex_ident_table(schema_table_name);
|
|
}
|
|
else
|
|
{
|
|
append_identifier(thd, str, &table_name);
|
|
cmp_name= table_name;
|
|
}
|
|
#ifdef WITH_PARTITION_STORAGE_ENGINE
|
|
if (partition_names && partition_names->elements)
|
|
{
|
|
int i, num_parts= partition_names->elements;
|
|
List_iterator<String> name_it(*(partition_names));
|
|
str->append(STRING_WITH_LEN(" PARTITION ("));
|
|
for (i= 1; i <= num_parts; i++)
|
|
{
|
|
String *name= name_it++;
|
|
append_identifier(thd, str, name->ptr(), name->length());
|
|
if (i != num_parts)
|
|
str->append(',');
|
|
}
|
|
str->append(')');
|
|
}
|
|
#endif /* WITH_PARTITION_STORAGE_ENGINE */
|
|
}
|
|
if (table && table->versioned())
|
|
vers_conditions.print(str, query_type);
|
|
|
|
if (!cmp_name.streq(alias))
|
|
{
|
|
str->append(' ');
|
|
append_identifier_opt_casedn(thd, str, alias,
|
|
lower_case_table_names == 1);
|
|
|
|
if (column_names && (column_names->elements > 0))
|
|
list_strlex_print(thd, str, column_names, true);
|
|
}
|
|
|
|
if (index_hints)
|
|
{
|
|
List_iterator<Index_hint> it(*index_hints);
|
|
Index_hint *hint;
|
|
|
|
while ((hint= it++))
|
|
{
|
|
str->append(' ');
|
|
hint->print(thd, str);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Statement kinds distinguished when a query is printed back as text */
enum explainable_cmd_type
{
  SELECT_CMD,
  INSERT_CMD,
  REPLACE_CMD,
  UPDATE_CMD,
  DELETE_CMD,
  NO_CMD
};
|
|
|
|
static
|
|
const LEX_CSTRING explainable_cmd_name []=
|
|
{
|
|
{STRING_WITH_LEN("select ")},
|
|
{STRING_WITH_LEN("insert ")},
|
|
{STRING_WITH_LEN("replace ")},
|
|
{STRING_WITH_LEN("update ")},
|
|
{STRING_WITH_LEN("delete ")},
|
|
};
|
|
|
|
static
|
|
const LEX_CSTRING* get_explainable_cmd_name(enum explainable_cmd_type cmd)
|
|
{
|
|
return explainable_cmd_name + cmd;
|
|
}
|
|
|
|
static
|
|
enum explainable_cmd_type get_explainable_cmd_type(THD *thd)
|
|
{
|
|
switch (thd->lex->sql_command) {
|
|
case SQLCOM_SELECT:
|
|
return SELECT_CMD;
|
|
case SQLCOM_INSERT:
|
|
case SQLCOM_INSERT_SELECT:
|
|
return INSERT_CMD;
|
|
case SQLCOM_REPLACE:
|
|
case SQLCOM_REPLACE_SELECT:
|
|
return REPLACE_CMD;
|
|
case SQLCOM_UPDATE:
|
|
case SQLCOM_UPDATE_MULTI:
|
|
return UPDATE_CMD;
|
|
case SQLCOM_DELETE:
|
|
case SQLCOM_DELETE_MULTI:
|
|
return DELETE_CMD;
|
|
default:
|
|
return SELECT_CMD;
|
|
}
|
|
}
|
|
|
|
|
|
/*
  Print this table, or, when it has a merge_underlying_list, recursively
  print every table of that list (following next_local) instead.
  Nothing is appended between the printed tables.
*/
void TABLE_LIST::print_leaf_tables(THD *thd, String *str,
                                   enum_query_type query_type)
{
  if (!merge_underlying_list)
  {
    print(thd, 0, str, query_type);
    return;
  }

  TABLE_LIST *underlying= merge_underlying_list;
  while (underlying)
  {
    underlying->print_leaf_tables(thd, str, query_type);
    underlying= underlying->next_local;
  }
}
|
|
|
|
|
|
/*
  Print the select list as a comma-separated sequence of items.

  An item is printed together with its name (print_item_w_name()) only if
  it has a name, is not an implicitly named item of a subquery function,
  and additionally either this is the topmost select, the name was given
  explicitly, or check_column_name() accepts the name. In all other cases
  the bare item is printed.
*/
void st_select_lex::print_item_list(THD *thd, String *str,
                                    enum_query_type query_type)
{
  /*
    outer_select() can not be used here because it is for name resolution
    and will return NULL at any end of name resolution chain (view/derived)
  */
  const bool top_level= is_query_topmost(thd);
  bool need_comma= false;
  List_iterator_fast<Item> it(item_list);

  for (Item *item= it++; item; item= it++)
  {
    if (need_comma)
      str->append(',');
    need_comma= true;

    /*
      Do not print auto-generated aliases in subqueries. It has no purpose
      in a view definition or other contexts where the query is printed.
    */
    const bool skip_alias=
      (is_subquery_function() && !item->is_explicit_name()) ||
      !item->name.str;

    /*
      Do not print illegal names (if it is not top level SELECT).
      Top level view checked (and correct name are assigned),
      other cases of top level SELECT are not important, because
      it is not "table field".
    */
    if (!skip_alias &&
        (top_level ||
         item->is_explicit_name() ||
         !check_column_name(item->name.str)))
      item->print_item_w_name(str, query_type);
    else
      item->print(str, query_type);
  }
}
|
|
|
|
|
|
/*
  Print the " set col = value,..." clause of an UPDATE.

  Columns are taken from item_list and values from thd->lex->value_list;
  the two lists are walked in parallel. Nothing is printed when either
  list is empty. Columns are printed with QT_NO_DATA_EXPANSION added to
  query_type; values are printed with query_type as given.
*/
void st_select_lex::print_set_clause(THD *thd, String *str,
                                     enum_query_type query_type)
{
  bool first= 1;
  List_iterator_fast<Item> it(item_list);
  List_iterator_fast<Item> vt(thd->lex->value_list);
  Item *item;
  Item *val;

  /*
    Stop as soon as either list is exhausted. Both pointers have to be
    tested: with a comma expression only 'val' would be checked and a
    shorter item_list would lead to a NULL 'item' being dereferenced.
  */
  while ((item= it++) && (val= vt++))
  {
    if (first)
    {
      str->append(STRING_WITH_LEN(" set "));
      first= 0;
    }
    else
      str->append(',');

    item->print(str, (enum_query_type) (query_type | QT_NO_DATA_EXPANSION));
    str->append(STRING_WITH_LEN(" = "));
    val->print(str, query_type);
  }
}
|
|
|
|
|
|
/*
  Print the " on duplicate key update col = value,..." clause.

  Columns are taken from thd->lex->update_list and values from
  thd->lex->value_list; the two lists are walked in parallel. Nothing is
  printed when either list is empty.
*/
void st_select_lex::print_on_duplicate_key_clause(THD *thd, String *str,
                                                  enum_query_type query_type)
{
  bool first= 1;
  List_iterator_fast<Item> it(thd->lex->update_list);
  List_iterator_fast<Item> vt(thd->lex->value_list);
  Item *item;
  Item *val;

  /*
    Stop as soon as either list is exhausted. Both pointers have to be
    tested: with a comma expression only 'val' would be checked and a
    shorter update_list would lead to a NULL 'item' being dereferenced.
  */
  while ((item= it++) && (val= vt++))
  {
    if (first)
    {
      str->append(STRING_WITH_LEN(" on duplicate key update "));
      first= 0;
    }
    else
      str->append(',');

    item->print(str, query_type);
    str->append(STRING_WITH_LEN(" = "));
    val->print(str, query_type);
  }
}
|
|
|
|
|
|
/*
  Append the locking clause of this select: " lock in share mode" or
  " for update" according to select_lock, followed by " skip locked" when
  skip_locked is set. Other select_lock values add no lock text.
*/
void st_select_lex::print_lock_type(String *str)
{
  switch (select_lock)
  {
  case select_lock_type::IN_SHARE_MODE:
    str->append(STRING_WITH_LEN(" lock in share mode"));
    break;
  case select_lock_type::FOR_UPDATE:
    str->append(STRING_WITH_LEN(" for update"));
    break;
  default:
    break;
  }

  if (unlikely(skip_locked))
    str->append(STRING_WITH_LEN(" skip locked"));
}
|
|
|
|
|
|
/*
  Append the text of this select to 'str'.

  For the topmost select (unless QT_SELECT_ONLY is given) the text is
  shaped after the statement being executed: INSERT/REPLACE get an
  "insert into ..."/"replace into ..." prefix (and a VALUES list for the
  non-SELECT forms), UPDATE and DELETE get their own keyword, table list
  and, for UPDATE, a SET clause. In all other cases an ordinary
  "select ..." is produced.

  @param thd         the current thread, must not be NULL
  @param str         output string
  @param query_type  printing flags
*/
void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
{
  DBUG_ASSERT(thd);

  /* A table value constructor prints itself */
  if (tvc)
  {
    tvc->print(thd, str, query_type);
    return;
  }

  /* On request, print the select inside a TVC wrapper instead of the wrapper */
  if (is_tvc_wrapper && (query_type & QT_NO_WRAPPERS_FOR_TVC_IN_VIEW))
  {
    first_inner_unit()->first_select()->print(thd, str, query_type);
    return;
  }

  enum explainable_cmd_type sel_type= SELECT_CMD;
  if (is_query_topmost(thd) && !(query_type & QT_SELECT_ONLY))
    sel_type= get_explainable_cmd_type(thd);

  const bool insert_or_replace= (sel_type == INSERT_CMD ||
                                 sel_type == REPLACE_CMD);
  const bool printed_as_select= (sel_type == SELECT_CMD || insert_or_replace);
  const bool update_or_delete= (sel_type == UPDATE_CMD ||
                                sel_type == DELETE_CMD);

  if (insert_or_replace)
  {
    str->append(get_explainable_cmd_name(sel_type));
    str->append(STRING_WITH_LEN("into "));

    /* Descend to the innermost underlying table of the target */
    TABLE_LIST *target= thd->lex->query_tables;
    while (target->merge_underlying_list)
      target= target->merge_underlying_list;
    target->print(thd, 0, str, query_type);

    if (thd->lex->field_list.elements)
    {
      str->append ('(');
      List_iterator_fast<Item> field_it(thd->lex->field_list);
      bool need_comma= false;
      for (Item *field= field_it++; field; field= field_it++)
      {
        if (need_comma)
          str->append(',');
        need_comma= true;
        str->append(field->name);
      }
      str->append(')');
    }

    str->append(' ');

    if (thd->lex->sql_command == SQLCOM_INSERT ||
        thd->lex->sql_command == SQLCOM_REPLACE)
    {
      /* INSERT/REPLACE ... VALUES: there is no select part to print */
      str->append(STRING_WITH_LEN("values "));
      List_iterator_fast<List_item> row_it(thd->lex->many_values);
      bool need_comma= false;
      for (List_item *row= row_it++; row; row= row_it++)
      {
        if (need_comma)
          str->append(',');
        need_comma= true;
        print_list_item(str, row, query_type);
      }
      if (thd->lex->update_list.elements)
        print_on_duplicate_key_clause(thd, str, query_type);
      return;
    }
  }

  if ((query_type & QT_SHOW_SELECT_NUMBER) &&
      thd->lex->all_selects_list &&
      thd->lex->all_selects_list->link_next &&
      select_number != FAKE_SELECT_LEX_ID)
  {
    str->append(STRING_WITH_LEN("/* select#"));
    str->append_ulonglong(select_number);
    if (thd->lex->describe & DESCRIBE_EXTENDED2)
    {
      str->append('/');
      str->append_ulonglong(nest_level);

      if (master_unit()->fake_select_lex &&
          master_unit()->first_select() == this)
      {
        str->append(STRING_WITH_LEN(" Filter Select: "));
        master_unit()->fake_select_lex->print(thd, str, query_type);
      }
    }
    str->append(STRING_WITH_LEN(" */ "));
  }

  if (printed_as_select)
    str->append(STRING_WITH_LEN("select "));

  if (join && join->cleaned)
  {
    /*
      JOIN already cleaned up so it is dangerous to print items
      because temporary tables they pointed on could be freed.
    */
    str->append('#');
    /*
      NOTE(review): the select# comment above uses append_ulonglong() while
      this call uses append() - confirm which overload is selected here.
    */
    str->append(select_number);
    return;
  }

  /* First add options */
  if (options & SELECT_STRAIGHT_JOIN)
    str->append(STRING_WITH_LEN("straight_join "));
  if (options & SELECT_HIGH_PRIORITY)
    str->append(STRING_WITH_LEN("high_priority "));
  if (options & SELECT_DISTINCT)
    str->append(STRING_WITH_LEN("distinct "));
  if (options & SELECT_SMALL_RESULT)
    str->append(STRING_WITH_LEN("sql_small_result "));
  if (options & SELECT_BIG_RESULT)
    str->append(STRING_WITH_LEN("sql_big_result "));
  if (options & OPTION_BUFFER_RESULT)
    str->append(STRING_WITH_LEN("sql_buffer_result "));
  if (options & OPTION_FOUND_ROWS)
    str->append(STRING_WITH_LEN("sql_calc_found_rows "));

  /* The query cache option is printed for the first select of the statement */
  if (this == parent_lex->first_select_lex())
  {
    switch (parent_lex->sql_cache)
    {
    case LEX::SQL_NO_CACHE:
      str->append(STRING_WITH_LEN("sql_no_cache "));
      break;
    case LEX::SQL_CACHE:
      str->append(STRING_WITH_LEN("sql_cache "));
      break;
    case LEX::SQL_CACHE_UNSPECIFIED:
      break;
    default:
      DBUG_ASSERT(0);
    }
  }

  // Item List
  if (printed_as_select)
    print_item_list(thd, str, query_type);

  /*
    from clause
    TODO: support USING/FORCE/IGNORE index
  */
  if (table_list.elements)
  {
    if (printed_as_select)
    {
      str->append(STRING_WITH_LEN(" from "));
      /* go through join tree */
      print_join(thd, join? join->eliminated_tables: 0, str, &top_join_list,
                 query_type);
    }

    if (update_or_delete)
      str->append(get_explainable_cmd_name(sel_type));

    if (sel_type == DELETE_CMD)
    {
      /* Targets come from auxiliary_table_list; " using " follows if any */
      str->append(STRING_WITH_LEN(" from "));
      bool printed_target= false;
      for (TABLE_LIST *target_tbl= thd->lex->auxiliary_table_list.first;
           target_tbl;
           target_tbl= target_tbl->next_local)
      {
        if (printed_target)
          str->append(',');
        printed_target= true;
        target_tbl->correspondent_table->print_leaf_tables(thd, str,
                                                           query_type);
      }

      if (printed_target)
        str->append(STRING_WITH_LEN(" using "));
    }

    if (update_or_delete)
    {
      if (join)
        print_join(thd, 0, str, &top_join_list, query_type);
      else
      {
        /* No JOIN object: list the leaf tables separated by commas */
        List_iterator_fast<TABLE_LIST> leaf_it(leaf_tables);
        bool need_comma= false;
        for (TABLE_LIST *leaf= leaf_it++; leaf; leaf= leaf_it++)
        {
          if (need_comma)
            str->append(',');
          need_comma= true;
          leaf->print(thd, 0, str, query_type);
        }
      }
    }
  }
  else if (where)
  {
    /*
      "SELECT 1 FROM DUAL WHERE 2" should not be printed as
      "SELECT 1 WHERE 2": the 1st syntax is valid, but the 2nd is not.
    */
    str->append(STRING_WITH_LEN(" from DUAL "));
  }

  if (sel_type == UPDATE_CMD)
    print_set_clause(thd, str, query_type);

  // Where
  Item *cur_where;
  if (join)
    cur_where= join->conds;
  else if (update_or_delete)
    cur_where= thd->lex->upd_del_where;
  else
    cur_where= where;

  if (cur_where || cond_value != Item::COND_UNDEF)
  {
    str->append(STRING_WITH_LEN(" where "));
    if (cur_where)
      cur_where->print(str, query_type);
    else
      str->append(cond_value != Item::COND_FALSE ? '1' : '0');
  }

  // group by & olap
  if (group_list.elements)
  {
    str->append(STRING_WITH_LEN(" group by "));
    print_order(str, group_list.first, query_type);
    switch (olap)
    {
    case CUBE_TYPE:
      str->append(STRING_WITH_LEN(" with cube"));
      break;
    case ROLLUP_TYPE:
      str->append(STRING_WITH_LEN(" with rollup"));
      break;
    default:
      ;  //satisfy compiler
    }
  }

  // having
  Item *cur_having= join ? join->having : having;

  if (cur_having || having_value != Item::COND_UNDEF)
  {
    str->append(STRING_WITH_LEN(" having "));
    if (cur_having)
      cur_having->print(str, query_type);
    else
      str->append(having_value != Item::COND_FALSE ? '1' : '0');
  }

  // order by
  if (order_list.elements)
  {
    str->append(STRING_WITH_LEN(" order by "));
    print_order(str, order_list.first, query_type);
  }

  // limit
  print_limit(thd, str, query_type);

  // lock type
  if (braces) /* no braces processed in
                 SELECT_LEX_UNIT::print_lock_from_the_last_select */
    print_lock_type(str);

  if (insert_or_replace && thd->lex->update_list.elements)
    print_on_duplicate_key_clause(thd, str, query_type);

  // returning clause
  /*
    NOTE(review): this runs only when item_list is empty, in which case
    print_item_list() has no items to print - confirm the intended
    condition.
  */
  if (sel_type == DELETE_CMD && !item_list.elements)
  {
    print_item_list(thd, str, query_type);
  }
  // PROCEDURE unsupported here
}
|
|
|
|
|
|
/**
|
|
Change the select_result object of the JOIN.
|
|
|
|
If old_result is not used, forward the call to the current
|
|
select_result in case it is a wrapper around old_result.
|
|
|
|
Call prepare() and prepare2() on the new select_result if we decide
|
|
to use it.
|
|
|
|
@param new_result New select_result object
|
|
@param old_result Old select_result object (NULL to force change)
|
|
|
|
@retval false Success
|
|
@retval true Error
|
|
*/
|
|
|
|
bool JOIN::change_result(select_result *new_result, select_result *old_result)
{
  DBUG_ENTER("JOIN::change_result");

  /* old_result is not what we use directly: let the current result handle it */
  if (old_result != NULL && result != old_result)
    DBUG_RETURN(result->change_result(new_result));

  /* Install the new result object and prepare it for use */
  result= new_result;
  const bool prepare_failed=
    result->prepare(fields_list, select_lex->master_unit()) ||
    result->prepare2(this);
  DBUG_RETURN(prepare_failed);
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Set allowed types of join caches that can be used for join operations
|
|
|
|
@details
|
|
The function sets a bitmap of allowed join buffers types in the field
|
|
allowed_join_cache_types of this JOIN structure:
|
|
bit 1 is set if the join buffers are allowed to be incremental
|
|
bit 2 is set if the join buffers are allowed to be hashed
|
|
bit 3 is set if the join buffers are allowed to be used for BKA
|
|
join algorithms.
|
|
The allowed types are read from system variables.
|
|
Besides the function sets maximum allowed join cache level that is
|
|
also read from a system variable.
|
|
*/
|
|
|
|
void JOIN::set_allowed_join_cache_types()
|
|
{
|
|
allowed_join_cache_types= 0;
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL))
|
|
allowed_join_cache_types|= JOIN_CACHE_INCREMENTAL_BIT;
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_HASHED))
|
|
allowed_join_cache_types|= JOIN_CACHE_HASHED_BIT;
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_BKA))
|
|
allowed_join_cache_types|= JOIN_CACHE_BKA_BIT;
|
|
allowed_semijoin_with_cache=
|
|
optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE);
|
|
allowed_outer_join_with_cache=
|
|
optimizer_flag(thd, OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE);
|
|
max_allowed_join_cache_level= thd->variables.join_cache_level;
|
|
}
|
|
|
|
|
|
/**
|
|
Save a query execution plan so that the caller can revert to it if needed,
|
|
and reset the current query plan so that it can be reoptimized.
|
|
|
|
@param save_to The object into which the current query plan state is saved
|
|
*/
|
|
|
|
void JOIN::save_query_plan(Join_plan_state *save_to)
{
  /*
    Swap the current and the backup keyuse internal arrays. The array taken
    from save_to becomes the current one (it is expected to be empty).
  */
  DYNAMIC_ARRAY current_keyuse= keyuse;
  keyuse= save_to->keyuse;
  save_to->keyuse= current_keyuse;

  /* Move the per-table key information into the backup and clear it here */
  for (uint idx= 0; idx < table_count; idx++)
  {
    JOIN_TAB &tab= join_tab[idx];
    save_to->join_tab_keyuse[idx]= tab.keyuse;
    save_to->join_tab_checked_keys[idx]= tab.checked_keys;
    tab.keyuse= NULL;
    tab.checked_keys.clear_all();
  }

  /* Copy out table_count + 1 best positions, then zero the originals */
  const size_t positions_size= sizeof(POSITION) * (table_count + 1);
  memcpy((uchar*) save_to->best_positions, (uchar*) best_positions,
         positions_size);
  memset((uchar*) best_positions, 0, positions_size);

  /* Save SJM nests */
  List_iterator<TABLE_LIST> nest_it(select_lex->sj_nests);
  SJ_MATERIALIZATION_INFO **slot= save_to->sj_mat_info;
  for (TABLE_LIST *nest= nest_it++; nest; nest= nest_it++)
    *(slot++)= nest->sj_mat_info;
}
|
|
|
|
|
|
/**
|
|
Reset a query execution plan so that it can be reoptimized in-place.
|
|
*/
|
|
void JOIN::reset_query_plan()
|
|
{
|
|
for (uint i= 0; i < table_count; i++)
|
|
{
|
|
join_tab[i].keyuse= NULL;
|
|
join_tab[i].checked_keys.clear_all();
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Restore a query execution plan previously saved by the caller.
|
|
|
|
@param restore_from The object from which the current query plan state is restored.
|
|
*/
|
|
|
|
void JOIN::restore_query_plan(Join_plan_state *restore_from)
{
  /* Swap the current and the saved keyuse arrays back */
  DYNAMIC_ARRAY current_keyuse= keyuse;
  keyuse= restore_from->keyuse;
  restore_from->keyuse= current_keyuse;

  /* Bring back the per-table key information */
  for (uint idx= 0; idx < table_count; idx++)
  {
    JOIN_TAB &tab= join_tab[idx];
    tab.keyuse= restore_from->join_tab_keyuse[idx];
    tab.checked_keys= restore_from->join_tab_checked_keys[idx];
  }

  /* table_count + 1 positions are copied, matching save_query_plan() */
  memcpy((uchar*) best_positions, (uchar*) restore_from->best_positions,
         sizeof(POSITION) * (table_count + 1));

  /* Restore SJM nests */
  List_iterator<TABLE_LIST> nest_it(select_lex->sj_nests);
  SJ_MATERIALIZATION_INFO **slot= restore_from->sj_mat_info;
  for (TABLE_LIST *nest= nest_it++; nest; nest= nest_it++)
    nest->sj_mat_info= *(slot++);
}
|
|
|
|
|
|
/**
|
|
Reoptimize a query plan taking into account an additional conjunct to the
|
|
WHERE clause.
|
|
|
|
@param added_where An extra conjunct to the WHERE clause to reoptimize with
|
|
@param join_tables The set of tables to reoptimize
|
|
@param save_to If != NULL, save here the state of the current query
|
|
plan, otherwise reuse the existing query plan structures.
|
|
|
|
@notes
|
|
Given a query plan that was already optimized taking into account some WHERE
|
|
clause 'C', reoptimize this plan with a new WHERE clause 'C AND added_where'.
|
|
The reoptimization works as follows:
|
|
|
|
1. Call update_ref_and_keys *only* for the new conditions 'added_where'
|
|
that are about to be injected into the query.
|
|
2. Expand if necessary the original KEYUSE array JOIN::keyuse to
|
|
accommodate the new REF accesses computed for the 'added_where' condition.
|
|
3. Add the new KEYUSEs into JOIN::keyuse.
|
|
4. Re-sort and re-filter the JOIN::keyuse array with the newly added
|
|
KEYUSE elements.
|
|
|
|
@retval REOPT_NEW_PLAN there is a new plan.
|
|
@retval REOPT_OLD_PLAN no new improved plan was produced, use the old one.
|
|
@retval REOPT_ERROR an irrecoverable error occurred during
|
|
reoptimization.
|
|
*/
|
|
|
|
JOIN::enum_reopt_result
JOIN::reoptimize(Item *added_where, table_map join_tables,
                 Join_plan_state *save_to)
{
  DYNAMIC_ARRAY added_keyuse;
  SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
  size_t org_keyuse_elements;

  /* Re-run the REF optimizer to take into account the new conditions. */
  if (update_ref_and_keys(thd, &added_keyuse, join_tab, table_count,
                          added_where, ~outer_join, select_lex, &sargables))
  {
    delete_dynamic(&added_keyuse);
    return REOPT_ERROR;
  }

  /* No new access methods were found: the old plan stays valid */
  if (!added_keyuse.elements)
  {
    delete_dynamic(&added_keyuse);
    return REOPT_OLD_PLAN;
  }

  if (save_to)
    save_query_plan(save_to);
  else
    reset_query_plan();

  /* After save_query_plan() the keyuse array may have no buffer yet */
  if (!keyuse.buffer &&
      my_init_dynamic_array(thd->mem_root->psi_key, &keyuse, sizeof(KEYUSE),
                            20, 64, MYF(MY_THREAD_SPECIFIC)))
  {
    delete_dynamic(&added_keyuse);
    return REOPT_ERROR;
  }

  org_keyuse_elements= save_to ? save_to->keyuse.elements : keyuse.elements;
  /*
    The copies below write org_keyuse_elements + added_keyuse.elements
    elements into keyuse.buffer, so a failure to reserve that much space
    must not be ignored.
  */
  if (allocate_dynamic(&keyuse, org_keyuse_elements + added_keyuse.elements))
  {
    delete_dynamic(&added_keyuse);
    return REOPT_ERROR;
  }

  /* If needed, add the access methods from the original query plan. */
  if (save_to)
  {
    DBUG_ASSERT(!keyuse.elements);
    keyuse.elements= save_to->keyuse.elements;
    if (size_t e= keyuse.elements)
      memcpy(keyuse.buffer,
             save_to->keyuse.buffer, e * keyuse.size_of_element);
  }

  /* Add the new access methods to the keyuse array. */
  memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
         added_keyuse.buffer,
         (size_t) added_keyuse.elements * added_keyuse.size_of_element);
  keyuse.elements+= added_keyuse.elements;
  /* added_keyuse contents is copied, and it is no longer needed. */
  delete_dynamic(&added_keyuse);

  if (sort_and_filter_keyuse(this, &keyuse, true))
    return REOPT_ERROR;
  optimize_keyuse(this, &keyuse);

  if (optimize_semijoin_nests(this, join_tables))
    return REOPT_ERROR;

  /* Re-run the join optimizer to compute a new query plan. */
  if (choose_plan(this, join_tables, 0))
    return REOPT_ERROR;

  return REOPT_NEW_PLAN;
}
|
|
|
|
|
|
/**
|
|
Cache constant expressions in WHERE, HAVING, ON conditions.
|
|
*/
|
|
|
|
void JOIN::cache_const_exprs()
|
|
{
|
|
uchar cache_flag= FALSE;
|
|
uchar *analyzer_arg= &cache_flag;
|
|
|
|
/* No need in cache if all tables are constant. */
|
|
if (const_tables == table_count)
|
|
return;
|
|
|
|
if (conds)
|
|
conds->top_level_compile(thd, &Item::cache_const_expr_analyzer, &analyzer_arg,
|
|
&Item::cache_const_expr_transformer, &cache_flag);
|
|
cache_flag= FALSE;
|
|
if (having)
|
|
having->top_level_compile(thd, &Item::cache_const_expr_analyzer,
|
|
&analyzer_arg, &Item::cache_const_expr_transformer, &cache_flag);
|
|
|
|
for (JOIN_TAB *tab= first_depth_first_tab(this); tab;
|
|
tab= next_depth_first_tab(this, tab))
|
|
{
|
|
if (*tab->on_expr_ref)
|
|
{
|
|
cache_flag= FALSE;
|
|
(*tab->on_expr_ref)->top_level_compile(thd, &Item::cache_const_expr_analyzer,
|
|
&analyzer_arg, &Item::cache_const_expr_transformer, &cache_flag);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Get the cost of using index keynr to read #LIMIT matching rows by calling
|
|
ha_index_next() repeatedly (either with index scan, quick or 'ref')
|
|
|
|
@detail
|
|
- If there is a quick select, we try to use it.
|
|
- If there is no quick select return the full cost from
|
|
cost_for_index_read() (Doing a full scan with up to 'limit' records)
|
|
|
|
@param pos Result from best_access_path(). Is NULL for
|
|
single-table UPDATE/DELETE
|
|
@param table Table to be sorted
|
|
@param keynr Which index to use
|
|
@param rows_limit How many rows we want to read.
|
|
This may be different than what was in the original
|
|
LIMIT the caller has included fanouts and extra
|
|
rows needed for handling GROUP BY.
|
|
@param rows_to_scan Number of rows to scan if there is no range.
|
|
@param read_cost Full cost, including cost of WHERE.
|
|
@param read_rows Number of rows that needs to be read
|
|
|
|
@return
|
|
0 No possible range scan, cost is for index scan
|
|
1 Range scan should be used
|
|
|
|
For the moment we don't take selectivity of the WHERE clause into
|
|
account when calculating the number of rows we have to read
|
|
(except what we get from quick select).
|
|
|
|
The cost is calculated the following way:
|
|
(The selectivity is there to take into account the increased number of
|
|
rows that we have to read to find LIMIT matching rows)
|
|
*/
|
|
|
|
static bool get_range_limit_read_cost(const POSITION *pos,
|
|
const TABLE *table,
|
|
uint keynr,
|
|
ha_rows rows_limit_arg,
|
|
ha_rows rows_to_scan,
|
|
double *read_cost,
|
|
double *read_rows)
|
|
{
|
|
double rows_limit= rows2double(rows_limit_arg);
|
|
if (table->opt_range_keys.is_set(keynr))
|
|
{
|
|
/*
|
|
Start from quick select's rows and cost. These are always cheaper than
|
|
full index scan/cost.
|
|
*/
|
|
double best_rows, range_rows;
|
|
double range_cost= (double) table->opt_range[keynr].cost.fetch_cost();
|
|
best_rows= range_rows= (double) table->opt_range[keynr].rows;
|
|
|
|
if (pos)
|
|
{
|
|
double cond_selectivity;
|
|
/*
|
|
Take into count table selectivity as the number of accepted
|
|
rows for this table will be 'records_out'.
|
|
|
|
For example:
|
|
key1 BETWEEN 10 AND 1000 AND key2 BETWEEN 10 AND 20
|
|
|
|
If we are trying to do an ORDER BY on key1, we have to take into
|
|
account that using key2 we have to examine much fewer rows.
|
|
*/
|
|
best_rows= pos->records_out; // Best rows with any key/keys
|
|
/*
|
|
We assign "double range_rows" from integer #rows a few lines above
|
|
so comparison with 0.0 makes sense
|
|
*/
|
|
if (range_rows > 0.0)
|
|
cond_selectivity= best_rows / range_rows;
|
|
else
|
|
cond_selectivity= 1.0;
|
|
|
|
DBUG_ASSERT(cond_selectivity <= 1.000000001);
|
|
set_if_smaller(cond_selectivity, 1.0);
|
|
|
|
/*
|
|
We have to examine more rows in the proportion to the selectivity of the
|
|
the table
|
|
*/
|
|
rows_limit= rows_limit / cond_selectivity;
|
|
}
|
|
|
|
if (best_rows > rows_limit)
|
|
{
|
|
/*
|
|
LIMIT clause specifies that we will need to read fewer records than
|
|
quick select will return. Assume that quick select's cost is
|
|
proportional to the number of records we need to return (e.g. if we
|
|
only need 1/3rd of records, it will cost us 1/3rd of quick select's
|
|
read time)
|
|
*/
|
|
range_cost*= rows_limit / best_rows;
|
|
range_rows= rows_limit;
|
|
}
|
|
*read_cost= range_cost + range_rows * WHERE_COST_THD(table->in_use);
|
|
*read_rows= range_rows;
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
Calculate the number of rows we have to check if we are
|
|
doing a full index scan (as a suitable range scan was not available).
|
|
|
|
We assume that each of the tested indexes is not correlated
|
|
with ref_key. Thus, to select first N records we have to scan
|
|
N/selectivity(ref_key) index entries.
|
|
selectivity(ref_key) = #scanned_records/#table_records =
|
|
refkey_rows_estimate/table_records.
|
|
In any case we can't select more than #table_records.
|
|
N/(refkey_rows_estimate/table_records) > table_records
|
|
<=> N > refkey_rows_estimate.
|
|
*/
|
|
ALL_READ_COST cost= cost_for_index_read(table->in_use, table, keynr,
|
|
rows_to_scan, 0);
|
|
*read_cost= (table->file->cost(&cost) +
|
|
rows_to_scan * WHERE_COST_THD(table->in_use));
|
|
*read_rows= rows2double(rows_to_scan);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Find a cheaper access key than a given key
|
|
|
|
@param tab NULL or JOIN_TAB of the accessed table
|
|
@param order Linked list of ORDER BY arguments
|
|
@param table Table if tab == NULL or tab->table
|
|
@param usable_keys Key map to find a cheaper key in
|
|
@param ref_key
|
|
0 <= key < MAX_KEY - Key that is currently used for finding
|
|
row
|
|
MAX_KEY - means index_merge is used
|
|
-1 - means we're currently not using an
|
|
index to find rows.
|
|
|
|
@param select_limit LIMIT value
|
|
@param [out] new_key Key number if success, otherwise undefined
|
|
@param [out] new_key_direction Return -1 (reverse) or +1 if success,
|
|
otherwise undefined
|
|
@param [out] new_select_limit Estimate of the number of rows we have
|
|
to read to find 'select_limit' rows.
|
|
@param [out] new_used_key_parts NULL by default, otherwise return number
|
|
of new_key prefix columns if success
|
|
or undefined if the function fails
|
|
@param [out] saved_best_key_parts NULL by default, otherwise preserve the
|
|
value for further use in QUICK_SELECT_DESC
|
|
|
|
@note
|
|
This function takes into account table->opt_range_condition_rows statistic
|
|
(that is calculated by the make_join_statistics function).
|
|
However, single table procedures such as Sql_cmd_update:update_single_table()
|
|
and Sql_cmd_delete::delete_single_table()
|
|
never call make_join_statistics, so they have to update it manually
|
|
(@see get_index_for_order()).
|
|
*/
|
|
|
|
static bool
|
|
test_if_cheaper_ordering(bool in_join_optimizer,
|
|
const JOIN_TAB *tab, ORDER *order, TABLE *table,
|
|
key_map usable_keys, int ref_key,
|
|
ha_rows select_limit_arg,
|
|
int *new_key, int *new_key_direction,
|
|
ha_rows *new_select_limit, double *new_read_time,
|
|
uint *new_used_key_parts,
|
|
uint *saved_best_key_parts)
|
|
{
|
|
DBUG_ENTER("test_if_cheaper_ordering");
|
|
/*
|
|
Check whether there is an index compatible with the given order
|
|
usage of which is cheaper than usage of the ref_key index (ref_key>=0)
|
|
or a table scan.
|
|
It may be the case if ORDER/GROUP BY is used with LIMIT.
|
|
*/
|
|
ha_rows best_select_limit= HA_POS_ERROR;
|
|
JOIN *join;
|
|
uint nr;
|
|
key_map keys;
|
|
int best_key_direction= 0;
|
|
double read_time, filesort_cost;
|
|
enum sort_type filesort_type;
|
|
int best_key= -1;
|
|
double fanout;
|
|
ha_rows table_records= table->stat_records();
|
|
bool group;
|
|
const bool has_limit= (select_limit_arg != HA_POS_ERROR);
|
|
THD *thd= table->in_use;
|
|
POSITION *position;
|
|
ha_rows rows_estimate, refkey_rows_estimate;
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_cheaper_ordering(
|
|
thd, "reconsidering_access_paths_for_index_ordering");
|
|
|
|
if (tab)
|
|
{
|
|
join= tab->join;
|
|
position= &join->best_positions[tab- join->join_tab];
|
|
group=join->group && order == join->group_list;
|
|
/* Take into account that records_out can be < 1.0 in case of GROUP BY */
|
|
rows_estimate= double_to_rows(position->records_out+0.5);
|
|
set_if_bigger(rows_estimate, 1);
|
|
refkey_rows_estimate= rows_estimate;
|
|
}
|
|
else
|
|
{
|
|
join= NULL;
|
|
position= 0;
|
|
refkey_rows_estimate= rows_estimate= table_records;
|
|
group= 0;
|
|
}
|
|
trace_cheaper_ordering.add("clause", group ? "GROUP BY" : "ORDER BY");
|
|
|
|
/*
|
|
If not used with LIMIT, only use keys if the whole query can be
|
|
resolved with a key; This is because filesort() is usually faster than
|
|
retrieving all rows through an index.
|
|
*/
|
|
if (select_limit_arg >= table_records)
|
|
{
|
|
keys= *table->file->keys_to_use_for_scanning();
|
|
keys.merge(table->covering_keys);
|
|
|
|
/*
|
|
We are adding here also the index specified in FORCE INDEX clause,
|
|
if any.
|
|
This is to allow users to use index in ORDER BY.
|
|
*/
|
|
if (table->force_index)
|
|
keys.merge(group ? table->keys_in_use_for_group_by :
|
|
table->keys_in_use_for_order_by);
|
|
keys.intersect(usable_keys);
|
|
}
|
|
else
|
|
keys= usable_keys;
|
|
|
|
|
|
if (join) // True if SELECT
|
|
{
|
|
uint nr= join->const_tables;
|
|
fanout= 1.0;
|
|
if (nr != join->table_count - 1) // If not last table
|
|
fanout= (join->join_record_count / position->records_out);
|
|
else
|
|
{
|
|
/* Only one table. Limit cannot be bigger than table_records */
|
|
set_if_smaller(select_limit_arg, table_records);
|
|
}
|
|
read_time= position->read_time;
|
|
}
|
|
else
|
|
{
|
|
/* Probably an update or delete. Assume we will do a full table scan */
|
|
fanout= 1.0;
|
|
read_time= table->file->cost(table->file->ha_scan_and_compare_time(rows_estimate));
|
|
set_if_smaller(select_limit_arg, table_records);
|
|
}
|
|
|
|
filesort_cost= cost_of_filesort(table, order, rows_estimate,
|
|
select_limit_arg, &filesort_type);
|
|
read_time+= filesort_cost;
|
|
|
|
/*
|
|
Calculate the selectivity of the ref_key for REF_ACCESS. For
|
|
RANGE_ACCESS we use table->opt_range_condition_rows.
|
|
*/
|
|
if (in_join_optimizer)
|
|
{
|
|
if (ref_key >= 0 && ref_key != MAX_KEY &&
|
|
join->best_positions[join->const_tables].type == JT_REF)
|
|
{
|
|
refkey_rows_estimate=
|
|
(ha_rows)join->best_positions[join->const_tables].records_read;
|
|
set_if_bigger(refkey_rows_estimate, 1);
|
|
}
|
|
}
|
|
else if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF)
|
|
{
|
|
/*
|
|
If ref access uses keypart=const for all its key parts,
|
|
and quick select uses the same # of key parts, then they are equivalent.
|
|
Reuse #rows estimate from quick select as it is more precise.
|
|
|
|
Note: we could just have used
|
|
join->best_positions[join->const_tables].records_read
|
|
here. That number was computed in best_access_path() and it already
|
|
includes adjustments based on table->opt_range[ref_key].rows.
|
|
*/
|
|
if (tab->ref.const_ref_part_map ==
|
|
make_prev_keypart_map(tab->ref.key_parts) &&
|
|
table->opt_range_keys.is_set(ref_key) &&
|
|
table->opt_range[ref_key].key_parts == tab->ref.key_parts)
|
|
refkey_rows_estimate= table->opt_range[ref_key].rows;
|
|
else
|
|
{
|
|
const KEY *ref_keyinfo= table->key_info + ref_key;
|
|
refkey_rows_estimate=
|
|
(ha_rows)ref_keyinfo->actual_rec_per_key(tab->ref.key_parts - 1);
|
|
}
|
|
set_if_bigger(refkey_rows_estimate, 1);
|
|
}
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
if (tab)
|
|
trace_cheaper_ordering.add_table_name(tab);
|
|
else
|
|
trace_cheaper_ordering.add_table_name(table);
|
|
trace_cheaper_ordering.
|
|
add("rows_estimation", rows_estimate).
|
|
add("filesort_cost", filesort_cost).
|
|
add("read_cost", read_time).
|
|
add("filesort_type", filesort_names[filesort_type].str).
|
|
add("fanout", fanout);
|
|
}
|
|
|
|
/*
|
|
Force using an index for sorting if there was no ref key
|
|
and FORCE INDEX was used.
|
|
*/
|
|
if (table->force_index && ref_key < 0)
|
|
read_time= DBL_MAX;
|
|
|
|
Json_writer_array possible_keys(thd,"possible_keys");
|
|
for (nr=0; nr < table->s->keys ; nr++)
|
|
{
|
|
int direction;
|
|
ha_rows select_limit= select_limit_arg;
|
|
ha_rows estimated_rows_to_scan;
|
|
uint used_key_parts= 0;
|
|
double range_cost, range_rows;
|
|
Json_writer_object possible_key(thd);
|
|
possible_key.add("index", table->key_info[nr].name);
|
|
|
|
if (keys.is_set(nr) &&
|
|
(direction= test_if_order_by_key(join, order, table, nr,
|
|
&used_key_parts)))
|
|
{
|
|
/*
|
|
At this point we are sure that ref_key is a non-ordering
|
|
key (where "ordering key" is a key that will return rows
|
|
in the order required by ORDER BY).
|
|
*/
|
|
DBUG_ASSERT (ref_key != (int) nr);
|
|
|
|
possible_key.add("can_resolve_order", true);
|
|
possible_key.add("direction", direction);
|
|
bool is_covering= (table->covering_keys.is_set(nr) ||
|
|
table->is_clustering_key(nr));
|
|
/*
|
|
Don't use an index scan with ORDER BY without limit.
|
|
For GROUP BY without limit always use index scan
|
|
if there is a suitable index.
|
|
Why we hold to this asymmetry hardly can be explained
|
|
rationally. It's easy to demonstrate that using
|
|
temporary table + filesort could be cheaper for grouping
|
|
queries too.
|
|
*/
|
|
if (is_covering ||
|
|
has_limit ||
|
|
(ref_key < 0 && (group || table->force_index)))
|
|
{
|
|
double rec_per_key;
|
|
KEY *keyinfo= table->key_info+nr;
|
|
if (group)
|
|
{
|
|
/*
|
|
Used_key_parts can be larger than keyinfo->user_defined_key_parts
|
|
when using a secondary index clustered with a primary
|
|
key (e.g. as in Innodb).
|
|
See Bug #28591 for details.
|
|
*/
|
|
uint used_index_parts= keyinfo->user_defined_key_parts;
|
|
uint used_pk_parts= 0;
|
|
if (used_key_parts > used_index_parts)
|
|
used_pk_parts= used_key_parts-used_index_parts;
|
|
rec_per_key= used_key_parts ?
|
|
keyinfo->actual_rec_per_key(used_key_parts-1) : 1;
|
|
/* Take into account the selectivity of the used pk prefix */
|
|
if (used_pk_parts)
|
|
{
|
|
/*
|
|
TODO: This code need to be tested with debugger
|
|
- Why set rec_per_key to 1 if we don't have primary key data
|
|
or the full key is used ?
|
|
- If used_pk_parts == 1, we don't take into account that
|
|
the first primary key part could part of the current key.
|
|
*/
|
|
KEY *pkinfo=tab->table->key_info+table->s->primary_key;
|
|
/*
|
|
If the values of of records per key for the prefixes
|
|
of the primary key are considered unknown we assume
|
|
they are equal to 1.
|
|
*/
|
|
if (used_key_parts == pkinfo->user_defined_key_parts ||
|
|
pkinfo->rec_per_key[0] == 0)
|
|
rec_per_key= 1;
|
|
if (rec_per_key > 1)
|
|
{
|
|
rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1);
|
|
rec_per_key/= pkinfo->actual_rec_per_key(0);
|
|
/*
|
|
The value of rec_per_key for the extended key has
|
|
to be adjusted accordingly if some components of
|
|
the secondary key are included in the primary key.
|
|
*/
|
|
for(uint i= 1; i < used_pk_parts; i++)
|
|
{
|
|
if (pkinfo->key_part[i].field->key_start.is_set(nr))
|
|
{
|
|
/*
|
|
We presume here that for any index rec_per_key[i] != 0
|
|
if rec_per_key[0] != 0.
|
|
*/
|
|
DBUG_ASSERT(pkinfo->actual_rec_per_key(i));
|
|
rec_per_key*= pkinfo->actual_rec_per_key(i-1);
|
|
rec_per_key/= pkinfo->actual_rec_per_key(i);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
set_if_bigger(rec_per_key, 1);
|
|
/*
|
|
With a grouping query each group containing on average
|
|
rec_per_key records produces only one row that will
|
|
be included into the result set.
|
|
*/
|
|
if (select_limit > table_records/rec_per_key)
|
|
select_limit= table_records;
|
|
else
|
|
select_limit= (ha_rows) (select_limit*rec_per_key);
|
|
} /* group */
|
|
|
|
/*
|
|
If tab=tk is not the last joined table tn then to get first
|
|
L records from the result set we can expect to retrieve
|
|
only L/fanout(tk,tn) where fanout(tk,tn) says how many
|
|
rows in the record set on average will match each row tk.
|
|
Usually our estimates for fanouts are too pessimistic.
|
|
So the estimate for L/fanout(tk,tn) will be too optimistic
|
|
and as result we'll choose an index scan when using ref/range
|
|
access + filesort will be cheaper.
|
|
*/
|
|
select_limit= double_to_rows(select_limit/fanout);
|
|
set_if_bigger(select_limit, 1);
|
|
|
|
if (select_limit > refkey_rows_estimate)
|
|
estimated_rows_to_scan= table_records;
|
|
else
|
|
estimated_rows_to_scan= (ha_rows) (select_limit *
|
|
(double) table_records /
|
|
(double) refkey_rows_estimate);
|
|
|
|
bool range_scan= get_range_limit_read_cost(tab ? position : 0,
|
|
table,
|
|
nr,
|
|
select_limit,
|
|
estimated_rows_to_scan,
|
|
&range_cost,
|
|
&range_rows);
|
|
if (unlikely(possible_key.trace_started()))
|
|
{
|
|
possible_key
|
|
.add("rows_to_examine", range_rows)
|
|
.add("range_scan", range_scan)
|
|
.add("scan_cost", range_cost);
|
|
}
|
|
|
|
/*
|
|
We will try use the key if:
|
|
- If there is no ref key and no usable keys has yet been found and
|
|
there is either a group by or a FORCE_INDEX
|
|
- If the new cost is better than read_time
|
|
*/
|
|
if (range_cost < read_time)
|
|
{
|
|
read_time= range_cost;
|
|
possible_key.add("chosen", true);
|
|
best_key= nr;
|
|
if (saved_best_key_parts)
|
|
*saved_best_key_parts= used_key_parts;
|
|
if (new_used_key_parts)
|
|
*new_used_key_parts= keyinfo->user_defined_key_parts;
|
|
best_key_direction= direction;
|
|
best_select_limit= estimated_rows_to_scan;
|
|
}
|
|
else if (unlikely(possible_key.trace_started()))
|
|
{
|
|
possible_key
|
|
.add("usable", false)
|
|
.add("cause", "cost");
|
|
}
|
|
}
|
|
else if (unlikely(possible_key.trace_started()))
|
|
{
|
|
possible_key.add("usable", false);
|
|
if (!group && select_limit == HA_POS_ERROR)
|
|
possible_key.add("cause", "order by without limit");
|
|
}
|
|
}
|
|
else if (unlikely(possible_key.trace_started()))
|
|
{
|
|
if (keys.is_set(nr))
|
|
{
|
|
possible_key.
|
|
add("can_resolve_order", false).
|
|
add("cause", "order can not be resolved by key");
|
|
}
|
|
else
|
|
{
|
|
possible_key.
|
|
add("can_resolve_order", false).
|
|
add("cause", "not usable index for the query");
|
|
}
|
|
}
|
|
}
|
|
|
|
if (best_key < 0 || best_key == ref_key)
|
|
DBUG_RETURN(FALSE);
|
|
|
|
*new_key= best_key;
|
|
*new_key_direction= best_key_direction;
|
|
*new_select_limit= has_limit ? best_select_limit : table_records;
|
|
*new_read_time= read_time;
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
|
|
/**
|
|
Find a key to apply single table UPDATE/DELETE by a given ORDER
|
|
|
|
@param order Linked list of ORDER BY arguments
|
|
@param table Table to find a key
|
|
@param select Pointer to access/update select->quick (if any)
|
|
@param limit LIMIT clause parameter
|
|
@param [out] scanned_limit How many records we expect to scan
|
|
Valid if *need_sort=FALSE.
|
|
@param [out] need_sort TRUE if filesort needed
|
|
@param [out] reverse
|
|
TRUE if the key is reversed against the given ORDER (undefined if key == MAX_KEY)
|
|
|
|
@return
|
|
- MAX_KEY if no key found (need_sort == TRUE)
|
|
- MAX_KEY if quick select result order is OK (need_sort == FALSE)
|
|
- key number (either index scan or quick select) (need_sort == FALSE)
|
|
|
|
@note
|
|
Side effects:
|
|
- may deallocate or deallocate and replace select->quick;
|
|
- may set table->opt_range_condition_rows and table->quick_rows[...]
|
|
to table->file->stats.records.
|
|
*/
|
|
|
|
uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
                         ha_rows limit, ha_rows *scanned_limit,
                         bool *need_sort, bool *reverse)
{
  const bool have_quick= select && select->quick;

  if (!order)
  {
    /* No ORDER BY at all: whatever access method is in place is fine */
    *need_sort= FALSE;
    if (have_quick)
      return select->quick->index;       // index or MAX_KEY, use quick as is
    return table->file->key_used_on_scan; // MAX_KEY or index for some engine
  }

  if (!is_simple_order(order))       // just to cut further expensive checks
  {
    *need_sort= TRUE;
    return MAX_KEY;
  }

  if (have_quick)
  {
    if (select->quick->index == MAX_KEY)
    {
      *need_sort= TRUE;
      return MAX_KEY;
    }

    uint used_key_parts;
    const int order_direction=
      test_if_order_by_key(NULL, order, table, select->quick->index,
                           &used_key_parts);

    if (order_direction == 1)
    {
      /* The quick select already delivers rows in the desired order */
      *need_sort= FALSE;
      *scanned_limit= MY_MIN(limit, select->quick->records);
      return select->quick->index;
    }
    if (order_direction == 0)
    {
      /* The quick select's index cannot produce the requested order */
      *need_sort= TRUE;
      return MAX_KEY;
    }
    if (order_direction == -1)
    {
      /* Desired order, but opposite direction: try a reversed quick select */
      QUICK_SELECT_I *reverse_quick=
        select->quick->make_reverse(used_key_parts);
      if (!reverse_quick)
      {
        *need_sort= TRUE;
        return MAX_KEY;
      }
      select->set_quick(reverse_quick);
      *need_sort= FALSE;
      *scanned_limit= MY_MIN(limit, select->quick->records);
      return select->quick->index;
    }
    /* test_if_order_by_key() is expected to return only -1, 0 or 1 */
    DBUG_ASSERT(0);
  }
  else if (limit != HA_POS_ERROR)
  {
    // check if some index scan & LIMIT is more efficient than filesort

    /*
      Update opt_range_condition_rows since single table UPDATE/DELETE
      procedures don't call make_join_statistics() and leave this
      variable uninitialized.
    */
    table->opt_range_condition_rows= table->stat_records();

    int key, direction;
    double new_cost;
    const bool cheaper_index_found=
      test_if_cheaper_ordering(FALSE, NULL, order, table,
                               table->keys_in_use_for_order_by, -1,
                               limit,
                               &key, &direction, &limit, &new_cost);
    /* An index whose columns are being modified must not drive the scan */
    if (cheaper_index_found && !is_key_used(table, key, table->write_set))
    {
      *need_sort= FALSE;
      *scanned_limit= limit;
      *reverse= (direction < 0);
      return key;
    }
  }

  *need_sort= TRUE;
  return MAX_KEY;
}
|
|
|
|
|
|
/*
|
|
Count how many times the specified conditions are true for first rows_to_read
|
|
rows of the table.
|
|
|
|
@param thd Thread handle
|
|
@param rows_to_read How many rows to sample
|
|
@param table Table to use
|
|
@param conds INOUT List of conditions and counters for them
|
|
|
|
@return Number of rows we've checked. It can be equal to or less than rows_to_read.
|
|
0 is returned for error or when the table had no rows.
|
|
*/
|
|
|
|
ulong check_selectivity(THD *thd,
                        ulong rows_to_read,
                        TABLE *table,
                        List<COND_STATISTIC> *conds)
{
  ulong rows_checked= 0;
  COND_STATISTIC *stat;
  List_iterator_fast<COND_STATISTIC> stat_it(*conds);
  handler *file= table->file;
  uchar *record= table->record[0];
  int error= 0;
  bool failed= false;
  DBUG_ENTER("check_selectivity");

  DBUG_ASSERT(rows_to_read > 0);

  /* Start every condition's hit counter from zero */
  while ((stat= stat_it++))
  {
    DBUG_ASSERT(stat->cond);
    DBUG_ASSERT(stat->cond->used_tables() == table->map);
    stat->positive= 0;
  }
  stat_it.rewind();

  if (unlikely(file->ha_rnd_init_with_error(1)))
    DBUG_RETURN(0);

  /* Sample rows until the quota is reached, the table ends or we fail */
  for (;;)
  {
    error= file->ha_rnd_next(record);

    if (unlikely(thd->killed))
    {
      thd->send_kill_message();
      failed= true;
      break;
    }
    if (unlikely(error))
    {
      /* Running out of rows is a normal way to stop; anything else is not */
      if (error != HA_ERR_END_OF_FILE)
        failed= true;
      break;
    }

    rows_checked++;
    while ((stat= stat_it++))
    {
      if (stat->cond->val_bool())
        stat->positive++;
    }
    stat_it.rewind();

    if (rows_checked >= rows_to_read)
      break;
  }

  if (failed)
  {
    DBUG_PRINT("error", ("error %d", error));
  }
  file->ha_rnd_end();
  DBUG_RETURN(failed ? 0 : rows_checked);
}
|
|
|
|
/****************************************************************************
|
|
AGGR_OP implementation
|
|
****************************************************************************/
|
|
|
|
/**
|
|
@brief Instantiate tmp table for aggregation and start index scan if needed
|
|
@todo Tmp table always would be created, even for empty result. Extend
|
|
executor to avoid tmp table creation when no rows were written
|
|
into tmp table.
|
|
@return
|
|
true error
|
|
false ok
|
|
*/
|
|
|
|
bool
|
|
AGGR_OP::prepare_tmp_table()
|
|
{
|
|
TABLE *table= join_tab->table;
|
|
JOIN *join= join_tab->join;
|
|
int rc= 0;
|
|
|
|
if (!join_tab->table->is_created())
|
|
{
|
|
if (instantiate_tmp_table(table, join_tab->tmp_table_param->keyinfo,
|
|
join_tab->tmp_table_param->start_recinfo,
|
|
&join_tab->tmp_table_param->recinfo,
|
|
join->select_options))
|
|
return true;
|
|
(void) table->file->extra(HA_EXTRA_WRITE_CACHE);
|
|
}
|
|
/* If it wasn't already, start index scan for grouping using table index. */
|
|
if (!table->file->inited && table->group &&
|
|
join_tab->tmp_table_param->sum_func_count && table->s->keys)
|
|
rc= table->file->ha_index_init(0, 0);
|
|
else
|
|
{
|
|
/* Start index scan in scanning mode */
|
|
rc= table->file->ha_rnd_init(true);
|
|
}
|
|
if (rc)
|
|
{
|
|
table->file->print_error(rc, MYF(0));
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Prepare table if necessary and call write_func to save record
|
|
|
|
@param end_of_records the end_of_record signal to pass to the writer
|
|
|
|
@return return one of enum_nested_loop_state.
|
|
*/
|
|
|
|
enum_nested_loop_state
AGGR_OP::put_record(bool end_of_records)
{
  /* Lazy tmp table creation/initialization: only when not yet inited */
  if (!join_tab->table->file->inited && prepare_tmp_table())
    return NESTED_LOOP_ERROR;

  /* Hand the record (or the end-of-records signal) over to the writer */
  return (*write_func)(join_tab->join, join_tab, end_of_records);
}
|
|
|
|
|
|
/**
|
|
@brief Finish rnd/index scan after accumulating records, switch ref_array,
|
|
and send accumulated records further.
|
|
@return return one of enum_nested_loop_state.
|
|
*/
|
|
|
|
enum_nested_loop_state
AGGR_OP::end_send()
{
  TABLE *table= join_tab->table;
  JOIN *join= join_tab->join;

  /* All records were stored, flush the writer with the end-of-records flag */
  enum_nested_loop_state rc= put_record(true);
  if (rc < NESTED_LOOP_OK)
    return rc;

  /*
    Stop write caching and close the scan. Both steps are always attempted;
    if both fail, the error of the later one is reported.
  */
  int new_errno= 0;
  int tmp= table->file->extra(HA_EXTRA_NO_CACHE);
  if (tmp)
  {
    DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed"));
    new_errno= tmp;
  }
  tmp= table->file->ha_index_or_rnd_end();
  if (tmp)
  {
    DBUG_PRINT("error",("ha_index_or_rnd_end() failed"));
    new_errno= tmp;
  }
  if (new_errno)
  {
    table->file->print_error(new_errno, MYF(0));
    return NESTED_LOOP_ERROR;
  }

  // Update ref array
  join->set_items_ref_array(*join_tab->ref_array);

  const bool keep_last_filesort_result= !join_tab->filesort;
  if (join_tab->window_funcs_step &&
      join_tab->window_funcs_step->exec(join, keep_last_filesort_result))
    return NESTED_LOOP_ERROR;

  table->reginfo.lock_type= TL_UNLOCK;

  /*
    Reset the counter before copying rows from internal temporary table to
    INSERT table.
  */
  join->thd->get_stmt_da()->reset_current_row_for_warning(1);

  /* The first iteration opens the read; later ones fetch the next row */
  for (bool first_read= true; rc == NESTED_LOOP_OK; first_read= false)
  {
    const int error= first_read ? join_init_read_record(join_tab)
                                : join_tab->read_record.read_record();

    if (unlikely(error > 0 || (join->thd->is_error())))   // Fatal error
    {
      rc= NESTED_LOOP_ERROR;
      continue;
    }
    if (error < 0)
      break;
    if (unlikely(join->thd->killed))                      // Aborted by user
    {
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
      continue;
    }
    rc= evaluate_join_record(join, join_tab, 0);
  }

  if (keep_last_filesort_result)
  {
    delete join_tab->filesort_result;
    join_tab->filesort_result= NULL;
  }

  // Finish rnd scan after sending records
  if (table->file->inited)
    table->file->ha_rnd_end();

  return rc;
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Remove marked top conjuncts of a condition
|
|
|
|
@param thd The thread handle
|
|
@param cond The condition which subformulas are to be removed
|
|
|
|
@details
|
|
The function removes all top conjuncts marked with the flag
|
|
MARKER_FULL_EXTRACTION from the condition 'cond'. The resulting
|
|
formula is returned as the result of the function.
|
|
If 'cond' is marked with such flag the function returns 0.
|
|
The function clears the extraction flags for the removed
|
|
formulas
|
|
|
|
@retval
|
|
condition without removed subformulas
|
|
0 if the whole 'cond' is removed
|
|
*/
|
|
|
|
Item *remove_pushed_top_conjuncts(THD *thd, Item *cond)
{
  /* The whole condition was extracted: nothing is left of it */
  if (cond->get_extraction_flag() == MARKER_FULL_EXTRACTION)
  {
    cond->clear_extraction_flag();
    return 0;
  }

  /* Only the arguments of a top-level AND are inspected */
  if (cond->type() != Item::COND_ITEM ||
      ((Item_cond*) cond)->functype() != Item_func::COND_AND_FUNC)
    return cond;

  List<Item> *conjuncts= ((Item_cond*) cond)->argument_list();
  List_iterator<Item> it(*conjuncts);
  Item *conjunct;
  while ((conjunct= it++))
  {
    if (conjunct->get_extraction_flag() != MARKER_FULL_EXTRACTION)
      continue;
    conjunct->clear_extraction_flag();
    it.remove();
  }

  /* Collapse the AND when fewer than two conjuncts remain */
  if (conjuncts->elements == 0)
    return 0;
  if (conjuncts->elements == 1)
    return conjuncts->head();
  return cond;
}
|
|
|
|
|
|
/*
|
|
There are 3 cases in which we shortcut the join optimization process as we
|
|
conclude that the join would be a degenerate one
|
|
1) IMPOSSIBLE WHERE
|
|
2) MIN/MAX optimization (@see opt_sum_query)
|
|
3) EMPTY CONST TABLE
|
|
If a window function is present in any of the above cases then to get the
|
|
result of the window function, we need to execute it. So we need to
|
|
create a temporary table for its execution. Here we need to take in mind
|
|
that aggregate functions and non-aggregate function need not be executed.
|
|
|
|
*/
|
|
|
|
void JOIN::handle_implicit_grouping_with_window_funcs()
|
|
{
|
|
if (select_lex->have_window_funcs() && send_row_on_empty_set())
|
|
{
|
|
const_tables= top_join_tab_count= table_count= 0;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
@brief
|
|
Perform a partial cleanup for the JOIN_TAB structure
|
|
|
|
@note
|
|
this is used to cleanup resources for the re-execution of correlated
|
|
subqueries.
|
|
*/
|
|
void JOIN_TAB::partial_cleanup()
|
|
{
|
|
if (!table)
|
|
return;
|
|
|
|
if (table->is_created())
|
|
{
|
|
table->file->ha_index_or_rnd_end();
|
|
DBUG_PRINT("info", ("close index: %s.%s alias: %s",
|
|
table->s->db.str,
|
|
table->s->table_name.str,
|
|
table->alias.c_ptr()));
|
|
if (aggr)
|
|
{
|
|
int tmp= 0;
|
|
if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
|
|
table->file->print_error(tmp, MYF(0));
|
|
}
|
|
}
|
|
delete filesort_result;
|
|
filesort_result= NULL;
|
|
free_cache(&read_record);
|
|
}
|
|
|
|
/**
|
|
@brief
|
|
Construct not null conditions for provingly not nullable fields
|
|
|
|
@details
|
|
For each non-constant joined table the function creates a conjunction
|
|
of IS NOT NULL predicates containing a predicate for each field used
|
|
in the WHERE clause or an OR expression such that
|
|
- is declared as nullable
|
|
- for which it can be proved that it is null-rejected
|
|
- is a part of some index.
|
|
This conjunction could be anded with either the WHERE condition or with
|
|
an ON expression and the modified join query would produce the same
|
|
result set as the original one.
|
|
If a conjunction of IS NOT NULL predicates is constructed for an inner
|
|
table of an outer join OJ that is not an inner table of embedded outer
|
|
joins then it is to be anded with the ON expression of OJ.
|
|
The constructed conjunctions of IS NOT NULL predicates are attached
|
|
to the corresponding tables. They used for range analysis complementary
|
|
to other sargable range conditions.
|
|
|
|
@note
|
|
Let f be a field of the joined table t. In the context of the upper
|
|
paragraph field f is called null-rejected if any of the following holds:
|
|
|
|
- t is a table of a top inner join and a conjunctive formula that rejects
|
|
rows with null values for f can be extracted from the WHERE condition
|
|
|
|
- t is an outer table of a top outer join operation and a conjunctive
|
|
formula over the outer tables of the outer join that rejects rows with
|
|
null values for f can be extracted from the WHERE condition
|
|
|
|
- t is an outer table of a non-top outer join operation and a conjunctive
|
|
formula over the outer tables of the outer join that rejects rows with
|
|
null values for f can be extracted from the ON expression of the
|
|
embedding outer join
|
|
|
|
- the joined table is an inner table of a outer join operation and
|
|
a conjunctive formula over inner tables of the outer join that rejects
|
|
rows with null values for f can be extracted from the ON expression of
|
|
the outer join operation.
|
|
|
|
It is assumed above that all inner join nests have been eliminated and
|
|
that all possible conversions of outer joins into inner joins have been
|
|
already done.
|
|
*/
|
|
|
|
void JOIN::make_notnull_conds_for_range_scans()
|
|
{
|
|
DBUG_ENTER("JOIN::make_notnull_conds_for_range_scans");
|
|
|
|
if (impossible_where ||
|
|
!optimizer_flag(thd, OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN))
|
|
{
|
|
/* Complementary range analysis is not needed */
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
if (conds && build_notnull_conds_for_range_scans(this, conds,
|
|
conds->used_tables()))
|
|
{
|
|
/*
|
|
Found a IS NULL conjunctive predicate for a null-rejected field
|
|
in the WHERE clause
|
|
*/
|
|
conds= (Item*) Item_false;
|
|
cond_equal= 0;
|
|
impossible_where= true;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
List_iterator<TABLE_LIST> li(*join_list);
|
|
TABLE_LIST *tbl;
|
|
while ((tbl= li++))
|
|
{
|
|
if (tbl->on_expr)
|
|
{
|
|
if (tbl->nested_join)
|
|
{
|
|
build_notnull_conds_for_inner_nest_of_outer_join(this, tbl);
|
|
}
|
|
else if (build_notnull_conds_for_range_scans(this, tbl->on_expr,
|
|
tbl->table->map))
|
|
{
|
|
/*
|
|
Found a IS NULL conjunctive predicate for a null-rejected field
|
|
of the inner table of an outer join with ON expression tbl->on_expr
|
|
*/
|
|
tbl->on_expr= (Item*) Item_false;
|
|
}
|
|
}
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Build not null conditions for range scans of given join tables
|
|
|
|
@param join the join for whose tables not null conditions are to be built
|
|
@param cond the condition from which not null predicates are to be inferred
|
|
@param allowed the bit map of join tables to be taken into account
|
|
|
|
@details
|
|
For each join table t from the 'allowed' set of tables the function finds
|
|
all fields whose null-rejectedness can be inferred from null-rejectedness
|
|
of the condition cond. For each found field f from table t such that it
|
|
participates at least in one index on table t a NOT NULL predicate is
|
|
constructed and a conjunction of all such predicates is attached to t.
|
|
If when looking for null-rejecting fields of t it is discovered one of its
|
|
fields has to be null-rejected and there is IS NULL conjunctive top level
|
|
predicate for this field then the function immediately returns true.
|
|
The function uses the bitmap TABLE::tmp_set to mark found null-rejected
|
|
fields of table t.
|
|
|
|
@note
|
|
Currently only top level conjuncts without disjunctive sub-formulas are
|
|
taken into account when looking for null-rejected fields.
|
|
|
|
@retval
|
|
true if a contradiction is inferred
|
|
false otherwise
|
|
*/
|
|
|
|
static
|
|
bool build_notnull_conds_for_range_scans(JOIN *join, Item *cond,
|
|
table_map allowed)
|
|
{
|
|
THD *thd= join->thd;
|
|
DBUG_ENTER("build_notnull_conds_for_range_scans");
|
|
|
|
for (JOIN_TAB *s= join->join_tab;
|
|
s < join->join_tab + join->table_count ; s++)
|
|
{
|
|
/* Clear all needed bitmaps to mark found fields */
|
|
if ((allowed & s->table->map) &&
|
|
!(s->table->map & join->const_table_map))
|
|
bitmap_clear_all(&s->table->tmp_set);
|
|
}
|
|
|
|
/*
|
|
Find all null-rejected fields assuming that cond is null-rejected and
|
|
only formulas over tables from 'allowed' are to be taken into account
|
|
*/
|
|
if (cond->find_not_null_fields(allowed))
|
|
DBUG_RETURN(true);
|
|
|
|
/*
|
|
For each table t from 'allowed' build a conjunction of NOT NULL predicates
|
|
constructed for all found fields if they are included in some indexes.
|
|
If the construction of the conjunction succeeds attach the formula to
|
|
t->table->notnull_cond. The condition will be used to look for
|
|
complementary range scans.
|
|
*/
|
|
for (JOIN_TAB *s= join->join_tab ;
|
|
s < join->join_tab + join->table_count ; s++)
|
|
{
|
|
TABLE *tab= s->table;
|
|
List<Item> notnull_list;
|
|
Item *notnull_cond= 0;
|
|
|
|
if (!(allowed & tab->map) ||
|
|
(s->table->map && join->const_table_map))
|
|
continue;
|
|
|
|
for (Field** field_ptr= tab->field; *field_ptr; field_ptr++)
|
|
{
|
|
Field *field= *field_ptr;
|
|
if (field->part_of_key.is_clear_all())
|
|
continue;
|
|
if (!bitmap_is_set(&tab->tmp_set, field->field_index))
|
|
continue;
|
|
Item_field *field_item= new (thd->mem_root) Item_field(thd, field);
|
|
if (!field_item)
|
|
continue;
|
|
Item *isnotnull_item=
|
|
new (thd->mem_root) Item_func_isnotnull(thd, field_item);
|
|
if (!isnotnull_item)
|
|
continue;
|
|
if (notnull_list.push_back(isnotnull_item, thd->mem_root))
|
|
continue;
|
|
s->const_keys.merge(field->part_of_key);
|
|
}
|
|
|
|
switch (notnull_list.elements) {
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
notnull_cond= notnull_list.head();
|
|
break;
|
|
default:
|
|
notnull_cond=
|
|
new (thd->mem_root) Item_cond_and(thd, notnull_list);
|
|
}
|
|
if (notnull_cond && !notnull_cond->fix_fields(thd, 0))
|
|
{
|
|
tab->notnull_cond= notnull_cond;
|
|
}
|
|
}
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Build not null conditions for inner nest tables of an outer join
|
|
|
|
@param join the join for whose table nest not null conditions are to be
|
|
built
|
|
@param nest_tbl the nest of the inner tables of an outer join
|
|
|
|
@details
|
|
The function assumes that nest_tbl is the nest of the inner tables
|
|
of an outer join and so an ON expression for this outer join is
|
|
attached to nest_tbl.
|
|
The function selects the tables of the nest_tbl that are not inner
|
|
tables of embedded outer joins and then it calls
|
|
build_notnull_conds_for_range_scans() for nest_tbl->on_expr and
|
|
the bitmap for the selected tables. This call finds all fields
|
|
belonging to the selected tables whose null-rejectedness can be
|
|
inferred from the null-rejectedness of nest_tbl->on_expr. After
|
|
this the function recursively finds all null_rejected fields for
|
|
the remaining tables from the nest of nest_tbl.
|
|
*/
|
|
|
|
static
|
|
void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
|
|
TABLE_LIST *nest_tbl)
|
|
{
|
|
TABLE_LIST *tbl;
|
|
table_map used_tables= 0;
|
|
List_iterator<TABLE_LIST> li(nest_tbl->nested_join->join_list);
|
|
|
|
while ((tbl= li++))
|
|
{
|
|
if (!tbl->on_expr)
|
|
used_tables|= tbl->table->map;
|
|
}
|
|
if (used_tables &&
|
|
build_notnull_conds_for_range_scans(join, nest_tbl->on_expr, used_tables))
|
|
{
|
|
nest_tbl->on_expr= (Item*) Item_false;
|
|
}
|
|
|
|
li.rewind();
|
|
while ((tbl= li++))
|
|
{
|
|
if (tbl->on_expr)
|
|
{
|
|
if (tbl->nested_join)
|
|
{
|
|
build_notnull_conds_for_inner_nest_of_outer_join(join, tbl);
|
|
}
|
|
else if (build_notnull_conds_for_range_scans(join, tbl->on_expr,
|
|
tbl->table->map))
|
|
tbl->on_expr= (Item*) Item_false;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Initialize join cache and enable keyread
|
|
*/
|
|
void JOIN::init_join_cache_and_keyread()
|
|
{
|
|
JOIN_TAB *tab;
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
TABLE *table= tab->table;
|
|
switch (tab->type) {
|
|
case JT_SYSTEM:
|
|
case JT_CONST:
|
|
case JT_FT:
|
|
case JT_UNKNOWN:
|
|
case JT_MAYBE_REF:
|
|
break;
|
|
case JT_EQ_REF:
|
|
case JT_REF_OR_NULL:
|
|
case JT_REF:
|
|
if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread)
|
|
table->file->ha_start_keyread(tab->ref.key);
|
|
break;
|
|
case JT_HASH:
|
|
case JT_ALL:
|
|
case JT_RANGE:
|
|
SQL_SELECT *select;
|
|
select= tab->select ? tab->select :
|
|
(tab->filesort ? tab->filesort->select : NULL);
|
|
if (select && select->quick && select->quick->index != MAX_KEY &&
|
|
table->covering_keys.is_set(select->quick->index) &&
|
|
!table->no_keyread)
|
|
table->file->ha_start_keyread(select->quick->index);
|
|
break;
|
|
case JT_HASH_NEXT:
|
|
case JT_NEXT:
|
|
if ((tab->read_first_record == join_read_first ||
|
|
tab->read_first_record == join_read_last) &&
|
|
table->covering_keys.is_set(tab->index) &&
|
|
!table->no_keyread)
|
|
{
|
|
DBUG_ASSERT(!tab->filesort);
|
|
table->file->ha_start_keyread(tab->index);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
/* purecov: end */
|
|
}
|
|
|
|
if (table->file->keyread_enabled() &&
|
|
!table->is_clustering_key(table->file->keyread))
|
|
{
|
|
/*
|
|
Here we set the read_set bitmap for all covering keys
|
|
except CLUSTERED indexes, with all the key-parts inside the key.
|
|
This is needed specifically for an index that contains virtual column.
|
|
|
|
Example:
|
|
Lets say we have this query
|
|
SELECT b FROM t1;
|
|
|
|
and the table definition is like
|
|
CREATE TABLE t1(
|
|
a varchar(10) DEFAULT NULL,
|
|
b varchar(255) GENERATED ALWAYS AS (a) VIRTUAL,
|
|
KEY key1 (b));
|
|
|
|
So we a virtual column b and an index key1 defined on the virtual
|
|
column. So if a query uses a vcol, base columns that it
|
|
depends on are automatically added to the read_set - because they're
|
|
needed to calculate the vcol.
|
|
But if we're doing keyread, vcol is taken
|
|
from the index, not calculated, and base columns do not need to be
|
|
in the read set. To ensure this we try to set the read_set to only
|
|
the key-parts of the indexes.
|
|
|
|
Another side effect of this is
|
|
Lets say you have a query
|
|
select a, b from t1
|
|
and there is an index key1 (a,b,c)
|
|
then as key1 is covering and we would have the keyread enable for
|
|
this key, so the below call will also set the read_set for column
|
|
c, which is not a problem as we read all the columns from the index
|
|
tuple.
|
|
*/
|
|
table->mark_index_columns(table->file->keyread, table->read_set);
|
|
}
|
|
bool init_for_explain= false;
|
|
|
|
/*
|
|
Can we use lightweight initalization mode just for EXPLAINs? We can if
|
|
we're certain that the optimizer will not execute the subquery.
|
|
The optimzier will not execute the subquery if it's too expensive. For
|
|
the exact criteria, see Item_subselect::is_expensive().
|
|
Note that the subquery might be a UNION and we might not yet know if it
|
|
is expensive.
|
|
What we do know is that if this SELECT is too expensive, then the whole
|
|
subquery will be too expensive as well.
|
|
So, we can use lightweight initialization (init_for_explain=true) if this
|
|
SELECT examines more than @@expensive_subquery_limit rows.
|
|
*/
|
|
if ((select_options & SELECT_DESCRIBE) &&
|
|
get_examined_rows() >= thd->variables.expensive_subquery_limit)
|
|
{
|
|
init_for_explain= true;
|
|
}
|
|
if (tab->cache && tab->cache->init(init_for_explain))
|
|
revise_cache_usage(tab);
|
|
else
|
|
tab->remove_redundant_bnl_scan_conds();
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Unpack temp table fields to base table fields.
|
|
*/
|
|
|
|
void unpack_to_base_table_fields(TABLE *table)
|
|
{
|
|
JOIN_TAB *tab= table->reginfo.join_tab;
|
|
for (Copy_field *cp= tab->read_record.copy_field;
|
|
cp != tab->read_record.copy_field_end; cp++)
|
|
(*cp->do_copy)(cp);
|
|
}
|
|
|
|
/*
|
|
Call item->fix_after_optimize for all items registered in
|
|
lex->fix_after_optimize
|
|
|
|
This is needed for items like ROWNUM(), which needs access to structures
|
|
created by the early optimizer pass, like JOIN
|
|
*/
|
|
|
|
static void fix_items_after_optimize(THD *thd, SELECT_LEX *select_lex)
|
|
{
|
|
List_iterator<Item> li(select_lex->fix_after_optimize);
|
|
|
|
while (Item *item= li++)
|
|
item->fix_after_optimize(thd);
|
|
}
|
|
|
|
|
|
/*
|
|
Set a limit for the SELECT_LEX_UNIT based on ROWNUM usage.
|
|
The limit is shown in EXPLAIN
|
|
*/
|
|
|
|
static bool set_limit_for_unit(THD *thd, SELECT_LEX_UNIT *unit, ha_rows lim)
|
|
{
|
|
SELECT_LEX *gpar= unit->global_parameters();
|
|
if (gpar->limit_params.select_limit != 0 &&
|
|
// limit can not be an expression but can be parameter
|
|
(!gpar->limit_params.select_limit->basic_const_item() ||
|
|
((ha_rows)gpar->limit_params.select_limit->val_int()) < lim))
|
|
return false;
|
|
|
|
Query_arena *arena, backup;
|
|
arena= thd->activate_stmt_arena_if_needed(&backup);
|
|
|
|
gpar->limit_params.select_limit=
|
|
new (thd->mem_root) Item_int(thd, lim, MAX_BIGINT_WIDTH);
|
|
if (gpar->limit_params.select_limit == 0)
|
|
return true; // EOM
|
|
|
|
unit->set_limit(gpar);
|
|
|
|
gpar->limit_params.explicit_limit= true; // to show in EXPLAIN
|
|
|
|
if (arena)
|
|
thd->restore_active_arena(arena, &backup);
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
|
|
Check possibility of LIMIT setting by rownum() of upper SELECT and do it
|
|
|
|
@note Ideal is to convert something like
|
|
SELECT ...
|
|
FROM (SELECT ...) table
|
|
WHERE rownum() < <CONSTANT>;
|
|
to
|
|
SELECT ...
|
|
FROM (SELECT ... LIMIT <CONSTANT>) table
|
|
WHERE rownum() < <CONSTANT>;
|
|
|
|
@retval true EOM
|
|
@retval false no errors
|
|
*/
|
|
|
|
bool JOIN::optimize_upper_rownum_func()
|
|
{
|
|
DBUG_ASSERT(select_lex->master_unit()->derived);
|
|
|
|
if (select_lex->master_unit()->first_select() != select_lex)
|
|
return false; // first will set parameter
|
|
|
|
if (select_lex->master_unit()->global_parameters()->
|
|
limit_params.offset_limit != NULL)
|
|
return false; // offset is set, we cannot set limit
|
|
|
|
SELECT_LEX *outer_select= select_lex->master_unit()->outer_select();
|
|
/*
|
|
Check that it is safe to use rownum-limit from the outer query
|
|
(the one that has 'WHERE rownum()...')
|
|
*/
|
|
if (outer_select == NULL ||
|
|
!outer_select->with_rownum ||
|
|
(outer_select->options & SELECT_DISTINCT) ||
|
|
outer_select->table_list.elements != 1 ||
|
|
outer_select->where == NULL ||
|
|
outer_select->where->type() != Item::FUNC_ITEM)
|
|
return false;
|
|
|
|
return process_direct_rownum_comparison(thd, unit, outer_select->where);
|
|
}
|
|
|
|
|
|
/**
|
|
Test if the predicate compares rownum() with a constant
|
|
|
|
@return 1 No or invalid rownum() compare
|
|
@return 0 rownum() is compared with a constant.
|
|
In this case *args contains the constant and
|
|
*inv_order constains 1 if the rownum() was the right
|
|
argument, like in 'WHERE 2 >= rownum()'.
|
|
*/
|
|
|
|
static bool check_rownum_usage(Item_func *func_item, longlong *limit,
|
|
bool *inv_order)
|
|
{
|
|
Item *arg1, *arg2;
|
|
*inv_order= 0;
|
|
DBUG_ASSERT(func_item->argument_count() == 2);
|
|
|
|
/* 'rownum op const' or 'const op field' */
|
|
arg1= func_item->arguments()[0]->real_item();
|
|
if (arg1->type() == Item::FUNC_ITEM &&
|
|
((Item_func*) arg1)->functype() == Item_func::ROWNUM_FUNC)
|
|
{
|
|
arg2= func_item->arguments()[1]->real_item();
|
|
if (arg2->can_eval_in_optimize())
|
|
{
|
|
*limit= arg2->val_int();
|
|
return *limit <= 0 || (ulonglong) *limit >= HA_POS_ERROR;
|
|
}
|
|
}
|
|
else if (arg1->can_eval_in_optimize())
|
|
{
|
|
arg2= func_item->arguments()[1]->real_item();
|
|
if (arg2->type() == Item::FUNC_ITEM &&
|
|
((Item_func*) arg2)->functype() == Item_func::ROWNUM_FUNC)
|
|
{
|
|
*limit= arg1->val_int();
|
|
*inv_order= 1;
|
|
return *limit <= 0 || (ulonglong) *limit >= HA_POS_ERROR;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
/*
|
|
Limit optimization for ROWNUM()
|
|
|
|
Go through the WHERE clause and find out if there are any of the following
|
|
constructs on the top level:
|
|
rownum() <= integer_constant
|
|
rownum() < integer_constant
|
|
rownum() = 1
|
|
|
|
If yes, then threat the select as if 'LIMIT integer_constant' would
|
|
have been used
|
|
*/
|
|
|
|
static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit,
|
|
Item *cond)
|
|
{
|
|
DBUG_ENTER("optimize_rownum");
|
|
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
|
|
{
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item= li++))
|
|
optimize_rownum(thd, unit, item);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
process_direct_rownum_comparison(thd, unit, cond);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
  Set a limit for the unit if cond is a direct comparison of rownum()
  with a constant.

  Handled forms (the inverted ones have rownum() on the right side):
    rownum() <  N,  N >  rownum()   => limit N-1
    rownum() <= N,  N >= rownum()   => limit N
    rownum() =  1,  1 =  rownum()   => limit 1

  Returns the result of set_limit_for_unit() when a limit is applied and
  false in all other cases.
*/
static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit,
                                             Item *cond)
{
  DBUG_ENTER("process_direct_rownum_comparison");

  if (cond->real_type() != Item::FUNC_ITEM)
    DBUG_RETURN(false);

  Item_func *pred= (Item_func*) cond;
  longlong limit;
  bool inv;

  if (pred->argument_count() != 2)
    DBUG_RETURN(false);                         // Not a compare function
  if (check_rownum_usage(pred, &limit, &inv))
    DBUG_RETURN(false);

  Item_func::Functype pred_type= pred->functype();

  if (inv && pred_type != Item_func::EQ_FUNC)
  {
    /* Mirror the operator so that rownum() is on the left side */
    switch (pred_type) {
    case Item_func::GT_FUNC:                    // # > rownum()
      pred_type= Item_func::LT_FUNC;
      break;
    case Item_func::GE_FUNC:                    // # >= rownum()
      pred_type= Item_func::LE_FUNC;
      break;
    default:
      DBUG_RETURN(false);
    }
  }

  if (pred_type == Item_func::LT_FUNC)          // rownum() < #
  {
    if (limit <= 0)
      DBUG_RETURN(false);
    DBUG_RETURN(set_limit_for_unit(thd, unit, limit - 1));
  }

  if (pred_type == Item_func::LE_FUNC)          // rownum() <= #
    DBUG_RETURN(set_limit_for_unit(thd, unit, limit));

  if (pred_type == Item_func::EQ_FUNC && limit == 1)
    DBUG_RETURN(set_limit_for_unit(thd, unit, limit));

  DBUG_RETURN(false);
}
|
|
|
|
/**
|
|
@brief
|
|
Transform IN predicates having equal constant elements to equalities
|
|
|
|
@param thd The context of the statement
|
|
|
|
@details
|
|
If all elements in an IN predicate are constant and equal to each other
|
|
then clause
|
|
- "a IN (e1,..,en)" can be transformed to "a = e1"
|
|
- "a NOT IN (e1,..,en)" can be transformed to "a != e1".
|
|
This means an object of Item_func_in can be replaced with an object of
|
|
Item_func_eq for IN (e1,..,en) clause or Item_func_ne for
|
|
NOT IN (e1,...,en).
|
|
Such a replacement allows the optimizer to choose a better execution plan.
|
|
|
|
This methods applies such transformation for each IN predicate of the WHERE
|
|
condition and ON expressions of this join where possible
|
|
|
|
@retval
|
|
false success
|
|
true failure
|
|
*/
|
|
bool JOIN::transform_in_predicates_into_equalities(THD *thd)
|
|
{
|
|
DBUG_ENTER("JOIN::transform_in_predicates_into_equalities");
|
|
DBUG_RETURN(transform_all_conds_and_on_exprs(
|
|
thd, &Item::in_predicate_to_equality_transformer));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Rewrite datetime comparison conditions into sargable.
|
|
See details in the description for class Date_cmp_func_rewriter
|
|
*/
|
|
|
|
bool JOIN::transform_date_conds_into_sargable()
|
|
{
|
|
DBUG_ENTER("JOIN::transform_date_conds_into_sargable");
|
|
DBUG_RETURN(transform_all_conds_and_on_exprs(
|
|
thd, &Item::date_conds_transformer));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Transform all items in WHERE and ON expressions using a given transformer
|
|
|
|
@param thd The context of the statement
|
|
transformer Pointer to the transformation function
|
|
|
|
@details
|
|
For each item of the WHERE condition and ON expressions of the SELECT
|
|
for this join the method performs the intransformation using the given
|
|
transformation function
|
|
|
|
@retval
|
|
false success
|
|
true failure
|
|
*/
|
|
bool JOIN::transform_all_conds_and_on_exprs(THD *thd,
|
|
Item_transformer transformer)
|
|
{
|
|
if (conds)
|
|
{
|
|
conds= conds->top_level_transform(thd, transformer, (uchar *) 0);
|
|
if (!conds)
|
|
return true;
|
|
}
|
|
if (join_list)
|
|
{
|
|
if (transform_all_conds_and_on_exprs_in_join_list(thd, join_list,
|
|
transformer))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
  Apply the transformer to the ON expressions of all elements of join_list.

  Nested joins are processed recursively before the ON expression of the
  nest itself. Each ON expression is replaced with the result of
  top_level_transform(); a NULL result is treated as failure.

  Returns false on success, true on failure.
*/
bool JOIN::transform_all_conds_and_on_exprs_in_join_list(
  THD *thd, List<TABLE_LIST> *join_list, Item_transformer transformer)
{
  List_iterator<TABLE_LIST> it(*join_list);

  for (TABLE_LIST *tbl= it++; tbl; tbl= it++)
  {
    if (tbl->nested_join &&
        transform_all_conds_and_on_exprs_in_join_list(
          thd, &tbl->nested_join->join_list, transformer))
      return true;

    if (!tbl->on_expr)
      continue;

    tbl->on_expr= tbl->on_expr->top_level_transform(thd, transformer, 0);
    if (!tbl->on_expr)
      return true;
  }
  return false;
}
|
|
|
|
|
|
/*
  Fire the *_START probe macro that corresponds to the DML command of the
  current statement, passing it the query text.
  Only UPDATE, multi-UPDATE, DELETE and multi-DELETE are expected here; any
  other command triggers a debug assertion.
*/
static void MYSQL_DML_START(THD *thd)
{
  switch (thd->lex->sql_command) {

  case SQLCOM_UPDATE:
  {
    MYSQL_UPDATE_START(thd->query());
    break;
  }
  case SQLCOM_UPDATE_MULTI:
  {
    MYSQL_MULTI_UPDATE_START(thd->query());
    break;
  }
  case SQLCOM_DELETE:
  {
    MYSQL_DELETE_START(thd->query());
    break;
  }
  case SQLCOM_DELETE_MULTI:
  {
    MYSQL_MULTI_DELETE_START(thd->query());
    break;
  }
  default:
    DBUG_ASSERT(0);
  }
}
|
|
|
|
|
|
/*
  Fire the *_DONE probe macro that corresponds to the DML command of the
  current statement.

  rc is passed through as the status. When rc is non-zero the row counters
  are reported as 0; otherwise they are read from the object returned by
  Sql_cmd_dml::get_result() (UPDATE, multi-UPDATE, multi-DELETE) or from
  thd->get_row_count_func() (DELETE).
  Any other command triggers a debug assertion.
*/
static void MYSQL_DML_DONE(THD *thd, int rc)
{
/* The object returned by get_result() of the current command, cast to TYPE */
#define DML_DONE_RESULT_AS(TYPE) \
  ((TYPE*)(((Sql_cmd_dml*)(thd->lex->m_sql_cmd))->get_result()))

  switch (thd->lex->sql_command) {

  case SQLCOM_UPDATE:
    MYSQL_UPDATE_DONE(
      rc,
      (rc ? 0 : DML_DONE_RESULT_AS(multi_update)->num_found()),
      (rc ? 0 : DML_DONE_RESULT_AS(multi_update)->num_updated()));
    break;

  case SQLCOM_UPDATE_MULTI:
    MYSQL_MULTI_UPDATE_DONE(
      rc,
      (rc ? 0 : DML_DONE_RESULT_AS(multi_update)->num_found()),
      (rc ? 0 : DML_DONE_RESULT_AS(multi_update)->num_updated()));
    break;

  case SQLCOM_DELETE:
    MYSQL_DELETE_DONE(rc, (rc ? 0 : (ulong) (thd->get_row_count_func())));
    break;

  case SQLCOM_DELETE_MULTI:
    MYSQL_MULTI_DELETE_DONE(
      rc,
      (rc ? 0 : DML_DONE_RESULT_AS(multi_delete)->num_deleted()));
    break;

  default:
    DBUG_ASSERT(0);
  }

#undef DML_DONE_RESULT_AS
}
|
|
|
|
|
|
/*
|
|
@brief Perform actions needed before locking tables for a DML statement
|
|
|
|
@param thd global context the processed statement
|
|
@returns false if success, true if error
|
|
|
|
@details
|
|
This function calls the precheck() procedure fo the processed statement,
|
|
then is opens tables used in the statement and finally it calls the function
|
|
prepare_inner() that is specific for the type of the statement.
|
|
|
|
@note
|
|
The function are used when processing:
|
|
- a DML statement
|
|
- PREPARE stmt FROM <DML "statement>"
|
|
- EXECUTE stmt when stmt is prepared from a DML statement.
|
|
*/
|
|
|
|
bool Sql_cmd_dml::prepare(THD *thd)
|
|
{
|
|
lex= thd->lex;
|
|
SELECT_LEX_UNIT *unit= &lex->unit;
|
|
|
|
DBUG_ASSERT(!is_prepared());
|
|
|
|
// Perform a coarse statement-specific privilege check.
|
|
if (precheck(thd))
|
|
goto err;
|
|
|
|
MYSQL_DML_START(thd);
|
|
|
|
lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_DERIVED;
|
|
|
|
if (open_tables_for_query(thd, lex->query_tables, &table_count, 0,
|
|
get_dml_prelocking_strategy()))
|
|
{
|
|
if (thd->is_error())
|
|
goto err;
|
|
(void)unit->cleanup();
|
|
return true;
|
|
}
|
|
|
|
if (prepare_inner(thd))
|
|
goto err;
|
|
|
|
lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_DERIVED;
|
|
|
|
set_prepared();
|
|
unit->set_prepared();
|
|
|
|
return false;
|
|
|
|
err:
|
|
DBUG_ASSERT(thd->is_error());
|
|
DBUG_PRINT("info", ("report_error: %d", thd->is_error()));
|
|
|
|
(void)unit->cleanup();
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Execute a DML statement
|
|
|
|
@param thd global context the processed statement
|
|
@returns false if success, true if error
|
|
|
|
@details
|
|
The function assumes that each type of a DML statement has its own
|
|
implementation of the virtunal functions precheck(). It is also
|
|
assumed that that the virtual function execute execute_inner() is to be
|
|
overridden by the implementations for specific commands.
|
|
|
|
@note
|
|
Currently only UPDATE and DELETE statement are executed using this function.
|
|
*/
|
|
|
|
bool Sql_cmd_dml::execute(THD *thd)
|
|
{
|
|
lex = thd->lex;
|
|
bool res;
|
|
|
|
SELECT_LEX_UNIT *unit = &lex->unit;
|
|
SELECT_LEX *select_lex= lex->first_select_lex();
|
|
|
|
if (!is_prepared())
|
|
{
|
|
/*
|
|
This is called when processing
|
|
- a DML statement
|
|
- PREPARE stmt FROM <DML "statement>"
|
|
- EXECUTE stmt when stmt is prepared from a DML statement.
|
|
The call will invoke open_tables_for_query()
|
|
*/
|
|
if (prepare(thd))
|
|
goto err;
|
|
}
|
|
else // This branch currently is never used for DML commands
|
|
{
|
|
if (precheck(thd))
|
|
goto err;
|
|
|
|
MYSQL_DML_START(thd);
|
|
|
|
if (open_tables_for_query(thd, lex->query_tables, &table_count, 0,
|
|
get_dml_prelocking_strategy()))
|
|
goto err;
|
|
}
|
|
|
|
THD_STAGE_INFO(thd, stage_init);
|
|
|
|
/*
|
|
Locking of tables is done after preparation but before optimization.
|
|
This allows to do better partition pruning and avoid locking unused
|
|
partitions. As a consequence, in such a case, prepare stage can rely only
|
|
on metadata about tables used and not data from them.
|
|
*/
|
|
if (!is_empty_query())
|
|
{
|
|
if (lock_tables(thd, lex->query_tables, table_count, 0))
|
|
goto err;
|
|
}
|
|
|
|
unit->set_limit(select_lex);
|
|
|
|
/* Perform statement-specific execution */
|
|
res = execute_inner(thd);
|
|
|
|
if (res)
|
|
goto err;
|
|
|
|
thd->push_final_warnings();
|
|
res= unit->cleanup();
|
|
|
|
/* "Unprepare" this object since unit->cleanup actually unprepares */
|
|
unprepare(thd);
|
|
|
|
THD_STAGE_INFO(thd, stage_end);
|
|
|
|
MYSQL_DML_DONE(thd, res);
|
|
|
|
return res;
|
|
|
|
err:
|
|
DBUG_ASSERT(thd->is_error() || thd->killed);
|
|
MYSQL_DML_DONE(thd, 1);
|
|
THD_STAGE_INFO(thd, stage_end);
|
|
(void)unit->cleanup();
|
|
if (is_prepared())
|
|
unprepare(thd);
|
|
|
|
return thd->is_error();
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Generic implemention of optimization and execution phases
|
|
@param thd global context the processed statement
|
|
@returns false if success, true if error
|
|
|
|
@note
|
|
This implementation assumes that the processed DML statement is represented
|
|
as a SELECT_LEX or SELECT_LEX_UNIT tree with attached corresponding
|
|
JOIN structures. Any JOIN structure is constructed at the prepare phase.
|
|
When created at the top level join it is provided with an object of a class
|
|
derived from select_result_sink. The pointer to the object is saved in
|
|
the this->result field. For different types of DML statements different
|
|
derived classes are used for this object. The class of this object determines
|
|
additional specific actions performed at the phases of context analysis,
|
|
optimization and execution.
|
|
*/
|
|
|
|
bool Sql_cmd_dml::execute_inner(THD *thd)
|
|
{
|
|
SELECT_LEX_UNIT *unit = &lex->unit;
|
|
SELECT_LEX *select_lex= unit->first_select();
|
|
JOIN *join= select_lex->join;
|
|
|
|
if (join->optimize())
|
|
goto err;
|
|
|
|
if (thd->lex->describe & DESCRIBE_EXTENDED)
|
|
{
|
|
join->conds_history= join->conds;
|
|
join->having_history= (join->having?join->having:join->tmp_having);
|
|
}
|
|
|
|
if (unlikely(thd->is_error()))
|
|
goto err;
|
|
|
|
if (join->exec())
|
|
goto err;
|
|
|
|
if (thd->lex->describe & DESCRIBE_EXTENDED)
|
|
{
|
|
select_lex->where= join->conds_history;
|
|
select_lex->having= join->having_history;
|
|
}
|
|
|
|
err:
|
|
return join->error;
|
|
}
|
|
|
|
|
|
/**
|
|
@} (end of group Query_Optimizer)
|
|
*/
|