mirror of
https://github.com/MariaDB/server.git
synced 2025-08-05 10:01:36 +02:00

Also expand vcol field index coverings to include indexes covering all the fields in the expression. The reasoning goes as follows: let f(c1, c2, ..., cn) be a function on applied to columns c1, c2, ..., cn, if f(...) is covered by an index, so should vc whose expression is f(...). For example, if t.vf = t.c1 + t.c2, and t has three indexes (vf), (c1, c2), (c1). Before this change, vf's index covering is a singleton {(vf)}. Let's call that the "conventional" index covering. After this change vf's index covering is now {(vf), (c1, c2)}, since (c1, c2) covers both c1 and c2. Let's call (c1, c2) in this case the "extra" covering. With the coverings updated, when an index in the "extra" covering is chosen for keyread, the vcol also needs to be calculated. In this case we mark vcol in the table read_set, and ensure it is computed. With these changes, we see various improvements, including from using full table scan + filesort to full index scan + filesort when ORDER BY an indexed vcol (here vc = c + 1 is a vcol and both c and vc are indexes): explain select c + 1 from t order by vc; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t ALL NULL NULL NULL NULL 10000 Using filesort +1 SIMPLE t index NULL c 5 NULL 10000 Using index; Using filesort The substitutions are followed updates to all_fields which include a copy of the ORDER BY/GROUP BY item pointers, as well as corresponding updates to ref_pointer_array so that the all_fields and ref_pointer_array remain in sync. Another, related change is the recomputation of table index covering on substitutions. It not only reflects the correct table index covering after the substitutions, but also improve executions where the vcol index can be chosen, such as this example (here vc = c + 1 and vc is the only index in the table), from full table scan + filesort to full index scan: select vc from t order by c + 1; We do it in SELECT as well as in single table DELETE/UPDATE.
572 lines
17 KiB
C++
572 lines
17 KiB
C++
/*
|
|
Copyright (c) 2009, 2021, MariaDB
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
|
|
#ifdef USE_PRAGMA_IMPLEMENTATION
|
|
#pragma implementation // gcc: Class implementation
|
|
#endif
|
|
|
|
#include "mariadb.h"
|
|
#include "sql_priv.h"
|
|
#include <m_ctype.h>
|
|
#include "sql_select.h"
|
|
#include "opt_trace.h"
|
|
|
|
/**
|
|
@file
|
|
|
|
@brief
|
|
Virtual Column Substitution feature makes the optimizer recognize usage of
|
|
virtual column expressions in the WHERE/ON clauses. If there is an index
|
|
on the virtual column, the optimizer is able to construct query plans that
|
|
use that index.
|
|
*/
|
|
|
|
|
|
/*
|
|
== Virtual Column Substitution In a Nutshell ==
|
|
|
|
Consider a table that defines a virtual column and an index on it:
|
|
|
|
CREATE TABLE t1 (
|
|
json_col BLOB,
|
|
...
|
|
vcol1 VARCHAR(100) AS (json_extract(json_col, '$.name')), //(*), see below
|
|
INDEX idx1(vcol1)
|
|
);
|
|
|
|
And then a query that uses virtual column's expression:
|
|
|
|
SELECT * FROM t1 WHERE json_extract(json_col, '$.name')='foo'
|
|
|
|
We'd like this query to use index idx1.
|
|
In order to achieve that, we look through potentially sargable conditions
|
|
to find the virtual column expression (we only accept exact match) and
|
|
replace it with a reference to virtual column field so the query becomes:
|
|
|
|
SELECT * FROM t1 WHERE vcol1='foo'
|
|
|
|
Then, the optimizer is able to construct ref access on index idx1.
|
|
|
|
(*) When extracting JSON fields in the real world, you'll probably want to
|
|
use json_unquote().
|
|
|
|
== Datatypes must match ==
|
|
|
|
The type of vcol_field and vcol_expr may not match. Consider
|
|
|
|
CREATE TABLE t1 (
|
|
a varchar(10),
|
|
vcol INT as CONCAT(a,'1')
|
|
);
|
|
|
|
and conditions
|
|
|
|
concat(a,'1')=1.5 vs vcol=1.5.
|
|
|
|
== The same expression in multiple virtual columns ==
|
|
|
|
What if there are multiple options to replace:
|
|
|
|
CREATE TABLE t1 (
|
|
col1 int,
|
|
...
|
|
vcol1 INT as (col1 + 1),
|
|
vcol2 INT as (col1 + 1),
|
|
...
|
|
INDEX idx1(vcol1, ...)
|
|
INDEX idx2(vcol2, ...)
|
|
);
|
|
SELECT * FROM t1 WHERE col1 + 1 = 5;
|
|
|
|
Currently, we will replace with the first matching column (vcol1), like MySQL
|
|
does. Since we rely on the user to define the virtual columns, we can
|
|
request that they define one virtual column instead of multiple identical
|
|
ones.
|
|
*/
|
|
|
|
|
|
class Vcol_subst_context
|
|
{
|
|
public:
|
|
THD *thd;
|
|
/* Indexed virtual columns that we can try substituting */
|
|
List<Field> vcol_fields;
|
|
|
|
/*
|
|
How many times substitution was done. Used to determine whether to print
|
|
the conversion info to the Optimizer Trace
|
|
*/
|
|
uint subst_count;
|
|
|
|
Vcol_subst_context(THD *thd_arg) : thd(thd_arg) {}
|
|
};
|
|
|
|
static Field *is_vcol_expr(Vcol_subst_context *ctx, const Item *item);
|
|
static
|
|
void subst_vcol_if_compatible(Vcol_subst_context *ctx,
|
|
Item_bool_func *cond,
|
|
Item **vcol_expr_ref,
|
|
Field *vcol_field);
|
|
|
|
static
|
|
bool collect_indexed_vcols_for_table(TABLE *table, List<Field> *vcol_fields)
|
|
{
|
|
// TODO: Make use of iterator to loop through keys_in_use_for_query, instead.
|
|
for (uint i=0; i < table->s->keys; i++)
|
|
{
|
|
// note: we could also support histograms here
|
|
if (!table->keys_in_use_for_query.is_set(i))
|
|
continue;
|
|
|
|
KEY *key= &table->key_info[i];
|
|
for (uint kp=0; kp < key->user_defined_key_parts; kp++)
|
|
{
|
|
Field *field= key->key_part[kp].field;
|
|
if (field->vcol_info && vcol_fields->push_back(field))
|
|
return TRUE; // Out of memory
|
|
}
|
|
}
|
|
return FALSE; // Ok
|
|
}
|
|
|
|
|
|
/*
|
|
Collect a list of indexed virtual columns in the JOIN's tables
|
|
*/
|
|
|
|
static
|
|
bool collect_indexed_vcols_for_join(JOIN *join, List<Field> *vcol_fields)
|
|
{
|
|
List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
|
|
TABLE_LIST *tl;
|
|
while ((tl= ti++))
|
|
{
|
|
if (!tl->table) // non-merged semi-join or something like that
|
|
continue;
|
|
if (collect_indexed_vcols_for_table(tl->table, vcol_fields))
|
|
return TRUE; // Out of memory
|
|
}
|
|
return FALSE; // Ok
|
|
}
|
|
|
|
|
|
/* Substitute virtual columns in an Item tree */
|
|
static void subst_vcols_in_item(Vcol_subst_context *ctx, Item *item,
|
|
const char *location)
|
|
{
|
|
uchar *yes= (uchar*) 1;
|
|
ctx->subst_count= 0;
|
|
|
|
item->top_level_compile(ctx->thd,
|
|
&Item::vcol_subst_analyzer, &yes,
|
|
&Item::vcol_subst_transformer, (uchar*)ctx);
|
|
|
|
if (ctx->subst_count && unlikely(ctx->thd->trace_started()))
|
|
trace_condition(ctx->thd, location, "virtual_column_substitution", item);
|
|
}
|
|
|
|
|
|
static
|
|
void subst_vcols_in_join_list(Vcol_subst_context *ctx,
|
|
List<TABLE_LIST> *join_list)
|
|
{
|
|
TABLE_LIST *table;
|
|
List_iterator<TABLE_LIST> li(*join_list);
|
|
|
|
while ((table= li++))
|
|
{
|
|
if (NESTED_JOIN* nested_join= table->nested_join)
|
|
subst_vcols_in_join_list(ctx, &nested_join->join_list);
|
|
|
|
if (table->on_expr)
|
|
subst_vcols_in_item(ctx, table->on_expr, "ON expression");
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Substitute vcol expressions with vcol fields in ORDER BY or GROUP
|
|
BY, and re-initialise affected tables on substitution.
|
|
*/
|
|
static
|
|
void subst_vcols_in_order(Vcol_subst_context *ctx,
|
|
ORDER *order,
|
|
JOIN *join,
|
|
bool is_group_by)
|
|
{
|
|
Field *vcol_field;
|
|
const char *location= is_group_by ? "GROUP BY" : "ORDER BY";
|
|
for (; order; order= order->next)
|
|
{
|
|
Item *item= *order->item;
|
|
uint old_count= ctx->subst_count;
|
|
/*
|
|
Extra safety: do not rewrite if there is no room in
|
|
ref_pointer_array's slices (see st_select_lex::setup_ref_array)
|
|
This check shouldn't fail, but it's better to have it just in
|
|
case.
|
|
*/
|
|
if (join->all_fields.elements * 5 >=
|
|
join->select_lex->ref_pointer_array.size() - 1)
|
|
break;
|
|
|
|
if ((vcol_field= is_vcol_expr(ctx, item)))
|
|
subst_vcol_if_compatible(ctx, NULL, order->item, vcol_field);
|
|
if (ctx->subst_count > old_count)
|
|
{
|
|
Item *new_item= *order->item;
|
|
/*
|
|
If the old ORDER BY item is a SELECT item, then insert the new
|
|
item to all_fields and keep it in sync with ref_pointer_array.
|
|
Otherwise it is safe to replace the old item with the new item
|
|
in all_fields.
|
|
*/
|
|
if (order->in_field_list)
|
|
{
|
|
uint el= join->all_fields.elements;
|
|
join->all_fields.push_front(new_item);
|
|
join->select_lex->ref_pointer_array[el]= new_item;
|
|
order->item= &join->select_lex->ref_pointer_array[el];
|
|
order->in_field_list= false;
|
|
}
|
|
/*
|
|
TODO: should we deduplicate by calling find_item_in_list on
|
|
new_item like in find_order_in_list, and remove item instead
|
|
of replacing it if new_item is already in all_fields?
|
|
*/
|
|
else
|
|
{
|
|
List_iterator<Item> it(join->all_fields);
|
|
while (Item *item_in_all_fields= it++)
|
|
{
|
|
if (item_in_all_fields == item)
|
|
it.replace(new_item);
|
|
}
|
|
}
|
|
/*
|
|
Re-initialise index covering of affected tables, which will
|
|
be re-computed to account for the substitution.
|
|
*/
|
|
TABLE *tab= vcol_field->table;
|
|
tab->covering_keys= tab->s->keys_for_keyread;
|
|
tab->covering_keys.intersect(tab->keys_in_use_for_query);
|
|
if (unlikely(ctx->thd->trace_started()))
|
|
{
|
|
Json_writer_object trace_wrapper(ctx->thd);
|
|
Json_writer_object trace_order_by(ctx->thd, "virtual_column_substitution");
|
|
trace_order_by.add("location", location);
|
|
trace_order_by.add("from", item);
|
|
trace_order_by.add("to", new_item);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
@brief
|
|
Do substitution for all condition in a JOIN, and all ORDER BY and
|
|
GROUP BY items. This is the primary entry point. Recount field
|
|
types and re-compute index coverings when any substitution has
|
|
happened in ORDER BY or GROUP BY.
|
|
*/
|
|
|
|
bool substitute_indexed_vcols_for_join(JOIN *join)
|
|
{
|
|
Vcol_subst_context ctx(join->thd);
|
|
if (collect_indexed_vcols_for_join(join, &ctx.vcol_fields))
|
|
return true; // Out of memory
|
|
|
|
if (!ctx.vcol_fields.elements)
|
|
return false; // Ok, nothing to do
|
|
|
|
if (join->conds)
|
|
subst_vcols_in_item(&ctx, join->conds, "WHERE");
|
|
if (join->join_list)
|
|
subst_vcols_in_join_list(&ctx, join->join_list);
|
|
ctx.subst_count= 0;
|
|
if (join->order)
|
|
subst_vcols_in_order(&ctx, join->order, join, false);
|
|
if (join->group_list)
|
|
subst_vcols_in_order(&ctx, join->group_list, join, true);
|
|
if (ctx.subst_count)
|
|
{
|
|
count_field_types(join->select_lex, &join->tmp_table_param,
|
|
join->all_fields, 0);
|
|
join->select_lex->update_used_tables();
|
|
}
|
|
|
|
if (join->thd->is_error())
|
|
return true; // Out of memory
|
|
return false; // Ok
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Do substitution for one table and condition. This is for single-table
|
|
UPDATE/DELETE.
|
|
*/
|
|
|
|
bool substitute_indexed_vcols_for_table(TABLE *table, Item *item,
|
|
ORDER *order, SELECT_LEX *select_lex)
|
|
{
|
|
Vcol_subst_context ctx(table->in_use);
|
|
if (collect_indexed_vcols_for_table(table, &ctx.vcol_fields))
|
|
return true; // Out of memory
|
|
|
|
if (!ctx.vcol_fields.elements)
|
|
return false; // Ok, nothing to do
|
|
|
|
if (item)
|
|
subst_vcols_in_item(&ctx, item, "WHERE");
|
|
|
|
ctx.subst_count= 0; /* used in subst_vcol_in_order */
|
|
if (order)
|
|
subst_vcols_in_order(&ctx, order, select_lex->join, false);
|
|
|
|
if (table->in_use->is_error())
|
|
return true; // Out of memory
|
|
|
|
return false; // Ok
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Check if passed item matches Virtual Column definition for some column in
|
|
the Vcol_subst_context list.
|
|
*/
|
|
|
|
static Field *is_vcol_expr(Vcol_subst_context *ctx, const Item *item)
|
|
{
|
|
table_map map= item->used_tables();
|
|
if ((map!=0) && !(map & OUTER_REF_TABLE_BIT) &&
|
|
!(map & (map - 1))) // has exactly one bit set
|
|
{
|
|
List_iterator<Field> it(ctx->vcol_fields);
|
|
Field *field;
|
|
while ((field= it++))
|
|
{
|
|
if (field->vcol_info->expr->eq(item, true))
|
|
return field;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Produce a warning similar to raise_note_cannot_use_key_part().
|
|
*/
|
|
|
|
void print_vcol_subst_warning(THD *thd, Field *field, Item *expr,
|
|
const char *cause)
|
|
{
|
|
StringBuffer<128> expr_buffer;
|
|
size_t expr_length;
|
|
|
|
expr->print(&expr_buffer, QT_EXPLAIN);
|
|
expr_length= Well_formed_prefix(expr_buffer.charset(),
|
|
expr_buffer.ptr(),
|
|
MY_MIN(expr_buffer.length(), 64)).length();
|
|
|
|
push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
|
|
ER_UNKNOWN_ERROR,
|
|
"Cannot substitute virtual column expression %*s -> %*s "
|
|
"due to %s",
|
|
(int) expr_length, expr_buffer.c_ptr_safe(),
|
|
(int) field->field_name.length, field->field_name.str,
|
|
cause);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Check if we can substitute (*vcol_expr_ref) with vcol_field in condition
|
|
cond and if we can, do the substitution.
|
|
|
|
@detail
|
|
Generally, we can do the substitution if the condition after substitution
|
|
is equivalent to the condition before the substitution.
|
|
|
|
They are equivalent if the datatypes of vcol_field and *vcol_expr_ref are
|
|
the same. This requirement can be relaxed - one can come up with cases
|
|
where the datatypes are different but substitution doesn't change the
|
|
condition.
|
|
|
|
Note that the data type of the virtual column is specified by the user and
|
|
so can be totally different from virtual column's expression. For example,
|
|
one can do:
|
|
|
|
col3 INT AS (CONCAT(col1, col2))
|
|
|
|
For strings, we allow two cases:
|
|
- vcol_expr and vcol_field have the same collation
|
|
- vcol_field has the same collation as the condition's comparison collation
|
|
|
|
(Note: MySQL calls resolve_type() after it has done the substitution.
|
|
This can potentially update the comparator. The idea is that this
|
|
shouldn't be necessary as we do not want to change the comparator.
|
|
Changing the comparator will change the semantics of the condition,
|
|
our point is that this must not happen)
|
|
|
|
@return
|
|
None.
|
|
In case OOM error happens in this function, we have no way to propagate
|
|
the error out of Item::compile(). So, the code that calls Item::compile()
|
|
for vcol substitution will check thd->is_error() afterwards.
|
|
*/
|
|
|
|
static
|
|
void subst_vcol_if_compatible(Vcol_subst_context *ctx,
|
|
Item_bool_func *cond,
|
|
Item **vcol_expr_ref,
|
|
Field *vcol_field)
|
|
{
|
|
Item *vcol_expr= *vcol_expr_ref;
|
|
THD *thd= ctx->thd;
|
|
|
|
const char *fail_cause= NULL;
|
|
if (vcol_expr->type_handler_for_comparison() !=
|
|
vcol_field->type_handler_for_comparison() ||
|
|
(vcol_expr->maybe_null() && !vcol_field->maybe_null()))
|
|
fail_cause="type mismatch";
|
|
else
|
|
{
|
|
CHARSET_INFO *cs= cond ? cond->compare_collation() : NULL;
|
|
if (vcol_expr->collation.collation != vcol_field->charset() &&
|
|
cs != vcol_field->charset())
|
|
fail_cause="collation mismatch";
|
|
}
|
|
|
|
if (fail_cause)
|
|
{
|
|
if (thd->give_notes_for_unusable_keys())
|
|
print_vcol_subst_warning(thd, vcol_field, vcol_expr, fail_cause);
|
|
return;
|
|
}
|
|
Item_field *itf= new (thd->mem_root) Item_field(thd, vcol_field);
|
|
if (!itf)
|
|
return; // Out of memory, caller will know from thd->is_error()
|
|
bitmap_set_bit(vcol_field->table->read_set, vcol_field->field_index);
|
|
DBUG_ASSERT(itf->fixed());
|
|
thd->change_item_tree(vcol_expr_ref, itf);
|
|
ctx->subst_count++;
|
|
return;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Do a quick and imprecise check if it makes sense to try Virtual Column
|
|
Substitution transformation for this item.
|
|
|
|
@detail
|
|
For vcol_expr='FOO' the item to be trans formed is the comparison item
|
|
(Item_func_eq in this example), not the item representing vcol_expr.
|
|
*/
|
|
|
|
bool Item::vcol_subst_analyzer(uchar **)
|
|
{
|
|
const ulonglong allowed_cmp_funcs=
|
|
Item_func::BITMAP_EQ |
|
|
Item_func::BITMAP_EQUAL |
|
|
Item_func::BITMAP_LT |
|
|
Item_func::BITMAP_GT |
|
|
Item_func::BITMAP_LE |
|
|
Item_func::BITMAP_GE |
|
|
Item_func::BITMAP_BETWEEN |
|
|
Item_func::BITMAP_IN |
|
|
Item_func::BITMAP_ISNULL |
|
|
Item_func::BITMAP_ISNOTNULL;
|
|
|
|
Item::Type this_type= type();
|
|
/*
|
|
Do transformation
|
|
1. Inside AND/OR
|
|
2. In selected list of comparison predicates
|
|
*/
|
|
return (this_type == Item::COND_ITEM || // (1)
|
|
(this_type == Item::FUNC_ITEM && // (2)
|
|
(((Item_func*)this)->bitmap_bit() & allowed_cmp_funcs))); // (2)
|
|
}
|
|
|
|
|
|
Item* Item_bool_rowready_func2::vcol_subst_transformer(THD *thd, uchar *arg)
|
|
{
|
|
DBUG_ASSERT(this->vcol_subst_analyzer(NULL));
|
|
Vcol_subst_context *ctx= (Vcol_subst_context*)arg;
|
|
Field *vcol_field;
|
|
Item **vcol_expr;
|
|
|
|
if (!args[0]->used_tables() && (vcol_field= is_vcol_expr(ctx, args[1])))
|
|
vcol_expr= &args[1];
|
|
else if (!args[1]->used_tables() && (vcol_field= is_vcol_expr(ctx, args[0])))
|
|
vcol_expr= &args[0];
|
|
else
|
|
return this; /* No substitution */
|
|
|
|
DBUG_EXECUTE_IF("vcol_subst_simulate_oom",
|
|
DBUG_SET("+d,simulate_out_of_memory"););
|
|
|
|
subst_vcol_if_compatible(ctx, this, vcol_expr, vcol_field);
|
|
|
|
DBUG_EXECUTE_IF("vcol_subst_simulate_oom",
|
|
DBUG_SET("-d,vcol_subst_simulate_oom"););
|
|
return this;
|
|
}
|
|
|
|
|
|
Item* Item_func_between::vcol_subst_transformer(THD *thd, uchar *arg)
|
|
{
|
|
Vcol_subst_context *ctx= (Vcol_subst_context*)arg;
|
|
Field *vcol_field;
|
|
if (!args[1]->used_tables() &&
|
|
!args[2]->used_tables() &&
|
|
(vcol_field= is_vcol_expr(ctx, args[0])))
|
|
{
|
|
subst_vcol_if_compatible(ctx, this, &args[0], vcol_field);
|
|
}
|
|
return this;
|
|
}
|
|
|
|
|
|
Item* Item_func_null_predicate::vcol_subst_transformer(THD *thd, uchar *arg)
|
|
{
|
|
Vcol_subst_context *ctx= (Vcol_subst_context*)arg;
|
|
Field *vcol_field;
|
|
if ((vcol_field= is_vcol_expr(ctx, args[0])))
|
|
subst_vcol_if_compatible(ctx, this, &args[0], vcol_field);
|
|
return this;
|
|
}
|
|
|
|
|
|
Item* Item_func_in::vcol_subst_transformer(THD *thd, uchar *arg)
|
|
{
|
|
Vcol_subst_context *ctx= (Vcol_subst_context*)arg;
|
|
|
|
/* Check that all arguments inside IN() are constants */
|
|
if (!compatible_types_scalar_bisection_possible())
|
|
return this;
|
|
|
|
Field *vcol_field;
|
|
if ((vcol_field= is_vcol_expr(ctx, args[0])))
|
|
subst_vcol_if_compatible(ctx, this, &args[0], vcol_field);
|
|
return this;
|
|
}
|
|
|