mirror of
https://github.com/MariaDB/server.git
synced 2025-03-23 07:28:40 +01:00

Allows index condition pushdown for reverse ordered scans, a previously disabled feature due to poor performance. This patch adds a new API to the handler class called set_end_range which allows callers to tell the handler what the end of the index range will be when scanning. Combined with a pushed index condition, the handler can scan the index efficiently and not read beyond the end of the given range. When checking if the pushed index condition matches, the handler will also check if scanning has reached the end of the provided range and stop if so. If we instead only enabled ICP for reverse ordered scans without also calling this new API, then the handler would perform unnecessary index condition checks. In fact this would continue until the end of the index is reached. These changes are agnostic of storage engine. That is, any storage engine that supports index condition pushdown will inherit this new behavior as it is implemented in the SQL and storage engine API layers. The partitioned tables storage meta-engine (ha_partition) adds an override of set_end_range which recursively calls set_end_range on its child storage engine (handler) implementations. This commit updates the test made in an earlier commit to show that ICP matches happen for the reverse ordered case. This patch is based on changes written by Olav Sandstaa in MySQL commit da1d92fd46071cd86de61058b6ea39fd9affcd87
531 lines
17 KiB
C++
531 lines
17 KiB
C++
/*
|
|
Copyright (c) 2009, 2012, Monty Program Ab
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
#include "mariadb.h"
|
|
#include "sql_select.h"
|
|
#include "sql_test.h"
|
|
#include "opt_trace.h"
|
|
#include "opt_hints.h"
|
|
|
|
/*
|
|
Index Condition Pushdown Module
|
|
===============================
|
|
|
|
Storage Engine API
|
|
==================
|
|
SQL layer can push a condition to be checked for index tuple by calling
|
|
|
|
handler::idx_cond_push(uint keyno, Item *cond)
|
|
|
|
After that, the SQL layer is expected to start an index scan on the specified
|
|
index. The scan should be non-index-only (that is, do not use HA_EXTRA_KEYREAD
|
|
option).
|
|
|
|
Then, any call that reads rows from the index:
|
|
|
|
handler->some_index_read_function()
|
|
|
|
will check the index condition (see handler_index_cond_check()) and ignore
|
|
index tuples that do not match it.
|
|
|
|
Pushing index condition requires pushing end-of-range check, too
|
|
================================================================
|
|
|
|
Suppose we're computing
|
|
|
|
select *
|
|
from t1
|
|
where key1 between 10 and 20 and extra_index_cond
|
|
|
|
by using a range scan on (10 <= key1 <= 20) and pushing extra_index_cond as
|
|
pushed index condition.
|
|
SQL could use these calls to read rows:
|
|
|
|
h->idx_cond_push(key1, extra_index_cond);
|
|
h->index_read_map(key1=10, HA_READ_KEY_OR_NEXT); // (read-1)
|
|
while (h->index_next() != HA_ERR_END_OF_FILE) { // (read-2)
|
|
if (cmp_key(h->record, "key1=20" ) > 0)
|
|
break; // end of range
|
|
//process row.
|
|
}
|
|
|
|
Suppose an index read function above (either (read-1) or (read-2)) encounters
|
|
key1=21. Suppose extra_index_cond evaluates to false for this row. Then, it
|
|
will proceed to read next row, e.g. key1=22. If extra_index_cond again
|
|
evaluates to false it will continue further. This way, the index scan can
|
|
continue till the end of the index, ignoring the fact that we are not
|
|
interested in rows with key1>20.
|
|
|
|
The solution is: whenever ICP is used, the storage engine must be aware of the
|
|
end of the range being scanned so it can stop the scan as soon as it is reached.
|
|
|
|
End-of-range checks
|
|
===================
|
|
There are four call patterns:
|
|
|
|
1. Index Navigation commands. End of range check is setup with set_end_range
|
|
call:
|
|
|
|
handler->set_end_range(endpoint, direction);
|
|
handler->index_read_XXXX();
|
|
while (handler->index_next() == 0) // or index_prev()
|
|
{ ... }
|
|
|
|
2. Range Read API. set_end_range is called from read_range_first:
|
|
|
|
handler->read_range_first(start_range, end_range);
|
|
while (handler->read_range_next() == 0) { ... }
|
|
|
|
3. Equality lookups
|
|
|
|
handler->index_read_map(lookup_tuple, HA_READ_KEY_EXACT);
|
|
while (handler->index_next_same() == 0) { ... }
|
|
|
|
Here, set_end_range is not necessary, because index scanning code
|
|
will not read index tuples that do not match the lookup tuple.
|
|
|
|
4. multi_range_read calls.
|
|
These either fall-back to Range Read API or use their own ICP
|
|
implementation with its own ICP checks.
|
|
*/
|
|
|
|
|
|
/*
|
|
Check if given expression uses only table fields covered by the given index
|
|
|
|
SYNOPSIS
|
|
uses_index_fields_only()
|
|
item Expression to check
|
|
tbl The table having the index
|
|
keyno The index number
|
|
other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
|
|
|
|
DESCRIPTION
|
|
Check if given expression only uses fields covered by index #keyno in the
|
|
table tbl. Fields of other non-const tables are accepted only if other_tbls_ok.
|
|
|
|
Top-level AND/OR conditions are normally split up by make_cond_for_index(),
but nested AND/ORs, e.g. func(x AND y), are handled here by checking every
argument.
|
|
|
|
RETURN
|
|
TRUE Yes
|
|
FALSE No
|
|
*/
|
|
|
|
bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
                            bool other_tbls_ok)
{
  /*
    Give up at once if the walk with limit_index_condition_pushdown_processor
    reports that something in the item tree must not be pushed down.
  */
  if (item->walk(&Item::limit_index_condition_pushdown_processor, FALSE, NULL))
    return FALSE;

  /* A constant does not depend on any field, so it is always acceptable */
  if (item->const_item())
    return TRUE;

  const Item::Type item_type= item->type();

  /*
    Triggered conditions are never pushed down: nested outer joins execution
    code may need to evaluate such a condition several times (both triggered
    and untriggered).
    TODO: Consider cloning the triggered condition and using the copies for:
      1. push the first copy down, to have most restrictive index condition
         possible
      2. Put the second copy into tab->select_cond.
  */
  if (item_type == Item::FUNC_ITEM &&
      static_cast<Item_func*>(item)->functype() == Item_func::TRIG_COND_FUNC)
    return FALSE;

  /* The item does not refer to tbl at all: the caller's flag decides */
  if (!(item->used_tables() & tbl->map))
    return other_tbls_ok;

  switch (item_type) {
  case Item::FUNC_ITEM:
  {
    /* A function qualifies only if every one of its arguments does */
    Item_func *func= static_cast<Item_func*>(item);
    Item **args= func->arguments();
    const uint arg_count= func->argument_count();
    for (uint i= 0; i < arg_count; i++)
    {
      if (!uses_index_fields_only(args[i], tbl, keyno, other_tbls_ok))
        return FALSE;
    }
    return TRUE;
  }
  case Item::COND_ITEM:
  {
    /*
      An AND/OR condition. Regular AND/OR clauses are chopped up by
      make_cond_for_index(); what arrives here is a nested AND/OR such as
      func(x AND y), and it qualifies only if all of its arguments do.
    */
    List_iterator<Item> args_it(*static_cast<Item_cond*>(item)->argument_list());
    while (Item *arg= args_it++)
    {
      if (!uses_index_fields_only(arg, tbl, keyno, other_tbls_ok))
        return FALSE;
    }
    return TRUE;
  }
  case Item::FIELD_ITEM:
  {
    Field *field= static_cast<Item_field*>(item)->field;

    /* A field that belongs to some other table is accepted as is */
    if (field->table != tbl)
      return TRUE;

    /*
      The part_of_key test probably implies the two type tests below,
      but let's play it safe and check all three.
    */
    if (!field->part_of_key.is_set(keyno))
      return FALSE;
    switch (field->type()) {
    case MYSQL_TYPE_GEOMETRY:
    case MYSQL_TYPE_BLOB:
      return FALSE;
    default:
      break;
    }

    /*
      Look for the field among the user-defined parts of index #keyno.
      A key part flagged HA_PART_KEY_SEG is built only over a part of the
      field, which is not enough to evaluate an expression over the field.
    */
    KEY *index= tbl->key_info + keyno;
    for (uint i= 0; i < index->user_defined_key_parts; i++)
    {
      KEY_PART_INFO *part= index->key_part + i;
      if (field->eq(part->field))
        return !(part->key_part_flag & HA_PART_KEY_SEG);
    }

    /*
      Not a user-defined part of the index itself. When the engine reports
      HA_PRIMARY_KEY_IN_READ_INDEX and the table has a primary key other
      than #keyno, the parts of the primary key are searched as well.
    */
    if (!(tbl->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
      return FALSE;
    const uint pk_no= tbl->s->primary_key;
    if (pk_no == MAX_KEY || pk_no == keyno)
      return FALSE;

    KEY *pk= tbl->key_info + pk_no;
    for (uint i= 0; i < pk->user_defined_key_parts; i++)
    {
      KEY_PART_INFO *part= pk->key_part + i;
      /*
        It does not make sense to rely on the engine being able to read the
        full field if the key is built only over a part of this field.
      */
      if (field->eq(part->field))
        return !(part->key_part_flag & HA_PART_KEY_SEG);
    }
    return FALSE;
  }
  case Item::REF_ITEM:
    /* Look through the reference and check the item it stands for */
    return uses_index_fields_only(item->real_item(), tbl, keyno,
                                  other_tbls_ok);
  default:
    return FALSE; /* Play it safe, don't push unknown non-const items */
  }
}
|
|
|
|
|
|
/*
|
|
Get a part of the condition that can be checked using only index fields
|
|
|
|
SYNOPSIS
|
|
make_cond_for_index()
|
|
cond The source condition
|
|
table The table that is partially available
|
|
keyno The index in the above table. Only fields covered by the
|
|
index are available
|
|
other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
|
|
|
|
DESCRIPTION
|
|
Get the part of the condition that can be checked when, for the given table,
|
|
we have values only of fields covered by some index. The condition may
|
|
refer to other tables, it is assumed that we have values of all of their
|
|
fields.
|
|
|
|
Example:
|
|
make_cond_for_index(
|
|
"cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
|
|
t2, keyno(t2.key1))
|
|
will return
|
|
"cond(t1.field) AND cond(t2.key2)"
|
|
|
|
RETURN
|
|
Index condition, or NULL if no condition could be inferred.
|
|
*/
|
|
|
|
static Item *make_cond_for_index(THD *thd, Item *cond, TABLE *table, uint keyno,
                                 bool other_tbls_ok)
{
  /* An absent condition or a basic constant is handed back unchanged */
  if (!cond || cond->basic_const_item())
    return cond;

  if (cond->type() != Item::COND_ITEM)
  {
    /* A single predicate is either usable as a whole or not at all */
    if (uses_index_fields_only(cond, table, keyno, other_tbls_ok))
      return cond;
    return NULL;
  }

  Item_cond *source= static_cast<Item_cond*>(cond);

  if (source->functype() != Item_func::COND_AND_FUNC)
  {
    /*
      It's OR: the result is usable only if every disjunct yields an index
      condition; a single failure makes the whole OR unusable.
    */
    Item_cond_or *or_cond= new (thd->mem_root) Item_cond_or(thd);
    if (!or_cond)
      return NULL;

    List_iterator<Item> disjuncts(*source->argument_list());
    while (Item *disjunct= disjuncts++)
    {
      Item *part= make_cond_for_index(thd, disjunct, table, keyno,
                                      other_tbls_ok);
      if (!part)
        return NULL;
      or_cond->argument_list()->push_back(part, thd->mem_root);
    }
    or_cond->quick_fix_field();
    /* The new OR takes over the used-tables map of the source OR */
    or_cond->used_tables_cache= source->used_tables_cache;
    or_cond->top_level_item();
    return or_cond;
  }

  /*
    It's AND: collect the conjuncts that yield an index condition and
    silently skip the ones that do not.
  */
  Item_cond_and *and_cond= new (thd->mem_root) Item_cond_and(thd);
  if (!and_cond)
    return NULL;

  table_map tables_seen= 0;
  List_iterator<Item> conjuncts(*source->argument_list());
  while (Item *conjunct= conjuncts++)
  {
    Item *part= make_cond_for_index(thd, conjunct, table, keyno,
                                    other_tbls_ok);
    if (!part)
      continue;
    and_cond->argument_list()->push_back(part, thd->mem_root);
    tables_seen|= part->used_tables();
  }

  const uint kept= and_cond->argument_list()->elements;
  if (kept == 0)
    return NULL;
  if (kept == 1)
  {
    /* remove AND level if there is only one argument */
    return and_cond->argument_list()->head();
  }
  and_cond->quick_fix_field();
  and_cond->used_tables_cache= tables_seen;
  return and_cond;
}
|
|
|
|
|
|
/*
  Get the part of a condition that remains after index-only parts are removed

  SYNOPSIS
    make_cond_remainder()
      thd            Thread handle; new AND/OR items are allocated on its
                     mem_root
      cond           The source condition. It is dereferenced without a
                     check, so it must not be NULL
      table          Passed on to uses_index_fields_only()
      keyno          Passed on to uses_index_fields_only()
      other_tbls_ok  Passed on to uses_index_fields_only()
      exclude_index  TRUE <=> a condition for which uses_index_fields_only()
                     holds is dropped from the result

  DESCRIPTION
    For an AND, the arguments are processed recursively with the same
    exclude_index value and arguments that produce NULL are skipped.
    For an OR, the arguments are processed with exclude_index=FALSE, and a
    NULL from any argument makes the whole result NULL.
    Any other condition is returned as is unless it was dropped because of
    exclude_index.

  RETURN
    The remaining condition, or NULL if nothing remains or an item could
    not be allocated.
*/

static Item *make_cond_remainder(THD *thd, Item *cond, TABLE *table, uint keyno,
                                 bool other_tbls_ok, bool exclude_index)
{
  if (exclude_index &&
      uses_index_fields_only(cond, table, keyno, other_tbls_ok))
    return NULL;

  /* A predicate that is not AND/OR and was not dropped above stays */
  if (cond->type() != Item::COND_ITEM)
    return cond;

  Item_cond *source= static_cast<Item_cond*>(cond);
  table_map tables_seen= 0;

  if (source->functype() != Item_func::COND_AND_FUNC)
  {
    /*
      It's OR. The disjuncts are processed with exclude_index=FALSE, i.e.
      nothing is dropped from them on account of the index.
    */
    Item_cond_or *or_cond= new (thd->mem_root) Item_cond_or(thd);
    if (!or_cond)
      return NULL;

    List_iterator<Item> disjuncts(*source->argument_list());
    while (Item *disjunct= disjuncts++)
    {
      Item *rest= make_cond_remainder(thd, disjunct, table, keyno,
                                      other_tbls_ok, FALSE);
      if (!rest)
        return NULL;
      or_cond->argument_list()->push_back(rest, thd->mem_root);
      tables_seen|= rest->used_tables();
    }
    or_cond->quick_fix_field();
    or_cond->used_tables_cache= tables_seen;
    or_cond->top_level_item();
    return or_cond;
  }

  /* It's AND: build a new AND from the conjuncts that leave a remainder */
  Item_cond_and *and_cond= new (thd->mem_root) Item_cond_and(thd);
  if (!and_cond)
    return NULL;

  List_iterator<Item> conjuncts(*source->argument_list());
  while (Item *conjunct= conjuncts++)
  {
    Item *rest= make_cond_remainder(thd, conjunct, table, keyno,
                                    other_tbls_ok, exclude_index);
    if (!rest)
      continue;
    and_cond->argument_list()->push_back(rest, thd->mem_root);
    tables_seen|= rest->used_tables();
  }

  const uint kept= and_cond->argument_list()->elements;
  if (kept == 0)
    return NULL;
  if (kept == 1)
    return and_cond->argument_list()->head();  /* no AND level needed */
  and_cond->quick_fix_field();
  and_cond->used_tables_cache= tables_seen;
  return and_cond;
}
|
|
|
|
|
|
/*
|
|
Try to extract and push the index condition
|
|
|
|
SYNOPSIS
|
|
push_index_cond()
|
|
tab A join tab that has tab->table->file and its condition
|
|
in tab->select_cond
|
|
keyno Index for which extract and push the condition
|
|
|
|
DESCRIPTION
|
|
Try to extract and push the index condition down to table handler
|
|
*/
|
|
|
|
void push_index_cond(JOIN_TAB *tab, uint keyno)
{
  DBUG_ENTER("push_index_cond");
  Item *idx_cond;

  /*
    Index condition pushdown is attempted only if the index has the
    HA_DO_INDEX_COND_PUSHDOWN flag, hint_key_state() allows it for this key,
    the statement is neither a multi-table UPDATE nor a multi-table DELETE,
    and the access type is neither JT_CONST nor JT_SYSTEM.

    Backported the following from MySQL 5.6:
      6. The index is not a clustered index. The performance improvement
         of pushing an index condition on a clustered key is much lower
         than on a non-clustered key. This restriction should be
         re-evaluated when WL#6061 is implemented.
  */
  if ((tab->table->key_info[keyno].index_flags & HA_DO_INDEX_COND_PUSHDOWN) &&
      hint_key_state(tab->join->thd, tab->table, keyno, ICP_HINT_ENUM,
                     OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN) &&
      tab->join->thd->lex->sql_command != SQLCOM_UPDATE_MULTI &&
      tab->join->thd->lex->sql_command != SQLCOM_DELETE_MULTI &&
      tab->type != JT_CONST && tab->type != JT_SYSTEM &&
      !tab->table->is_clustering_key(keyno))                            // 6
  {
    DBUG_EXECUTE("where",
                 print_where(tab->select_cond, "full cond", QT_ORDINARY););

    /* The part of the condition that can be checked with index #keyno */
    idx_cond= make_cond_for_index(tab->join->thd, tab->select_cond, tab->table,
                                  keyno, tab->icp_other_tables_ok);

    DBUG_EXECUTE("where",
                 print_where(idx_cond, "idx cond", QT_ORDINARY););

    if (idx_cond)
    {
      Item *idx_remainder_cond= 0;
      /* Remember the complete condition as it was before the pushdown */
      tab->pre_idx_push_select_cond= tab->select_cond;
      Json_writer_object trace(tab->join->thd);
      trace.add_table_name(tab);
      /*
        For BKA cache we store condition to special BKA cache field
        because evaluation of the condition requires additional operations
        before the evaluation. This condition is used in
        JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
      */
      if (tab->use_join_cache &&
          /*
            if cache is used then the value is TRUE only
            for BKA[_UNIQUE] cache (see check_join_cache_usage func).
          */
          tab->icp_other_tables_ok &&
          (idx_cond->used_tables() &
           ~(tab->table->map | tab->join->const_table_map)))
        tab->cache_idx_cond= idx_cond;
      else
      {
        /* The handler returns the part it did not take (NULL if none) */
        idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond);

        /*
          If (1) there is an index condition that we couldn't push using ICP,
             (2) we are using Join Buffering
             (3) and we are using BKA
          then use BKA's Index Condition Pushdown mechanism to check it.
        */
        if (idx_remainder_cond && tab->use_join_cache && // (1) && (2)
            tab->icp_other_tables_ok)                    // (3)
        {
          tab->cache_idx_cond= idx_remainder_cond;
          idx_remainder_cond= NULL;
        }
      }
      trace.add("index_condition", idx_cond);

      /*
        Disable eq_ref's "lookup cache" if we've pushed down an index
        condition.
        TODO: This check happens to work on current ICP implementations, but
        there may exist a compliant implementation that will not work
        correctly with it. Sort this out when we stabilize the condition
        pushdown APIs.
      */
      if (idx_remainder_cond != idx_cond)
        tab->ref.disable_cache= TRUE;

      /*
        The condition to check on complete rows: with idx_cond_fact_out the
        index-only parts are factored out of it, otherwise the whole
        pre-push condition is kept.
      */
      Item *row_cond= tab->idx_cond_fact_out ?
        make_cond_remainder(tab->join->thd, tab->select_cond,
                            tab->table, keyno,
                            tab->icp_other_tables_ok, TRUE) :
        tab->pre_idx_push_select_cond;

      DBUG_EXECUTE("where",
                   print_where(row_cond, "remainder cond", QT_ORDINARY););

      if (row_cond)
      {
        if (!idx_remainder_cond)
          tab->select_cond= row_cond;
        else
        {
          /* Both parts exist: the row condition is their conjunction */
          Item_cond_and *new_cond= new (tab->join->thd->mem_root)
            Item_cond_and(tab->join->thd, row_cond, idx_remainder_cond);
          /*
            The allocation can fail (the other allocations in this file are
            checked the same way). In that case tab->select_cond is left
            untouched: it still holds the complete pre-push condition, which
            is a valid, if redundant, row condition.
          */
          if (new_cond)
          {
            new_cond->quick_fix_field();
            new_cond->used_tables_cache=
              row_cond->used_tables() | idx_remainder_cond->used_tables();
            tab->select_cond= new_cond;
          }
        }
      }
      else
        tab->select_cond= idx_remainder_cond;

      if (tab->select_cond)
        trace.add("row_condition", tab->select_cond);

      if (tab->select)
      {
        DBUG_EXECUTE("where",
                     print_where(tab->select->cond,
                                 "select_cond",
                                 QT_ORDINARY););

        /* Keep the SQL_SELECT object in sync with the join tab */
        tab->select->cond= tab->select_cond;
        tab->select->pre_idx_push_select_cond= tab->pre_idx_push_select_cond;
      }
    }
  }
  DBUG_VOID_RETURN;
}
|
|
|
|
|