mirror of
https://github.com/MariaDB/server.git
synced 2025-01-24 07:44:22 +01:00
MDEV-34911 Sargable substr(col, 1, n) = str
Make Item_func_eq of the following forms sargable by updating the relevant range analysis methods: 1. substr(col, 1, n) = str 2. str = substr(col, 1, n) 3. left(col, n) = str 4. str = left(col, n) where col is a indexed column and str is a const and inexpensive item of length n. We do this by factoring out Item_func_like::get_mm_leaf() and apply it to a string obtained from escaping str and then appending a wildcard "%" to it. The addition of the two Functype enums, LEFT_FUNC and SUBSTR_FUNC, requires changes in the spider group by handler to continue handling LEFT and SUBSTR correctly. Co-authored-by: Yuchen Pei <ycp@mariadb.com> Co-authored-by: Sergei Petrunia <sergey@mariadb.com>
This commit is contained in:
parent
ae998c22b2
commit
e021770667
13 changed files with 673 additions and 107 deletions
|
@ -73,6 +73,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
|
|||
../sql/net_serv.cc ../sql/opt_range.cc
|
||||
../sql/opt_rewrite_date_cmp.cc
|
||||
../sql/opt_rewrite_remove_casefold.cc
|
||||
../sql/opt_sargable_left.cc
|
||||
../sql/opt_sum.cc
|
||||
../sql/parse_file.cc ../sql/procedure.cc ../sql/protocol.cc
|
||||
../sql/records.cc ../sql/repl_failsafe.cc ../sql/rpl_filter.cc
|
||||
|
|
|
@ -5806,4 +5806,154 @@ coercibility(database()) 3
|
|||
#
|
||||
# End of 11.5 tests
|
||||
#
|
||||
#
|
||||
# MDEV-34911 Make conditions SUBSTR(col, 1, n) = const_str sargable
|
||||
#
|
||||
create table t (c varchar(5), key (c));
|
||||
insert into t values ('ddd'), ('bbcd'), ('bba'), ('b%_cd'), ('aaa');
|
||||
explain select * from t where substr(c, 1, 2) = 'bb';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
|
||||
select * from t where substr(c, 1, 2) = 'bb';
|
||||
c
|
||||
bba
|
||||
bbcd
|
||||
explain select * from t where 'bb' = substr(c, 1, 2);
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
|
||||
select * from t where 'bb' = substr(c, 1, 2);
|
||||
c
|
||||
bba
|
||||
bbcd
|
||||
explain select * from t where left(c, 2) = 'bb';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
|
||||
select * from t where left(c, 2) = 'bb';
|
||||
c
|
||||
bba
|
||||
bbcd
|
||||
explain select * from t where 'bb' = left(c, 2);
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
|
||||
select * from t where 'bb' = left(c, 2);
|
||||
c
|
||||
bba
|
||||
bbcd
|
||||
explain select * from t where substr(c, 1, 4) = 'b%_c';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 1 Using where; Using index
|
||||
select * from t where substr(c, 1, 4) = 'b%_c';
|
||||
c
|
||||
b%_cd
|
||||
explain select * from t where substr(c, 1, 2) = concat('b', 'b');
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
|
||||
select * from t where substr(c, 1, 2) = concat('b', 'b');
|
||||
c
|
||||
bba
|
||||
bbcd
|
||||
explain select * from t where substr(c, 1, 2) = substr('bb1', 1, 2);
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
|
||||
select * from t where substr(c, 1, 2) = substr('bb1', 1, 2);
|
||||
c
|
||||
bba
|
||||
bbcd
|
||||
create table t1 (a varchar(32), b int, index(a));
|
||||
insert into t1 select seq, seq / 33 from seq_1_to_100;
|
||||
explain select * from t1 where substr(a, 1, 2)='80';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 range a a 35 NULL 1 Using index condition
|
||||
explain select * from t1 where substr(a, 1, b)='80';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 range a a 35 NULL 1 Using where
|
||||
create view v1 as select a,b from t1;
|
||||
explain select * from v1 where substr(a, 1, 2)='80';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 range a a 35 NULL 1 Using index condition
|
||||
drop view v1;
|
||||
drop table t1;
|
||||
explain select * from t where substr(c, 1, 1) = 'bb';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
|
||||
select * from t where substr(c, 1, 1) = 'bb';
|
||||
c
|
||||
explain select * from t where substr(c, 1, 3) = 'bb';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
|
||||
select * from t where substr(c, 1, 3) = 'bb';
|
||||
c
|
||||
explain select * from t where substr(c, 1) = 'bb';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
|
||||
select * from t where substr(c, 1) = 'bb';
|
||||
c
|
||||
explain select * from t where substr(c, 1, 2) = substr(c, 2, 3);
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t index NULL c 8 NULL 5 Using where; Using index
|
||||
select * from t where substr(c, 1, 2) = substr(c, 2, 3);
|
||||
c
|
||||
aaa
|
||||
ddd
|
||||
drop index c on t;
|
||||
explain select * from t where substr(c, 1, 2) = 'bb';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t ALL NULL NULL NULL NULL 5 Using where
|
||||
select * from t where substr(c, 1, 2) = 'bb';
|
||||
c
|
||||
bbcd
|
||||
bba
|
||||
drop table t;
|
||||
set @old_collation_connection=@@collation_connection;
|
||||
SET collation_connection=utf32_czech_ci;
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('%c'),('%ce'),('%cé'),('%ch'), ('c'), ('d'), ('c%'), ('c\\g');
|
||||
SELECT * FROM t1 WHERE s1 LIKE '\\%c%';
|
||||
s1
|
||||
%c
|
||||
%ce
|
||||
%cé
|
||||
%ch
|
||||
SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c';
|
||||
s1
|
||||
%c
|
||||
%ce
|
||||
%cé
|
||||
%ch
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%';
|
||||
s1
|
||||
c\g
|
||||
SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\';
|
||||
s1
|
||||
c\g
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT * FROM t1 WHERE s1 LIKE '\\%c%';
|
||||
s1
|
||||
%c
|
||||
%ce
|
||||
%cé
|
||||
%ch
|
||||
SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c';
|
||||
s1
|
||||
%c
|
||||
%ce
|
||||
%cé
|
||||
%ch
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%';
|
||||
s1
|
||||
c\g
|
||||
SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\';
|
||||
s1
|
||||
c\g
|
||||
DROP TABLE t1;
|
||||
set collation_connection=@old_collation_connection;
|
||||
create table t1 (a varchar(100), b varchar(100), key(a));
|
||||
insert into t1 select seq, seq from seq_1_to_1000;
|
||||
explain select * from t1 where LEFT(a, 20) ='%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%';
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 ALL a NULL NULL NULL 1000 Using where
|
||||
drop table t1;
|
||||
#
|
||||
# End of 11.8 tests
|
||||
#
|
||||
ALTER DATABASE test CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci;
|
||||
|
|
|
@ -2712,4 +2712,120 @@ SELECT
|
|||
--echo # End of 11.5 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-34911 Make conditions SUBSTR(col, 1, n) = const_str sargable
|
||||
--echo #
|
||||
|
||||
create table t (c varchar(5), key (c));
|
||||
insert into t values ('ddd'), ('bbcd'), ('bba'), ('b%_cd'), ('aaa');
|
||||
|
||||
# positive cases
|
||||
|
||||
let $query=
|
||||
select * from t where substr(c, 1, 2) = 'bb';
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
let $query=
|
||||
select * from t where 'bb' = substr(c, 1, 2);
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
let $query=
|
||||
select * from t where left(c, 2) = 'bb';
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
let $query=
|
||||
select * from t where 'bb' = left(c, 2);
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
let $query=
|
||||
select * from t where substr(c, 1, 4) = 'b%_c';
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
# RHS can_eval_in_optimize()
|
||||
let $query=
|
||||
select * from t where substr(c, 1, 2) = concat('b', 'b');
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
# RHS can_eval_in_optimize()
|
||||
let $query=
|
||||
select * from t where substr(c, 1, 2) = substr('bb1', 1, 2);
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
# works with view
|
||||
create table t1 (a varchar(32), b int, index(a));
|
||||
insert into t1 select seq, seq / 33 from seq_1_to_100;
|
||||
explain select * from t1 where substr(a, 1, 2)='80';
|
||||
# The range is a necessary condition, and further filtering happens in
|
||||
# the exec stage
|
||||
explain select * from t1 where substr(a, 1, b)='80';
|
||||
create view v1 as select a,b from t1;
|
||||
explain select * from v1 where substr(a, 1, 2)='80';
|
||||
drop view v1;
|
||||
drop table t1;
|
||||
|
||||
# negative cases
|
||||
|
||||
let $query=
|
||||
select * from t where substr(c, 1, 1) = 'bb';
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
let $query=
|
||||
select * from t where substr(c, 1, 3) = 'bb';
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
let $query=
|
||||
select * from t where substr(c, 1) = 'bb';
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
let $query=
|
||||
select * from t where substr(c, 1, 2) = substr(c, 2, 3);
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
# full table scan when no index on sargable substr
|
||||
drop index c on t;
|
||||
|
||||
let $query=
|
||||
select * from t where substr(c, 1, 2) = 'bb';
|
||||
eval explain $query;
|
||||
eval $query;
|
||||
|
||||
drop table t;
|
||||
|
||||
# czech has the same behaviour as in LIKE
|
||||
set @old_collation_connection=@@collation_connection;
|
||||
SET collation_connection=utf32_czech_ci;
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('%c'),('%ce'),('%cé'),('%ch'), ('c'), ('d'), ('c%'), ('c\\g');
|
||||
SELECT * FROM t1 WHERE s1 LIKE '\\%c%';
|
||||
SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c';
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%';
|
||||
SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\';
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT * FROM t1 WHERE s1 LIKE '\\%c%';
|
||||
SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c';
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%';
|
||||
SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\';
|
||||
DROP TABLE t1;
|
||||
set collation_connection=@old_collation_connection;
|
||||
|
||||
# no stack overrun escaping long string
|
||||
create table t1 (a varchar(100), b varchar(100), key(a));
|
||||
insert into t1 select seq, seq from seq_1_to_1000;
|
||||
explain select * from t1 where LEFT(a, 20) ='%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%';
|
||||
drop table t1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 11.8 tests
|
||||
--echo #
|
||||
--source include/test_db_charset_restore.inc
|
||||
|
|
|
@ -116,6 +116,7 @@ SET (SQL_SOURCE
|
|||
opt_range.cc vector_mhnsw.cc
|
||||
opt_rewrite_date_cmp.cc
|
||||
opt_rewrite_remove_casefold.cc
|
||||
opt_sargable_left.cc
|
||||
opt_sum.cc
|
||||
../sql-common/pack.c parse_file.cc password.c procedure.cc
|
||||
protocol.cc records.cc repl_failsafe.cc rpl_filter.cc
|
||||
|
|
|
@ -248,6 +248,7 @@ public:
|
|||
bool fix_length_and_dec(THD *thd) override { decimals=0; max_length=1; return FALSE; }
|
||||
decimal_digits_t decimal_precision() const override { return 1; }
|
||||
bool need_parentheses_in_default() override { return true; }
|
||||
bool with_sargable_substr(Item_field **field = NULL, int *value_idx = NULL) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -540,8 +541,14 @@ public:
|
|||
may succeed.
|
||||
*/
|
||||
if (!(ftree= get_full_func_mm_tree_for_args(param, args[0], args[1])) &&
|
||||
!(ftree= get_full_func_mm_tree_for_args(param, args[1], args[0])))
|
||||
ftree= Item_func::get_mm_tree(param, cond_ptr);
|
||||
!(ftree= get_full_func_mm_tree_for_args(param, args[1], args[0])) &&
|
||||
!(ftree= Item_func::get_mm_tree(param, cond_ptr)))
|
||||
{
|
||||
Item_field *field= NULL;
|
||||
int value_idx= -1;
|
||||
if (with_sargable_substr(&field, &value_idx))
|
||||
DBUG_RETURN(get_full_func_mm_tree_for_args(param, field, args[value_idx]));
|
||||
}
|
||||
DBUG_RETURN(ftree);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -106,7 +106,7 @@ public:
|
|||
JSON_EXTRACT_FUNC, JSON_VALID_FUNC, ROWNUM_FUNC,
|
||||
CASE_SEARCHED_FUNC, // Used by ColumnStore/Spider
|
||||
CASE_SIMPLE_FUNC, // Used by ColumnStore/spider,
|
||||
DATE_FUNC, YEAR_FUNC
|
||||
DATE_FUNC, YEAR_FUNC, SUBSTR_FUNC, LEFT_FUNC
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -693,6 +693,7 @@ public:
|
|||
bool hash_not_null(Hasher *hasher) override;
|
||||
String *val_str(String *) override;
|
||||
bool fix_length_and_dec(THD *thd) override;
|
||||
enum Functype functype() const override { return LEFT_FUNC; }
|
||||
LEX_CSTRING func_name_cstring() const override
|
||||
{
|
||||
static LEX_CSTRING name= {STRING_WITH_LEN("left") };
|
||||
|
@ -739,6 +740,7 @@ public:
|
|||
print_sql_mode_qualified_name(str, query_type);
|
||||
print_args_parenthesized(str, query_type);
|
||||
}
|
||||
enum Functype functype() const override { return SUBSTR_FUNC; }
|
||||
LEX_CSTRING func_name_cstring() const override
|
||||
{
|
||||
static LEX_CSTRING name= {STRING_WITH_LEN("substr") };
|
||||
|
|
317
sql/opt_range.cc
317
sql/opt_range.cc
|
@ -8983,6 +8983,205 @@ static bool is_field_an_unique_index(Field *field)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
@brief
|
||||
Given a string, escape the LIKE pattern characters (%, _, \) with the '\'.
|
||||
|
||||
@detail
|
||||
Currently we fail if the escaped string didn't fit into MAX_FIELD_WIDTH
|
||||
bytes but this is not necessary.
|
||||
*/
|
||||
|
||||
static bool escape_like_characters(String *res)
|
||||
{
|
||||
CHARSET_INFO *cs= res->charset();
|
||||
StringBuffer<MAX_FIELD_WIDTH> tmp2(cs);
|
||||
tmp2.copy(*res);
|
||||
int ret;
|
||||
uchar *src= (uchar *) tmp2.ptr(), *src_end= (uchar *) tmp2.end(),
|
||||
*dst= (uchar *) res->ptr(), *dst_end= dst + MAX_FIELD_WIDTH;
|
||||
my_wc_t wc;
|
||||
while (src < src_end)
|
||||
{
|
||||
/* Advance to the next character */
|
||||
if ((ret= my_ci_mb_wc(cs, &wc, src, src_end)) <= 0)
|
||||
{
|
||||
if (ret == MY_CS_ILSEQ) /* Bad sequence */
|
||||
return true; /* Cannot LIKE optimize */
|
||||
break; /* End of the string */
|
||||
}
|
||||
src+= ret;
|
||||
|
||||
/* If the next char is escape-able in actual LIKE, escape it */
|
||||
if (wc == (my_wc_t) '%' || wc == (my_wc_t) '_' || wc == (my_wc_t) '\\')
|
||||
{
|
||||
if ((ret= my_ci_wc_mb(cs, (my_wc_t) '\\', dst, dst_end)) <= 0)
|
||||
return true; /* No space - no LIKE optimize */
|
||||
dst+= ret;
|
||||
}
|
||||
if ((ret= my_ci_wc_mb(cs, wc, dst, dst_end)) <= 0)
|
||||
return true; /* No space - no LIKE optimize */
|
||||
dst+= ret;
|
||||
}
|
||||
res->length((char *) dst - res->ptr());
|
||||
return false; /* Ok */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
@brief
|
||||
Produce SEL_ARG interval for LIKE and prefix match functions.
|
||||
|
||||
@detail
|
||||
This is used for conditions in forms:
|
||||
|
||||
- key_col LIKE 'sargable_pattern'
|
||||
- SUBSTR(key_col, 1, ...) = 'value', or equivalent conditions involving
|
||||
LEFT() instead of SUBSTR() - see with_sargable_substr() for details.
|
||||
|
||||
@param
|
||||
item The comparison item (Item_func_like or Item_func_eq)
|
||||
*/
|
||||
|
||||
static SEL_ARG *
|
||||
get_mm_leaf_for_LIKE(Item_bool_func *item, RANGE_OPT_PARAM *param,
|
||||
Field *field, KEY_PART *key_part,
|
||||
Item_func::Functype type, Item *value)
|
||||
{
|
||||
DBUG_ENTER("get_mm_leaf_for_sargable");
|
||||
DBUG_ASSERT(value);
|
||||
|
||||
if (key_part->image_type != Field::itRAW)
|
||||
DBUG_RETURN(0);
|
||||
|
||||
uint keynr= param->real_keynr[key_part->key];
|
||||
if (param->using_real_indexes &&
|
||||
!field->optimize_range(keynr, key_part->part))
|
||||
DBUG_RETURN(0);
|
||||
|
||||
if (field->result_type() == STRING_RESULT &&
|
||||
field->charset() != item->compare_collation())
|
||||
{
|
||||
/*
|
||||
For equalities where one side is LEFT or SUBSTR
|
||||
param->note_unusable_keys is BITMAP_EXCEPT_ANY_EQUALITY and the
|
||||
following if condition is satisfied. But it will not result in
|
||||
duplicate warnings because the ref optimizer does not cover this
|
||||
case.
|
||||
*/
|
||||
if (param->note_unusable_keys & Item_func::BITMAP_LIKE)
|
||||
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
|
||||
item->func_name_cstring(),
|
||||
item->compare_collation(),
|
||||
value,
|
||||
Data_type_compatibility::
|
||||
INCOMPATIBLE_COLLATION);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
|
||||
String *res;
|
||||
|
||||
if (!(res= value->val_str(&tmp)))
|
||||
DBUG_RETURN(&null_element);
|
||||
|
||||
if (field->cmp_type() != STRING_RESULT ||
|
||||
field->type_handler() == &type_handler_enum ||
|
||||
field->type_handler() == &type_handler_set)
|
||||
{
|
||||
if (param->note_unusable_keys & Item_func::BITMAP_LIKE)
|
||||
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
|
||||
item->func_name_cstring(),
|
||||
item->compare_collation(),
|
||||
value,
|
||||
Data_type_compatibility::
|
||||
INCOMPATIBLE_DATA_TYPE);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
/*
|
||||
TODO:
|
||||
Check if this was a function. This should have be optimized away
|
||||
in the sql_select.cc
|
||||
*/
|
||||
if (res != &tmp)
|
||||
{
|
||||
tmp.copy(*res); // Get own copy
|
||||
res= &tmp;
|
||||
}
|
||||
|
||||
/*
|
||||
If we're handling a predicate in one of these forms:
|
||||
- LEFT(key_col, N) ='string_const'
|
||||
- SUBSTRING(key_col, 1, N)='string_const'
|
||||
|
||||
then we need to:
|
||||
- escape the LIKE pattern characters in the string_const,
|
||||
- make the search pattern to be 'string_const%':
|
||||
*/
|
||||
if (type != Item_func::LIKE_FUNC)
|
||||
{
|
||||
DBUG_ASSERT(type == Item_func::EQ_FUNC);
|
||||
if (escape_like_characters(res))
|
||||
DBUG_RETURN(0); /* Error, no optimization */
|
||||
res->append("%", 1);
|
||||
}
|
||||
|
||||
uint maybe_null= (uint) field->real_maybe_null();
|
||||
size_t field_length= field->pack_length() + maybe_null;
|
||||
size_t offset= maybe_null;
|
||||
size_t length= key_part->store_length;
|
||||
|
||||
if (length != key_part->length + maybe_null)
|
||||
{
|
||||
/* key packed with length prefix */
|
||||
offset+= HA_KEY_BLOB_LENGTH;
|
||||
field_length= length - HA_KEY_BLOB_LENGTH;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (unlikely(length < field_length))
|
||||
{
|
||||
/*
|
||||
This can only happen in a table created with UNIREG where one key
|
||||
overlaps many fields
|
||||
*/
|
||||
length= field_length;
|
||||
}
|
||||
else
|
||||
field_length= length;
|
||||
}
|
||||
length+= offset;
|
||||
uchar *min_str,*max_str;
|
||||
if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
|
||||
DBUG_RETURN(0);
|
||||
max_str= min_str + length;
|
||||
if (maybe_null)
|
||||
max_str[0]= min_str[0]=0;
|
||||
|
||||
size_t min_length, max_length;
|
||||
field_length-= maybe_null;
|
||||
/* If the item is a LIKE, use its escape, otherwise use backslash */
|
||||
int escape= type == Item_func::LIKE_FUNC ?
|
||||
((Item_func_like *) item)->escape : '\\';
|
||||
if (field->charset()->like_range(res->ptr(), res->length(),
|
||||
escape, wild_one, wild_many,
|
||||
field_length,
|
||||
(char*) min_str + offset,
|
||||
(char*) max_str + offset,
|
||||
&min_length, &max_length))
|
||||
DBUG_RETURN(0); // Can't optimize with LIKE
|
||||
|
||||
if (offset != maybe_null) // BLOB or VARCHAR
|
||||
{
|
||||
int2store(min_str + maybe_null, min_length);
|
||||
int2store(max_str + maybe_null, max_length);
|
||||
}
|
||||
SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
|
||||
DBUG_RETURN(tree);
|
||||
}
|
||||
|
||||
|
||||
SEL_TREE *
|
||||
Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
|
||||
Item_func::Functype type, Item *value)
|
||||
|
@ -8995,6 +9194,9 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
|
|||
KEY_PART *end = param->key_parts_end;
|
||||
SEL_TREE *tree=0;
|
||||
table_map value_used_tables= 0;
|
||||
bool know_sargable_substr= false;
|
||||
bool sargable_substr; // protected by know_sargable_substr
|
||||
|
||||
if (value &&
|
||||
(value_used_tables= value->used_tables()) &
|
||||
~(param->prev_tables | param->read_tables))
|
||||
|
@ -9019,6 +9221,17 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
|
|||
*/
|
||||
MEM_ROOT *tmp_root= param->mem_root;
|
||||
param->thd->mem_root= param->old_root;
|
||||
if (!know_sargable_substr)
|
||||
{
|
||||
sargable_substr= with_sargable_substr();
|
||||
know_sargable_substr= true;
|
||||
}
|
||||
if (sargable_substr)
|
||||
{
|
||||
sel_arg= get_mm_leaf_for_LIKE(this, param, key_part->field, key_part,
|
||||
type, value);
|
||||
}
|
||||
else
|
||||
sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value);
|
||||
param->thd->mem_root= tmp_root;
|
||||
|
||||
|
@ -9087,109 +9300,7 @@ Item_func_like::get_mm_leaf(RANGE_OPT_PARAM *param,
|
|||
Item_func::Functype type, Item *value)
|
||||
{
|
||||
DBUG_ENTER("Item_func_like::get_mm_leaf");
|
||||
DBUG_ASSERT(value);
|
||||
|
||||
if (key_part->image_type != Field::itRAW)
|
||||
DBUG_RETURN(0);
|
||||
|
||||
uint keynr= param->real_keynr[key_part->key];
|
||||
if (param->using_real_indexes &&
|
||||
!field->optimize_range(keynr, key_part->part))
|
||||
DBUG_RETURN(0);
|
||||
|
||||
if (field->result_type() == STRING_RESULT &&
|
||||
field->charset() != compare_collation())
|
||||
{
|
||||
if (param->note_unusable_keys & BITMAP_LIKE)
|
||||
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
|
||||
func_name_cstring(),
|
||||
compare_collation(),
|
||||
value,
|
||||
Data_type_compatibility::
|
||||
INCOMPATIBLE_COLLATION);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
|
||||
String *res;
|
||||
|
||||
if (!(res= value->val_str(&tmp)))
|
||||
DBUG_RETURN(&null_element);
|
||||
|
||||
if (field->cmp_type() != STRING_RESULT ||
|
||||
field->type_handler() == &type_handler_enum ||
|
||||
field->type_handler() == &type_handler_set)
|
||||
{
|
||||
if (param->note_unusable_keys & BITMAP_LIKE)
|
||||
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
|
||||
func_name_cstring(),
|
||||
compare_collation(),
|
||||
value,
|
||||
Data_type_compatibility::
|
||||
INCOMPATIBLE_DATA_TYPE);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
/*
|
||||
TODO:
|
||||
Check if this was a function. This should have be optimized away
|
||||
in the sql_select.cc
|
||||
*/
|
||||
if (res != &tmp)
|
||||
{
|
||||
tmp.copy(*res); // Get own copy
|
||||
res= &tmp;
|
||||
}
|
||||
|
||||
uint maybe_null= (uint) field->real_maybe_null();
|
||||
size_t field_length= field->pack_length() + maybe_null;
|
||||
size_t offset= maybe_null;
|
||||
size_t length= key_part->store_length;
|
||||
|
||||
if (length != key_part->length + maybe_null)
|
||||
{
|
||||
/* key packed with length prefix */
|
||||
offset+= HA_KEY_BLOB_LENGTH;
|
||||
field_length= length - HA_KEY_BLOB_LENGTH;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (unlikely(length < field_length))
|
||||
{
|
||||
/*
|
||||
This can only happen in a table created with UNIREG where one key
|
||||
overlaps many fields
|
||||
*/
|
||||
length= field_length;
|
||||
}
|
||||
else
|
||||
field_length= length;
|
||||
}
|
||||
length+= offset;
|
||||
uchar *min_str,*max_str;
|
||||
if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
|
||||
DBUG_RETURN(0);
|
||||
max_str= min_str + length;
|
||||
if (maybe_null)
|
||||
max_str[0]= min_str[0]=0;
|
||||
|
||||
size_t min_length, max_length;
|
||||
field_length-= maybe_null;
|
||||
if (field->charset()->like_range(res->ptr(), res->length(),
|
||||
escape, wild_one, wild_many,
|
||||
field_length,
|
||||
(char*) min_str + offset,
|
||||
(char*) max_str + offset,
|
||||
&min_length, &max_length))
|
||||
DBUG_RETURN(0); // Can't optimize with LIKE
|
||||
|
||||
if (offset != maybe_null) // BLOB or VARCHAR
|
||||
{
|
||||
int2store(min_str + maybe_null, min_length);
|
||||
int2store(max_str + maybe_null, max_length);
|
||||
}
|
||||
SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
|
||||
DBUG_RETURN(tree);
|
||||
DBUG_RETURN(get_mm_leaf_for_LIKE(this, param, field, key_part, type, value));
|
||||
}
|
||||
|
||||
|
||||
|
|
158
sql/opt_sargable_left.cc
Normal file
158
sql/opt_sargable_left.cc
Normal file
|
@ -0,0 +1,158 @@
|
|||
/* Copyright (c) 2024, MariaDB
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
||||
|
||||
/**
|
||||
@file
|
||||
|
||||
@brief
|
||||
This file contains SargableLeft optimization
|
||||
*/
|
||||
|
||||
#include "mariadb.h"
|
||||
#include "sql_priv.h"
|
||||
#include <m_ctype.h>
|
||||
#include "sql_select.h"
|
||||
|
||||
/*
|
||||
SargableLeft
|
||||
============
|
||||
|
||||
This optimization makes conditions in forms like
|
||||
|
||||
LEFT(key_col, N) = 'string_const'
|
||||
SUBSTRING(key_col, 1, N) = 'string_const'
|
||||
|
||||
sargable. The conditions take the first N characters of key_col and
|
||||
compare them with a string constant.
|
||||
However, producing index lookup intervals for this collation is complex
|
||||
due to contractions.
|
||||
|
||||
Contractions
|
||||
------------
|
||||
A contraction is a property of collation where a sequence of multiple
|
||||
characters is compared as some other character(s).
|
||||
For example, in utfmb4_danish_ci, 'AA' is compared as one character 'Å'
|
||||
which sorts after 'Z':
|
||||
|
||||
MariaDB [test]> select a from t1 order by col1;
|
||||
+------+
|
||||
| col1 |
|
||||
+------+
|
||||
| BA1 | (1)
|
||||
| BC |
|
||||
| BZ |
|
||||
| BAA2 | (2)
|
||||
+------+
|
||||
|
||||
Now suppose we're producing lookup ranges for condition
|
||||
|
||||
LEFT(col1, 2)='BA'
|
||||
|
||||
In addition to looking near 'BA' (1), we need to look into the area right
|
||||
after 'BZ' (2), where we may find 'BAA'.
|
||||
|
||||
Fortunately, this was already implemented for handling LIKE conditions in
|
||||
form 'key_col LIKE 'BA%'. Each collation provides like_range() call which
|
||||
produces lookup range in a collation-aware way.
|
||||
|
||||
Differences between LIKE and LEFT=
|
||||
----------------------------------
|
||||
So can one reduce or even rewrite conditions with LEFT() into LIKE? No, there
|
||||
are differences.
|
||||
|
||||
First, LIKE does character-by-character comparison, ignoring the collation's
|
||||
contractions:
|
||||
|
||||
MariaDB [test]> select col1, col1='AA', col1 LIKE 'AA' from t1;
|
||||
+------+-----------+----------------+
|
||||
| col1 | col1='AA' | col1 LIKE 'AA' |
|
||||
+------+-----------+----------------+
|
||||
| AA | 1 | 1 |
|
||||
| Å | 1 | 0 |
|
||||
+------+-----------+----------------+
|
||||
|
||||
(However, index comparison function uses equality's comparison rules.
|
||||
my_like_range() will produce an index range 'AA' <= col1 <= 'AA'. Reading rows
|
||||
from it will return 'Å' as well)
|
||||
|
||||
Second, LEFT imposes additional constraints on the length of both parts. For
|
||||
example:
|
||||
- LEFT(col,2)='string-longer-than-two-chars' - is false for any value of col.
|
||||
- LEFT(col,2)='A' is not equivalent to (col LIKE 'A%'), consider col='Ab'.
|
||||
|
||||
Take-aways
|
||||
----------
|
||||
- SargableLeft makes use of my_like_range() to produce index intervals.
|
||||
- LEFT(col, N)='foo'
|
||||
- We ignore the value of N when producing the lookup range (this may make the
|
||||
range to include rows for which the predicate is false)
|
||||
= For the SUBSTRING form, we only need to check that M=1 in the
|
||||
SUBSTRING(col, M, N)='foo'.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
@brief Check if this condition is sargable LEFT(key_col, N)='foo', or
|
||||
similar condition with SUBSTRING().
|
||||
|
||||
@detail
|
||||
'foo' here can be any constant we can compute during optimization,
|
||||
Only equality conditions are supported.
|
||||
See SargableLeft above for detals.
|
||||
|
||||
@param field The first argument of LEFT or SUBSTRING if sargable,
|
||||
otherwise deferenced to NULL
|
||||
@param value_idx The index of argument that is the prefix string
|
||||
if sargable, otherwise dereferenced to -1
|
||||
*/
|
||||
|
||||
bool Item_bool_func::with_sargable_substr(Item_field **field, int *value_idx) const
|
||||
{
|
||||
int func_idx, val_idx= -1;
|
||||
Item **func_args, *real= NULL;
|
||||
bool ret= false;
|
||||
enum Functype type;
|
||||
if (functype() != EQ_FUNC)
|
||||
goto done;
|
||||
if (args[0]->type() == FUNC_ITEM)
|
||||
func_idx= 0;
|
||||
else if (args[1]->type() == FUNC_ITEM)
|
||||
func_idx= 1;
|
||||
else
|
||||
goto done;
|
||||
type= ((Item_func *) args[func_idx])->functype();
|
||||
if (type != SUBSTR_FUNC && type != LEFT_FUNC)
|
||||
goto done;
|
||||
func_args= ((Item_func *) args[func_idx])->arguments();
|
||||
real= func_args[0]->real_item();
|
||||
val_idx= 1 - func_idx;
|
||||
if (real->type() == FIELD_ITEM &&
|
||||
args[val_idx]->can_eval_in_optimize() &&
|
||||
(type == LEFT_FUNC || func_args[1]->val_int() == 1))
|
||||
{
|
||||
ret= true;
|
||||
goto done;
|
||||
}
|
||||
real= NULL;
|
||||
val_idx= -1;
|
||||
done:
|
||||
if (field != NULL)
|
||||
*field= (Item_field *) real;
|
||||
if (value_idx != NULL)
|
||||
*value_idx= val_idx;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
@ -7089,6 +7089,17 @@ Item_bool_func2::add_key_fields_optimize_op(JOIN *join, KEY_FIELD **key_fields,
|
|||
(Item_field*) args[0]->real_item(), equal_func,
|
||||
args + 1, 1, usable_tables, sargables);
|
||||
}
|
||||
else
|
||||
{
|
||||
Item_field *field= NULL;
|
||||
int value_idx= -1;
|
||||
/* Handle SUBSTR(key,1,N)='const', 'const'=SUBSTR(key,1,N), etc */
|
||||
if (with_sargable_substr(&field, &value_idx))
|
||||
{
|
||||
add_key_equal_fields(join, key_fields, *and_level, this, field,
|
||||
false, args + value_idx, 1, usable_tables, sargables);
|
||||
}
|
||||
}
|
||||
if (is_local_field(args[1]))
|
||||
{
|
||||
add_key_equal_fields(join, key_fields, *and_level, this,
|
||||
|
|
|
@ -42,11 +42,17 @@ pkey LEFT(`txt_utf8`, 4)
|
|||
0 0123
|
||||
1 1234
|
||||
2 2345
|
||||
SELECT `pkey`, SUBSTR(`txt_utf8`, 1, 4) FROM `auto_test_local`.`tbl_a` ORDER BY SUBSTR(`txt_utf8`, 1, 4) LIMIT 3;
|
||||
pkey SUBSTR(`txt_utf8`, 1, 4)
|
||||
0 0123
|
||||
1 1234
|
||||
2 2345
|
||||
connection child2_1;
|
||||
SET NAMES utf8;
|
||||
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %';
|
||||
argument
|
||||
select t0.`pkey` `pkey`,(left(t0.`txt_utf8` , 4)) `LEFT(``txt_utf8``, 4)` from `auto_test_remote`.`tbl_a` t0 order by (left(`txt_utf8` , 4)) limit 3
|
||||
select t0.`pkey` `pkey`,(substr(t0.`txt_utf8` , 1 , 4)) `SUBSTR(``txt_utf8``, 1, 4)` from `auto_test_remote`.`tbl_a` t0 order by (substr(`txt_utf8` , 1 , 4)) limit 3
|
||||
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %'
|
||||
SELECT pkey, txt_utf8 FROM tbl_a ORDER BY pkey;
|
||||
pkey txt_utf8
|
||||
|
|
|
@ -55,6 +55,7 @@ TRUNCATE TABLE mysql.general_log;
|
|||
SET NAMES utf8;
|
||||
--disable_ps_protocol
|
||||
SELECT `pkey`, LEFT(`txt_utf8`, 4) FROM `auto_test_local`.`tbl_a` ORDER BY LEFT(`txt_utf8`, 4) LIMIT 3;
|
||||
SELECT `pkey`, SUBSTR(`txt_utf8`, 1, 4) FROM `auto_test_local`.`tbl_a` ORDER BY SUBSTR(`txt_utf8`, 1, 4) LIMIT 3;
|
||||
--enable_ps_protocol
|
||||
|
||||
--connection child2_1
|
||||
|
|
|
@ -5028,6 +5028,8 @@ int spider_db_mbase_util::print_item_func(
|
|||
last_str = SPIDER_SQL_IS_NOT_NULL_STR;
|
||||
last_str_length = SPIDER_SQL_IS_NOT_NULL_LEN;
|
||||
break;
|
||||
case Item_func::LEFT_FUNC:
|
||||
case Item_func::SUBSTR_FUNC:
|
||||
case Item_func::UNKNOWN_FUNC:
|
||||
org_func_name= item_func->func_name_cstring();
|
||||
func_name= org_func_name.str;
|
||||
|
|
Loading…
Add table
Reference in a new issue