MDEV-34911 Sargable substr(col, 1, n) = str

Make Item_func_eq of the following forms sargable by updating the relevant range
analysis methods:

1. substr(col, 1, n) = str
2. str = substr(col, 1, n)
3. left(col, n) = str
4. str = left(col, n)

where col is a indexed column and str is a const and inexpensive item
of length n.

We do this by factoring out Item_func_like::get_mm_leaf() and apply it
to a string obtained from escaping str and then appending a wildcard
"%" to it.

The addition of the two Functype enums, LEFT_FUNC and SUBSTR_FUNC,
requires changes in the spider group by handler to continue handling
LEFT and SUBSTR correctly.

Co-authored-by: Yuchen Pei <ycp@mariadb.com>
Co-authored-by: Sergei Petrunia <sergey@mariadb.com>
This commit is contained in:
Yuchen Pei 2024-11-26 11:28:57 +11:00
parent ae998c22b2
commit e021770667
No known key found for this signature in database
GPG key ID: 3DD1B35105743563
13 changed files with 673 additions and 107 deletions

View file

@ -73,6 +73,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
../sql/net_serv.cc ../sql/opt_range.cc
../sql/opt_rewrite_date_cmp.cc
../sql/opt_rewrite_remove_casefold.cc
../sql/opt_sargable_left.cc
../sql/opt_sum.cc
../sql/parse_file.cc ../sql/procedure.cc ../sql/protocol.cc
../sql/records.cc ../sql/repl_failsafe.cc ../sql/rpl_filter.cc

View file

@ -5806,4 +5806,154 @@ coercibility(database()) 3
#
# End of 11.5 tests
#
#
# MDEV-34911 Make conditions SUBSTR(col, 1, n) = const_str sargable
#
create table t (c varchar(5), key (c));
insert into t values ('ddd'), ('bbcd'), ('bba'), ('b%_cd'), ('aaa');
explain select * from t where substr(c, 1, 2) = 'bb';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
select * from t where substr(c, 1, 2) = 'bb';
c
bba
bbcd
explain select * from t where 'bb' = substr(c, 1, 2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
select * from t where 'bb' = substr(c, 1, 2);
c
bba
bbcd
explain select * from t where left(c, 2) = 'bb';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
select * from t where left(c, 2) = 'bb';
c
bba
bbcd
explain select * from t where 'bb' = left(c, 2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
select * from t where 'bb' = left(c, 2);
c
bba
bbcd
explain select * from t where substr(c, 1, 4) = 'b%_c';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 1 Using where; Using index
select * from t where substr(c, 1, 4) = 'b%_c';
c
b%_cd
explain select * from t where substr(c, 1, 2) = concat('b', 'b');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
select * from t where substr(c, 1, 2) = concat('b', 'b');
c
bba
bbcd
explain select * from t where substr(c, 1, 2) = substr('bb1', 1, 2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
select * from t where substr(c, 1, 2) = substr('bb1', 1, 2);
c
bba
bbcd
create table t1 (a varchar(32), b int, index(a));
insert into t1 select seq, seq / 33 from seq_1_to_100;
explain select * from t1 where substr(a, 1, 2)='80';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 35 NULL 1 Using index condition
explain select * from t1 where substr(a, 1, b)='80';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 35 NULL 1 Using where
create view v1 as select a,b from t1;
explain select * from v1 where substr(a, 1, 2)='80';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 35 NULL 1 Using index condition
drop view v1;
drop table t1;
explain select * from t where substr(c, 1, 1) = 'bb';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
select * from t where substr(c, 1, 1) = 'bb';
c
explain select * from t where substr(c, 1, 3) = 'bb';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
select * from t where substr(c, 1, 3) = 'bb';
c
explain select * from t where substr(c, 1) = 'bb';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range c c 8 NULL 2 Using where; Using index
select * from t where substr(c, 1) = 'bb';
c
explain select * from t where substr(c, 1, 2) = substr(c, 2, 3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t index NULL c 8 NULL 5 Using where; Using index
select * from t where substr(c, 1, 2) = substr(c, 2, 3);
c
aaa
ddd
drop index c on t;
explain select * from t where substr(c, 1, 2) = 'bb';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ALL NULL NULL NULL NULL 5 Using where
select * from t where substr(c, 1, 2) = 'bb';
c
bbcd
bba
drop table t;
set @old_collation_connection=@@collation_connection;
SET collation_connection=utf32_czech_ci;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('%c'),('%ce'),('%cé'),('%ch'), ('c'), ('d'), ('c%'), ('c\\g');
SELECT * FROM t1 WHERE s1 LIKE '\\%c%';
s1
%c
%ce
%cé
%ch
SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c';
s1
%c
%ce
%cé
%ch
SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%';
s1
c\g
SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\';
s1
c\g
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE '\\%c%';
s1
%c
%ce
%cé
%ch
SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c';
s1
%c
%ce
%cé
%ch
SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%';
s1
c\g
SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\';
s1
c\g
DROP TABLE t1;
set collation_connection=@old_collation_connection;
create table t1 (a varchar(100), b varchar(100), key(a));
insert into t1 select seq, seq from seq_1_to_1000;
explain select * from t1 where LEFT(a, 20) ='%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL a NULL NULL NULL 1000 Using where
drop table t1;
#
# End of 11.8 tests
#
ALTER DATABASE test CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci;

View file

@ -2712,4 +2712,120 @@ SELECT
--echo # End of 11.5 tests
--echo #
--echo #
--echo # MDEV-34911 Make conditions SUBSTR(col, 1, n) = const_str sargable
--echo #
create table t (c varchar(5), key (c));
insert into t values ('ddd'), ('bbcd'), ('bba'), ('b%_cd'), ('aaa');
# positive cases
let $query=
select * from t where substr(c, 1, 2) = 'bb';
eval explain $query;
eval $query;
let $query=
select * from t where 'bb' = substr(c, 1, 2);
eval explain $query;
eval $query;
let $query=
select * from t where left(c, 2) = 'bb';
eval explain $query;
eval $query;
let $query=
select * from t where 'bb' = left(c, 2);
eval explain $query;
eval $query;
let $query=
select * from t where substr(c, 1, 4) = 'b%_c';
eval explain $query;
eval $query;
# RHS can_eval_in_optimize()
let $query=
select * from t where substr(c, 1, 2) = concat('b', 'b');
eval explain $query;
eval $query;
# RHS can_eval_in_optimize()
let $query=
select * from t where substr(c, 1, 2) = substr('bb1', 1, 2);
eval explain $query;
eval $query;
# works with view
create table t1 (a varchar(32), b int, index(a));
insert into t1 select seq, seq / 33 from seq_1_to_100;
explain select * from t1 where substr(a, 1, 2)='80';
# The range is a necessary condition, and further filtering happens in
# the exec stage
explain select * from t1 where substr(a, 1, b)='80';
create view v1 as select a,b from t1;
explain select * from v1 where substr(a, 1, 2)='80';
drop view v1;
drop table t1;
# negative cases
let $query=
select * from t where substr(c, 1, 1) = 'bb';
eval explain $query;
eval $query;
let $query=
select * from t where substr(c, 1, 3) = 'bb';
eval explain $query;
eval $query;
let $query=
select * from t where substr(c, 1) = 'bb';
eval explain $query;
eval $query;
let $query=
select * from t where substr(c, 1, 2) = substr(c, 2, 3);
eval explain $query;
eval $query;
# full table scan when no index on sargable substr
drop index c on t;
let $query=
select * from t where substr(c, 1, 2) = 'bb';
eval explain $query;
eval $query;
drop table t;
# czech has the same behaviour as in LIKE
set @old_collation_connection=@@collation_connection;
SET collation_connection=utf32_czech_ci;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('%c'),('%ce'),('%cé'),('%ch'), ('c'), ('d'), ('c%'), ('c\\g');
SELECT * FROM t1 WHERE s1 LIKE '\\%c%';
SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c';
SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%';
SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\';
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE '\\%c%';
SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c';
SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%';
SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\';
DROP TABLE t1;
set collation_connection=@old_collation_connection;
# no stack overrun escaping long string
create table t1 (a varchar(100), b varchar(100), key(a));
insert into t1 select seq, seq from seq_1_to_1000;
explain select * from t1 where LEFT(a, 20) ='%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%';
drop table t1;
--echo #
--echo # End of 11.8 tests
--echo #
--source include/test_db_charset_restore.inc

View file

@ -116,6 +116,7 @@ SET (SQL_SOURCE
opt_range.cc vector_mhnsw.cc
opt_rewrite_date_cmp.cc
opt_rewrite_remove_casefold.cc
opt_sargable_left.cc
opt_sum.cc
../sql-common/pack.c parse_file.cc password.c procedure.cc
protocol.cc records.cc repl_failsafe.cc rpl_filter.cc

View file

@ -248,6 +248,7 @@ public:
bool fix_length_and_dec(THD *thd) override { decimals=0; max_length=1; return FALSE; }
decimal_digits_t decimal_precision() const override { return 1; }
bool need_parentheses_in_default() override { return true; }
bool with_sargable_substr(Item_field **field = NULL, int *value_idx = NULL) const;
};
@ -540,8 +541,14 @@ public:
may succeed.
*/
if (!(ftree= get_full_func_mm_tree_for_args(param, args[0], args[1])) &&
!(ftree= get_full_func_mm_tree_for_args(param, args[1], args[0])))
ftree= Item_func::get_mm_tree(param, cond_ptr);
!(ftree= get_full_func_mm_tree_for_args(param, args[1], args[0])) &&
!(ftree= Item_func::get_mm_tree(param, cond_ptr)))
{
Item_field *field= NULL;
int value_idx= -1;
if (with_sargable_substr(&field, &value_idx))
DBUG_RETURN(get_full_func_mm_tree_for_args(param, field, args[value_idx]));
}
DBUG_RETURN(ftree);
}
};

View file

@ -106,7 +106,7 @@ public:
JSON_EXTRACT_FUNC, JSON_VALID_FUNC, ROWNUM_FUNC,
CASE_SEARCHED_FUNC, // Used by ColumnStore/Spider
CASE_SIMPLE_FUNC, // Used by ColumnStore/spider,
DATE_FUNC, YEAR_FUNC
DATE_FUNC, YEAR_FUNC, SUBSTR_FUNC, LEFT_FUNC
};
/*

View file

@ -693,6 +693,7 @@ public:
bool hash_not_null(Hasher *hasher) override;
String *val_str(String *) override;
bool fix_length_and_dec(THD *thd) override;
enum Functype functype() const override { return LEFT_FUNC; }
LEX_CSTRING func_name_cstring() const override
{
static LEX_CSTRING name= {STRING_WITH_LEN("left") };
@ -739,6 +740,7 @@ public:
print_sql_mode_qualified_name(str, query_type);
print_args_parenthesized(str, query_type);
}
enum Functype functype() const override { return SUBSTR_FUNC; }
LEX_CSTRING func_name_cstring() const override
{
static LEX_CSTRING name= {STRING_WITH_LEN("substr") };

View file

@ -8983,6 +8983,205 @@ static bool is_field_an_unique_index(Field *field)
}
/*
@brief
Given a string, escape the LIKE pattern characters (%, _, \) with the '\'.
@detail
Currently we fail if the escaped string didn't fit into MAX_FIELD_WIDTH
bytes but this is not necessary.
*/
static bool escape_like_characters(String *res)
{
CHARSET_INFO *cs= res->charset();
StringBuffer<MAX_FIELD_WIDTH> tmp2(cs);
tmp2.copy(*res);
int ret;
uchar *src= (uchar *) tmp2.ptr(), *src_end= (uchar *) tmp2.end(),
*dst= (uchar *) res->ptr(), *dst_end= dst + MAX_FIELD_WIDTH;
my_wc_t wc;
while (src < src_end)
{
/* Advance to the next character */
if ((ret= my_ci_mb_wc(cs, &wc, src, src_end)) <= 0)
{
if (ret == MY_CS_ILSEQ) /* Bad sequence */
return true; /* Cannot LIKE optimize */
break; /* End of the string */
}
src+= ret;
/* If the next char is escape-able in actual LIKE, escape it */
if (wc == (my_wc_t) '%' || wc == (my_wc_t) '_' || wc == (my_wc_t) '\\')
{
if ((ret= my_ci_wc_mb(cs, (my_wc_t) '\\', dst, dst_end)) <= 0)
return true; /* No space - no LIKE optimize */
dst+= ret;
}
if ((ret= my_ci_wc_mb(cs, wc, dst, dst_end)) <= 0)
return true; /* No space - no LIKE optimize */
dst+= ret;
}
res->length((char *) dst - res->ptr());
return false; /* Ok */
}
/*
@brief
Produce SEL_ARG interval for LIKE and prefix match functions.
@detail
This is used for conditions in forms:
- key_col LIKE 'sargable_pattern'
- SUBSTR(key_col, 1, ...) = 'value', or equivalent conditions involving
LEFT() instead of SUBSTR() - see with_sargable_substr() for details.
@param
item The comparison item (Item_func_like or Item_func_eq)
*/
static SEL_ARG *
get_mm_leaf_for_LIKE(Item_bool_func *item, RANGE_OPT_PARAM *param,
Field *field, KEY_PART *key_part,
Item_func::Functype type, Item *value)
{
DBUG_ENTER("get_mm_leaf_for_sargable");
DBUG_ASSERT(value);
if (key_part->image_type != Field::itRAW)
DBUG_RETURN(0);
uint keynr= param->real_keynr[key_part->key];
if (param->using_real_indexes &&
!field->optimize_range(keynr, key_part->part))
DBUG_RETURN(0);
if (field->result_type() == STRING_RESULT &&
field->charset() != item->compare_collation())
{
/*
For equalities where one side is LEFT or SUBSTR
param->note_unusable_keys is BITMAP_EXCEPT_ANY_EQUALITY and the
following if condition is satisfied. But it will not result in
duplicate warnings because the ref optimizer does not cover this
case.
*/
if (param->note_unusable_keys & Item_func::BITMAP_LIKE)
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
item->func_name_cstring(),
item->compare_collation(),
value,
Data_type_compatibility::
INCOMPATIBLE_COLLATION);
DBUG_RETURN(0);
}
StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
String *res;
if (!(res= value->val_str(&tmp)))
DBUG_RETURN(&null_element);
if (field->cmp_type() != STRING_RESULT ||
field->type_handler() == &type_handler_enum ||
field->type_handler() == &type_handler_set)
{
if (param->note_unusable_keys & Item_func::BITMAP_LIKE)
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
item->func_name_cstring(),
item->compare_collation(),
value,
Data_type_compatibility::
INCOMPATIBLE_DATA_TYPE);
DBUG_RETURN(0);
}
/*
TODO:
Check if this was a function. This should have be optimized away
in the sql_select.cc
*/
if (res != &tmp)
{
tmp.copy(*res); // Get own copy
res= &tmp;
}
/*
If we're handling a predicate in one of these forms:
- LEFT(key_col, N) ='string_const'
- SUBSTRING(key_col, 1, N)='string_const'
then we need to:
- escape the LIKE pattern characters in the string_const,
- make the search pattern to be 'string_const%':
*/
if (type != Item_func::LIKE_FUNC)
{
DBUG_ASSERT(type == Item_func::EQ_FUNC);
if (escape_like_characters(res))
DBUG_RETURN(0); /* Error, no optimization */
res->append("%", 1);
}
uint maybe_null= (uint) field->real_maybe_null();
size_t field_length= field->pack_length() + maybe_null;
size_t offset= maybe_null;
size_t length= key_part->store_length;
if (length != key_part->length + maybe_null)
{
/* key packed with length prefix */
offset+= HA_KEY_BLOB_LENGTH;
field_length= length - HA_KEY_BLOB_LENGTH;
}
else
{
if (unlikely(length < field_length))
{
/*
This can only happen in a table created with UNIREG where one key
overlaps many fields
*/
length= field_length;
}
else
field_length= length;
}
length+= offset;
uchar *min_str,*max_str;
if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
DBUG_RETURN(0);
max_str= min_str + length;
if (maybe_null)
max_str[0]= min_str[0]=0;
size_t min_length, max_length;
field_length-= maybe_null;
/* If the item is a LIKE, use its escape, otherwise use backslash */
int escape= type == Item_func::LIKE_FUNC ?
((Item_func_like *) item)->escape : '\\';
if (field->charset()->like_range(res->ptr(), res->length(),
escape, wild_one, wild_many,
field_length,
(char*) min_str + offset,
(char*) max_str + offset,
&min_length, &max_length))
DBUG_RETURN(0); // Can't optimize with LIKE
if (offset != maybe_null) // BLOB or VARCHAR
{
int2store(min_str + maybe_null, min_length);
int2store(max_str + maybe_null, max_length);
}
SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
DBUG_RETURN(tree);
}
SEL_TREE *
Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
Item_func::Functype type, Item *value)
@ -8995,6 +9194,9 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
KEY_PART *end = param->key_parts_end;
SEL_TREE *tree=0;
table_map value_used_tables= 0;
bool know_sargable_substr= false;
bool sargable_substr; // protected by know_sargable_substr
if (value &&
(value_used_tables= value->used_tables()) &
~(param->prev_tables | param->read_tables))
@ -9019,6 +9221,17 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field,
*/
MEM_ROOT *tmp_root= param->mem_root;
param->thd->mem_root= param->old_root;
if (!know_sargable_substr)
{
sargable_substr= with_sargable_substr();
know_sargable_substr= true;
}
if (sargable_substr)
{
sel_arg= get_mm_leaf_for_LIKE(this, param, key_part->field, key_part,
type, value);
}
else
sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value);
param->thd->mem_root= tmp_root;
@ -9087,109 +9300,7 @@ Item_func_like::get_mm_leaf(RANGE_OPT_PARAM *param,
Item_func::Functype type, Item *value)
{
DBUG_ENTER("Item_func_like::get_mm_leaf");
DBUG_ASSERT(value);
if (key_part->image_type != Field::itRAW)
DBUG_RETURN(0);
uint keynr= param->real_keynr[key_part->key];
if (param->using_real_indexes &&
!field->optimize_range(keynr, key_part->part))
DBUG_RETURN(0);
if (field->result_type() == STRING_RESULT &&
field->charset() != compare_collation())
{
if (param->note_unusable_keys & BITMAP_LIKE)
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
func_name_cstring(),
compare_collation(),
value,
Data_type_compatibility::
INCOMPATIBLE_COLLATION);
DBUG_RETURN(0);
}
StringBuffer<MAX_FIELD_WIDTH> tmp(value->collation.collation);
String *res;
if (!(res= value->val_str(&tmp)))
DBUG_RETURN(&null_element);
if (field->cmp_type() != STRING_RESULT ||
field->type_handler() == &type_handler_enum ||
field->type_handler() == &type_handler_set)
{
if (param->note_unusable_keys & BITMAP_LIKE)
field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part,
func_name_cstring(),
compare_collation(),
value,
Data_type_compatibility::
INCOMPATIBLE_DATA_TYPE);
DBUG_RETURN(0);
}
/*
TODO:
Check if this was a function. This should have be optimized away
in the sql_select.cc
*/
if (res != &tmp)
{
tmp.copy(*res); // Get own copy
res= &tmp;
}
uint maybe_null= (uint) field->real_maybe_null();
size_t field_length= field->pack_length() + maybe_null;
size_t offset= maybe_null;
size_t length= key_part->store_length;
if (length != key_part->length + maybe_null)
{
/* key packed with length prefix */
offset+= HA_KEY_BLOB_LENGTH;
field_length= length - HA_KEY_BLOB_LENGTH;
}
else
{
if (unlikely(length < field_length))
{
/*
This can only happen in a table created with UNIREG where one key
overlaps many fields
*/
length= field_length;
}
else
field_length= length;
}
length+= offset;
uchar *min_str,*max_str;
if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2)))
DBUG_RETURN(0);
max_str= min_str + length;
if (maybe_null)
max_str[0]= min_str[0]=0;
size_t min_length, max_length;
field_length-= maybe_null;
if (field->charset()->like_range(res->ptr(), res->length(),
escape, wild_one, wild_many,
field_length,
(char*) min_str + offset,
(char*) max_str + offset,
&min_length, &max_length))
DBUG_RETURN(0); // Can't optimize with LIKE
if (offset != maybe_null) // BLOB or VARCHAR
{
int2store(min_str + maybe_null, min_length);
int2store(max_str + maybe_null, max_length);
}
SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str);
DBUG_RETURN(tree);
DBUG_RETURN(get_mm_leaf_for_LIKE(this, param, field, key_part, type, value));
}

158
sql/opt_sargable_left.cc Normal file
View file

@ -0,0 +1,158 @@
/* Copyright (c) 2024, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
/**
@file
@brief
This file contains SargableLeft optimization
*/
#include "mariadb.h"
#include "sql_priv.h"
#include <m_ctype.h>
#include "sql_select.h"
/*
SargableLeft
============
This optimization makes conditions in forms like
LEFT(key_col, N) = 'string_const'
SUBSTRING(key_col, 1, N) = 'string_const'
sargable. The conditions take the first N characters of key_col and
compare them with a string constant.
However, producing index lookup intervals for this collation is complex
due to contractions.
Contractions
------------
A contraction is a property of collation where a sequence of multiple
characters is compared as some other character(s).
For example, in utfmb4_danish_ci, 'AA' is compared as one character 'Å'
which sorts after 'Z':
MariaDB [test]> select a from t1 order by col1;
+------+
| col1 |
+------+
| BA1 | (1)
| BC |
| BZ |
| BAA2 | (2)
+------+
Now suppose we're producing lookup ranges for condition
LEFT(col1, 2)='BA'
In addition to looking near 'BA' (1), we need to look into the area right
after 'BZ' (2), where we may find 'BAA'.
Fortunately, this was already implemented for handling LIKE conditions in
form 'key_col LIKE 'BA%'. Each collation provides like_range() call which
produces lookup range in a collation-aware way.
Differences between LIKE and LEFT=
----------------------------------
So can one reduce or even rewrite conditions with LEFT() into LIKE? No, there
are differences.
First, LIKE does character-by-character comparison, ignoring the collation's
contractions:
MariaDB [test]> select col1, col1='AA', col1 LIKE 'AA' from t1;
+------+-----------+----------------+
| col1 | col1='AA' | col1 LIKE 'AA' |
+------+-----------+----------------+
| AA | 1 | 1 |
| Å | 1 | 0 |
+------+-----------+----------------+
(However, index comparison function uses equality's comparison rules.
my_like_range() will produce an index range 'AA' <= col1 <= 'AA'. Reading rows
from it will return 'Å' as well)
Second, LEFT imposes additional constraints on the length of both parts. For
example:
- LEFT(col,2)='string-longer-than-two-chars' - is false for any value of col.
- LEFT(col,2)='A' is not equivalent to (col LIKE 'A%'), consider col='Ab'.
Take-aways
----------
- SargableLeft makes use of my_like_range() to produce index intervals.
- LEFT(col, N)='foo'
- We ignore the value of N when producing the lookup range (this may make the
range to include rows for which the predicate is false)
= For the SUBSTRING form, we only need to check that M=1 in the
SUBSTRING(col, M, N)='foo'.
*/
/*
@brief Check if this condition is sargable LEFT(key_col, N)='foo', or
similar condition with SUBSTRING().
@detail
'foo' here can be any constant we can compute during optimization,
Only equality conditions are supported.
See SargableLeft above for detals.
@param field The first argument of LEFT or SUBSTRING if sargable,
otherwise deferenced to NULL
@param value_idx The index of argument that is the prefix string
if sargable, otherwise dereferenced to -1
*/
bool Item_bool_func::with_sargable_substr(Item_field **field, int *value_idx) const
{
int func_idx, val_idx= -1;
Item **func_args, *real= NULL;
bool ret= false;
enum Functype type;
if (functype() != EQ_FUNC)
goto done;
if (args[0]->type() == FUNC_ITEM)
func_idx= 0;
else if (args[1]->type() == FUNC_ITEM)
func_idx= 1;
else
goto done;
type= ((Item_func *) args[func_idx])->functype();
if (type != SUBSTR_FUNC && type != LEFT_FUNC)
goto done;
func_args= ((Item_func *) args[func_idx])->arguments();
real= func_args[0]->real_item();
val_idx= 1 - func_idx;
if (real->type() == FIELD_ITEM &&
args[val_idx]->can_eval_in_optimize() &&
(type == LEFT_FUNC || func_args[1]->val_int() == 1))
{
ret= true;
goto done;
}
real= NULL;
val_idx= -1;
done:
if (field != NULL)
*field= (Item_field *) real;
if (value_idx != NULL)
*value_idx= val_idx;
return ret;
}

View file

@ -7089,6 +7089,17 @@ Item_bool_func2::add_key_fields_optimize_op(JOIN *join, KEY_FIELD **key_fields,
(Item_field*) args[0]->real_item(), equal_func,
args + 1, 1, usable_tables, sargables);
}
else
{
Item_field *field= NULL;
int value_idx= -1;
/* Handle SUBSTR(key,1,N)='const', 'const'=SUBSTR(key,1,N), etc */
if (with_sargable_substr(&field, &value_idx))
{
add_key_equal_fields(join, key_fields, *and_level, this, field,
false, args + value_idx, 1, usable_tables, sargables);
}
}
if (is_local_field(args[1]))
{
add_key_equal_fields(join, key_fields, *and_level, this,

View file

@ -42,11 +42,17 @@ pkey LEFT(`txt_utf8`, 4)
0 0123
1 1234
2 2345
SELECT `pkey`, SUBSTR(`txt_utf8`, 1, 4) FROM `auto_test_local`.`tbl_a` ORDER BY SUBSTR(`txt_utf8`, 1, 4) LIMIT 3;
pkey SUBSTR(`txt_utf8`, 1, 4)
0 0123
1 1234
2 2345
connection child2_1;
SET NAMES utf8;
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %';
argument
select t0.`pkey` `pkey`,(left(t0.`txt_utf8` , 4)) `LEFT(``txt_utf8``, 4)` from `auto_test_remote`.`tbl_a` t0 order by (left(`txt_utf8` , 4)) limit 3
select t0.`pkey` `pkey`,(substr(t0.`txt_utf8` , 1 , 4)) `SUBSTR(``txt_utf8``, 1, 4)` from `auto_test_remote`.`tbl_a` t0 order by (substr(`txt_utf8` , 1 , 4)) limit 3
SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %'
SELECT pkey, txt_utf8 FROM tbl_a ORDER BY pkey;
pkey txt_utf8

View file

@ -55,6 +55,7 @@ TRUNCATE TABLE mysql.general_log;
SET NAMES utf8;
--disable_ps_protocol
SELECT `pkey`, LEFT(`txt_utf8`, 4) FROM `auto_test_local`.`tbl_a` ORDER BY LEFT(`txt_utf8`, 4) LIMIT 3;
SELECT `pkey`, SUBSTR(`txt_utf8`, 1, 4) FROM `auto_test_local`.`tbl_a` ORDER BY SUBSTR(`txt_utf8`, 1, 4) LIMIT 3;
--enable_ps_protocol
--connection child2_1

View file

@ -5028,6 +5028,8 @@ int spider_db_mbase_util::print_item_func(
last_str = SPIDER_SQL_IS_NOT_NULL_STR;
last_str_length = SPIDER_SQL_IS_NOT_NULL_LEN;
break;
case Item_func::LEFT_FUNC:
case Item_func::SUBSTR_FUNC:
case Item_func::UNKNOWN_FUNC:
org_func_name= item_func->func_name_cstring();
func_name= org_func_name.str;