From e021770667851233c8cda34e9360606adfd3ea0c Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Tue, 26 Nov 2024 11:28:57 +1100 Subject: [PATCH] MDEV-34911 Sargable substr(col, 1, n) = str Make Item_func_eq of the following forms sargable by updating the relevant range analysis methods: 1. substr(col, 1, n) = str 2. str = substr(col, 1, n) 3. left(col, n) = str 4. str = left(col, n) where col is a indexed column and str is a const and inexpensive item of length n. We do this by factoring out Item_func_like::get_mm_leaf() and apply it to a string obtained from escaping str and then appending a wildcard "%" to it. The addition of the two Functype enums, LEFT_FUNC and SUBSTR_FUNC, requires changes in the spider group by handler to continue handling LEFT and SUBSTR correctly. Co-authored-by: Yuchen Pei Co-authored-by: Sergei Petrunia --- libmysqld/CMakeLists.txt | 1 + mysql-test/main/func_str.result | 150 ++++++++ mysql-test/main/func_str.test | 116 +++++++ sql/CMakeLists.txt | 1 + sql/item_cmpfunc.h | 11 +- sql/item_func.h | 2 +- sql/item_strfunc.h | 2 + sql/opt_range.cc | 319 ++++++++++++------ sql/opt_sargable_left.cc | 158 +++++++++ sql/sql_select.cc | 11 + .../bugfix/r/select_with_backquote.result | 6 + .../bugfix/t/select_with_backquote.test | 1 + storage/spider/spd_db_mysql.cc | 2 + 13 files changed, 673 insertions(+), 107 deletions(-) create mode 100644 sql/opt_sargable_left.cc diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt index dad7631d2ed..b1114f9c91e 100644 --- a/libmysqld/CMakeLists.txt +++ b/libmysqld/CMakeLists.txt @@ -73,6 +73,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc ../sql/net_serv.cc ../sql/opt_range.cc ../sql/opt_rewrite_date_cmp.cc ../sql/opt_rewrite_remove_casefold.cc + ../sql/opt_sargable_left.cc ../sql/opt_sum.cc ../sql/parse_file.cc ../sql/procedure.cc ../sql/protocol.cc ../sql/records.cc ../sql/repl_failsafe.cc ../sql/rpl_filter.cc diff --git a/mysql-test/main/func_str.result b/mysql-test/main/func_str.result index f73cfa0ada8..5c89b9aa6b4 100644 --- a/mysql-test/main/func_str.result +++ b/mysql-test/main/func_str.result @@ -5806,4 +5806,154 @@ coercibility(database()) 3 # # End of 11.5 tests # +# +# MDEV-34911 Make conditions SUBSTR(col, 1, n) = const_str sargable +# +create table t (c varchar(5), key (c)); +insert into t values ('ddd'), ('bbcd'), ('bba'), ('b%_cd'), ('aaa'); +explain select * from t where substr(c, 1, 2) = 'bb'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 2 Using where; Using index +select * from t where substr(c, 1, 2) = 'bb'; +c +bba +bbcd +explain select * from t where 'bb' = substr(c, 1, 2); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 2 Using where; Using index +select * from t where 'bb' = substr(c, 1, 2); +c +bba +bbcd +explain select * from t where left(c, 2) = 'bb'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 2 Using where; Using index +select * from t where left(c, 2) = 'bb'; +c +bba +bbcd +explain select * from t where 'bb' = left(c, 2); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 2 Using where; Using index +select * from t where 'bb' = left(c, 2); +c +bba +bbcd +explain select * from t where substr(c, 1, 4) = 'b%_c'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 1 Using where; Using index +select * from t where substr(c, 1, 4) = 'b%_c'; +c +b%_cd +explain select * from t where substr(c, 1, 2) = concat('b', 'b'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 2 Using where; Using index +select * from t where substr(c, 1, 2) = concat('b', 'b'); +c +bba +bbcd +explain select * from t where substr(c, 1, 2) = substr('bb1', 1, 2); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 2 Using where; Using index +select * from t where substr(c, 1, 2) = substr('bb1', 1, 2); +c +bba +bbcd +create table t1 (a varchar(32), b int, index(a)); +insert into t1 select seq, seq / 33 from seq_1_to_100; +explain select * from t1 where substr(a, 1, 2)='80'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 35 NULL 1 Using index condition +explain select * from t1 where substr(a, 1, b)='80'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 35 NULL 1 Using where +create view v1 as select a,b from t1; +explain select * from v1 where substr(a, 1, 2)='80'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 35 NULL 1 Using index condition +drop view v1; +drop table t1; +explain select * from t where substr(c, 1, 1) = 'bb'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 2 Using where; Using index +select * from t where substr(c, 1, 1) = 'bb'; +c +explain select * from t where substr(c, 1, 3) = 'bb'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 2 Using where; Using index +select * from t where substr(c, 1, 3) = 'bb'; +c +explain select * from t where substr(c, 1) = 'bb'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t range c c 8 NULL 2 Using where; Using index +select * from t where substr(c, 1) = 'bb'; +c +explain select * from t where substr(c, 1, 2) = substr(c, 2, 3); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t index NULL c 8 NULL 5 Using where; Using index +select * from t where substr(c, 1, 2) = substr(c, 2, 3); +c +aaa +ddd +drop index c on t; +explain select * from t where substr(c, 1, 2) = 'bb'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t ALL NULL NULL NULL NULL 5 Using where +select * from t where substr(c, 1, 2) = 'bb'; +c +bbcd +bba +drop table t; +set @old_collation_connection=@@collation_connection; +SET collation_connection=utf32_czech_ci; +CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0; +INSERT INTO t1 VALUES ('%c'),('%ce'),('%cé'),('%ch'), ('c'), ('d'), ('c%'), ('c\\g'); +SELECT * FROM t1 WHERE s1 LIKE '\\%c%'; +s1 +%c +%ce +%cé +%ch +SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c'; +s1 +%c +%ce +%cé +%ch +SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%'; +s1 +c\g +SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\'; +s1 +c\g +ALTER TABLE t1 ADD KEY s1 (s1); +SELECT * FROM t1 WHERE s1 LIKE '\\%c%'; +s1 +%c +%ce +%cé +%ch +SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c'; +s1 +%c +%ce +%cé +%ch +SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%'; +s1 +c\g +SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\'; +s1 +c\g +DROP TABLE t1; +set collation_connection=@old_collation_connection; +create table t1 (a varchar(100), b varchar(100), key(a)); +insert into t1 select seq, seq from seq_1_to_1000; +explain select * from t1 where LEFT(a, 20) ='%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 1000 Using where +drop table t1; +# +# End of 11.8 tests +# ALTER DATABASE test CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci; diff --git a/mysql-test/main/func_str.test b/mysql-test/main/func_str.test index ff3fbb00d98..0fbefac6bc2 100644 --- a/mysql-test/main/func_str.test +++ b/mysql-test/main/func_str.test @@ -2712,4 +2712,120 @@ SELECT --echo # End of 11.5 tests --echo # +--echo # +--echo # MDEV-34911 Make conditions SUBSTR(col, 1, n) = const_str sargable +--echo # + +create table t (c varchar(5), key (c)); +insert into t values ('ddd'), ('bbcd'), ('bba'), ('b%_cd'), ('aaa'); + +# positive cases + +let $query= +select * from t where substr(c, 1, 2) = 'bb'; +eval explain $query; +eval $query; + +let $query= +select * from t where 'bb' = substr(c, 1, 2); +eval explain $query; +eval $query; + +let $query= +select * from t where left(c, 2) = 'bb'; +eval explain $query; +eval $query; + +let $query= +select * from t where 'bb' = left(c, 2); +eval explain $query; +eval $query; + +let $query= +select * from t where substr(c, 1, 4) = 'b%_c'; +eval explain $query; +eval $query; + +# RHS can_eval_in_optimize() +let $query= +select * from t where substr(c, 1, 2) = concat('b', 'b'); +eval explain $query; +eval $query; + +# RHS can_eval_in_optimize() +let $query= +select * from t where substr(c, 1, 2) = substr('bb1', 1, 2); +eval explain $query; +eval $query; + +# works with view +create table t1 (a varchar(32), b int, index(a)); +insert into t1 select seq, seq / 33 from seq_1_to_100; +explain select * from t1 where substr(a, 1, 2)='80'; +# The range is a necessary condition, and further filtering happens in +# the exec stage +explain select * from t1 where substr(a, 1, b)='80'; +create view v1 as select a,b from t1; +explain select * from v1 where substr(a, 1, 2)='80'; +drop view v1; +drop table t1; + +# negative cases + +let $query= +select * from t where substr(c, 1, 1) = 'bb'; +eval explain $query; +eval $query; + +let $query= +select * from t where substr(c, 1, 3) = 'bb'; +eval explain $query; +eval $query; + +let $query= +select * from t where substr(c, 1) = 'bb'; +eval explain $query; +eval $query; + +let $query= +select * from t where substr(c, 1, 2) = substr(c, 2, 3); +eval explain $query; +eval $query; + +# full table scan when no index on sargable substr +drop index c on t; + +let $query= +select * from t where substr(c, 1, 2) = 'bb'; +eval explain $query; +eval $query; + +drop table t; + +# czech has the same behaviour as in LIKE +set @old_collation_connection=@@collation_connection; +SET collation_connection=utf32_czech_ci; +CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0; +INSERT INTO t1 VALUES ('%c'),('%ce'),('%cé'),('%ch'), ('c'), ('d'), ('c%'), ('c\\g'); +SELECT * FROM t1 WHERE s1 LIKE '\\%c%'; +SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c'; +SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%'; +SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\'; +ALTER TABLE t1 ADD KEY s1 (s1); +SELECT * FROM t1 WHERE s1 LIKE '\\%c%'; +SELECT * FROM t1 WHERE substr(s1, 1, 2) = '%c'; +SELECT * FROM t1 WHERE s1 LIKE 'c\\\\%'; +SELECT * FROM t1 WHERE substr(s1, 1, 2) = 'c\\'; +DROP TABLE t1; +set collation_connection=@old_collation_connection; + +# no stack overrun escaping long string +create table t1 (a varchar(100), b varchar(100), key(a)); +insert into t1 select seq, seq from seq_1_to_1000; +explain select * from t1 where LEFT(a, 20) ='%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%'; +drop table t1; + +--echo # +--echo # End of 11.8 tests +--echo # --source include/test_db_charset_restore.inc diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 95a83d12440..fca1a2f5381 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -116,6 +116,7 @@ SET (SQL_SOURCE opt_range.cc vector_mhnsw.cc opt_rewrite_date_cmp.cc opt_rewrite_remove_casefold.cc + opt_sargable_left.cc opt_sum.cc ../sql-common/pack.c parse_file.cc password.c procedure.cc protocol.cc records.cc repl_failsafe.cc rpl_filter.cc diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index d439776ca82..36410752a55 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -248,6 +248,7 @@ public: bool fix_length_and_dec(THD *thd) override { decimals=0; max_length=1; return FALSE; } decimal_digits_t decimal_precision() const override { return 1; } bool need_parentheses_in_default() override { return true; } + bool with_sargable_substr(Item_field **field = NULL, int *value_idx = NULL) const; }; @@ -540,8 +541,14 @@ public: may succeed. */ if (!(ftree= get_full_func_mm_tree_for_args(param, args[0], args[1])) && - !(ftree= get_full_func_mm_tree_for_args(param, args[1], args[0]))) - ftree= Item_func::get_mm_tree(param, cond_ptr); + !(ftree= get_full_func_mm_tree_for_args(param, args[1], args[0])) && + !(ftree= Item_func::get_mm_tree(param, cond_ptr))) + { + Item_field *field= NULL; + int value_idx= -1; + if (with_sargable_substr(&field, &value_idx)) + DBUG_RETURN(get_full_func_mm_tree_for_args(param, field, args[value_idx])); + } DBUG_RETURN(ftree); } }; diff --git a/sql/item_func.h b/sql/item_func.h index ca977b78d74..39c8f7175a4 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -106,7 +106,7 @@ public: JSON_EXTRACT_FUNC, JSON_VALID_FUNC, ROWNUM_FUNC, CASE_SEARCHED_FUNC, // Used by ColumnStore/Spider CASE_SIMPLE_FUNC, // Used by ColumnStore/spider, - DATE_FUNC, YEAR_FUNC + DATE_FUNC, YEAR_FUNC, SUBSTR_FUNC, LEFT_FUNC }; /* diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 824902a0d0b..27751210a8a 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -693,6 +693,7 @@ public: bool hash_not_null(Hasher *hasher) override; String *val_str(String *) override; bool fix_length_and_dec(THD *thd) override; + enum Functype functype() const override { return LEFT_FUNC; } LEX_CSTRING func_name_cstring() const override { static LEX_CSTRING name= {STRING_WITH_LEN("left") }; @@ -739,6 +740,7 @@ public: print_sql_mode_qualified_name(str, query_type); print_args_parenthesized(str, query_type); } + enum Functype functype() const override { return SUBSTR_FUNC; } LEX_CSTRING func_name_cstring() const override { static LEX_CSTRING name= {STRING_WITH_LEN("substr") }; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 27744790161..a3b3b292009 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -8983,6 +8983,205 @@ static bool is_field_an_unique_index(Field *field) } +/* + @brief + Given a string, escape the LIKE pattern characters (%, _, \) with the '\'. + + @detail + Currently we fail if the escaped string didn't fit into MAX_FIELD_WIDTH + bytes but this is not necessary. +*/ + +static bool escape_like_characters(String *res) +{ + CHARSET_INFO *cs= res->charset(); + StringBuffer tmp2(cs); + tmp2.copy(*res); + int ret; + uchar *src= (uchar *) tmp2.ptr(), *src_end= (uchar *) tmp2.end(), + *dst= (uchar *) res->ptr(), *dst_end= dst + MAX_FIELD_WIDTH; + my_wc_t wc; + while (src < src_end) + { + /* Advance to the next character */ + if ((ret= my_ci_mb_wc(cs, &wc, src, src_end)) <= 0) + { + if (ret == MY_CS_ILSEQ) /* Bad sequence */ + return true; /* Cannot LIKE optimize */ + break; /* End of the string */ + } + src+= ret; + + /* If the next char is escape-able in actual LIKE, escape it */ + if (wc == (my_wc_t) '%' || wc == (my_wc_t) '_' || wc == (my_wc_t) '\\') + { + if ((ret= my_ci_wc_mb(cs, (my_wc_t) '\\', dst, dst_end)) <= 0) + return true; /* No space - no LIKE optimize */ + dst+= ret; + } + if ((ret= my_ci_wc_mb(cs, wc, dst, dst_end)) <= 0) + return true; /* No space - no LIKE optimize */ + dst+= ret; + } + res->length((char *) dst - res->ptr()); + return false; /* Ok */ +} + + +/* + @brief + Produce SEL_ARG interval for LIKE and prefix match functions. + + @detail + This is used for conditions in forms: + + - key_col LIKE 'sargable_pattern' + - SUBSTR(key_col, 1, ...) = 'value', or equivalent conditions involving + LEFT() instead of SUBSTR() - see with_sargable_substr() for details. + + @param + item The comparison item (Item_func_like or Item_func_eq) +*/ + +static SEL_ARG * +get_mm_leaf_for_LIKE(Item_bool_func *item, RANGE_OPT_PARAM *param, + Field *field, KEY_PART *key_part, + Item_func::Functype type, Item *value) +{ + DBUG_ENTER("get_mm_leaf_for_sargable"); + DBUG_ASSERT(value); + + if (key_part->image_type != Field::itRAW) + DBUG_RETURN(0); + + uint keynr= param->real_keynr[key_part->key]; + if (param->using_real_indexes && + !field->optimize_range(keynr, key_part->part)) + DBUG_RETURN(0); + + if (field->result_type() == STRING_RESULT && + field->charset() != item->compare_collation()) + { + /* + For equalities where one side is LEFT or SUBSTR + param->note_unusable_keys is BITMAP_EXCEPT_ANY_EQUALITY and the + following if condition is satisfied. But it will not result in + duplicate warnings because the ref optimizer does not cover this + case. + */ + if (param->note_unusable_keys & Item_func::BITMAP_LIKE) + field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part, + item->func_name_cstring(), + item->compare_collation(), + value, + Data_type_compatibility:: + INCOMPATIBLE_COLLATION); + DBUG_RETURN(0); + } + + StringBuffer tmp(value->collation.collation); + String *res; + + if (!(res= value->val_str(&tmp))) + DBUG_RETURN(&null_element); + + if (field->cmp_type() != STRING_RESULT || + field->type_handler() == &type_handler_enum || + field->type_handler() == &type_handler_set) + { + if (param->note_unusable_keys & Item_func::BITMAP_LIKE) + field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part, + item->func_name_cstring(), + item->compare_collation(), + value, + Data_type_compatibility:: + INCOMPATIBLE_DATA_TYPE); + DBUG_RETURN(0); + } + + /* + TODO: + Check if this was a function. This should have be optimized away + in the sql_select.cc + */ + if (res != &tmp) + { + tmp.copy(*res); // Get own copy + res= &tmp; + } + + /* + If we're handling a predicate in one of these forms: + - LEFT(key_col, N) ='string_const' + - SUBSTRING(key_col, 1, N)='string_const' + + then we need to: + - escape the LIKE pattern characters in the string_const, + - make the search pattern to be 'string_const%': + */ + if (type != Item_func::LIKE_FUNC) + { + DBUG_ASSERT(type == Item_func::EQ_FUNC); + if (escape_like_characters(res)) + DBUG_RETURN(0); /* Error, no optimization */ + res->append("%", 1); + } + + uint maybe_null= (uint) field->real_maybe_null(); + size_t field_length= field->pack_length() + maybe_null; + size_t offset= maybe_null; + size_t length= key_part->store_length; + + if (length != key_part->length + maybe_null) + { + /* key packed with length prefix */ + offset+= HA_KEY_BLOB_LENGTH; + field_length= length - HA_KEY_BLOB_LENGTH; + } + else + { + if (unlikely(length < field_length)) + { + /* + This can only happen in a table created with UNIREG where one key + overlaps many fields + */ + length= field_length; + } + else + field_length= length; + } + length+= offset; + uchar *min_str,*max_str; + if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2))) + DBUG_RETURN(0); + max_str= min_str + length; + if (maybe_null) + max_str[0]= min_str[0]=0; + + size_t min_length, max_length; + field_length-= maybe_null; + /* If the item is a LIKE, use its escape, otherwise use backslash */ + int escape= type == Item_func::LIKE_FUNC ? + ((Item_func_like *) item)->escape : '\\'; + if (field->charset()->like_range(res->ptr(), res->length(), + escape, wild_one, wild_many, + field_length, + (char*) min_str + offset, + (char*) max_str + offset, + &min_length, &max_length)) + DBUG_RETURN(0); // Can't optimize with LIKE + + if (offset != maybe_null) // BLOB or VARCHAR + { + int2store(min_str + maybe_null, min_length); + int2store(max_str + maybe_null, max_length); + } + SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str); + DBUG_RETURN(tree); +} + + SEL_TREE * Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field, Item_func::Functype type, Item *value) @@ -8995,6 +9194,9 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field, KEY_PART *end = param->key_parts_end; SEL_TREE *tree=0; table_map value_used_tables= 0; + bool know_sargable_substr= false; + bool sargable_substr; // protected by know_sargable_substr + if (value && (value_used_tables= value->used_tables()) & ~(param->prev_tables | param->read_tables)) @@ -9019,7 +9221,18 @@ Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field, */ MEM_ROOT *tmp_root= param->mem_root; param->thd->mem_root= param->old_root; - sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value); + if (!know_sargable_substr) + { + sargable_substr= with_sargable_substr(); + know_sargable_substr= true; + } + if (sargable_substr) + { + sel_arg= get_mm_leaf_for_LIKE(this, param, key_part->field, key_part, + type, value); + } + else + sel_arg= get_mm_leaf(param, key_part->field, key_part, type, value); param->thd->mem_root= tmp_root; if (!sel_arg) @@ -9087,109 +9300,7 @@ Item_func_like::get_mm_leaf(RANGE_OPT_PARAM *param, Item_func::Functype type, Item *value) { DBUG_ENTER("Item_func_like::get_mm_leaf"); - DBUG_ASSERT(value); - - if (key_part->image_type != Field::itRAW) - DBUG_RETURN(0); - - uint keynr= param->real_keynr[key_part->key]; - if (param->using_real_indexes && - !field->optimize_range(keynr, key_part->part)) - DBUG_RETURN(0); - - if (field->result_type() == STRING_RESULT && - field->charset() != compare_collation()) - { - if (param->note_unusable_keys & BITMAP_LIKE) - field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part, - func_name_cstring(), - compare_collation(), - value, - Data_type_compatibility:: - INCOMPATIBLE_COLLATION); - DBUG_RETURN(0); - } - - StringBuffer tmp(value->collation.collation); - String *res; - - if (!(res= value->val_str(&tmp))) - DBUG_RETURN(&null_element); - - if (field->cmp_type() != STRING_RESULT || - field->type_handler() == &type_handler_enum || - field->type_handler() == &type_handler_set) - { - if (param->note_unusable_keys & BITMAP_LIKE) - field->raise_note_cannot_use_key_part(param->thd, keynr, key_part->part, - func_name_cstring(), - compare_collation(), - value, - Data_type_compatibility:: - INCOMPATIBLE_DATA_TYPE); - DBUG_RETURN(0); - } - - /* - TODO: - Check if this was a function. This should have be optimized away - in the sql_select.cc - */ - if (res != &tmp) - { - tmp.copy(*res); // Get own copy - res= &tmp; - } - - uint maybe_null= (uint) field->real_maybe_null(); - size_t field_length= field->pack_length() + maybe_null; - size_t offset= maybe_null; - size_t length= key_part->store_length; - - if (length != key_part->length + maybe_null) - { - /* key packed with length prefix */ - offset+= HA_KEY_BLOB_LENGTH; - field_length= length - HA_KEY_BLOB_LENGTH; - } - else - { - if (unlikely(length < field_length)) - { - /* - This can only happen in a table created with UNIREG where one key - overlaps many fields - */ - length= field_length; - } - else - field_length= length; - } - length+= offset; - uchar *min_str,*max_str; - if (!(min_str= (uchar*) alloc_root(param->mem_root, length*2))) - DBUG_RETURN(0); - max_str= min_str + length; - if (maybe_null) - max_str[0]= min_str[0]=0; - - size_t min_length, max_length; - field_length-= maybe_null; - if (field->charset()->like_range(res->ptr(), res->length(), - escape, wild_one, wild_many, - field_length, - (char*) min_str + offset, - (char*) max_str + offset, - &min_length, &max_length)) - DBUG_RETURN(0); // Can't optimize with LIKE - - if (offset != maybe_null) // BLOB or VARCHAR - { - int2store(min_str + maybe_null, min_length); - int2store(max_str + maybe_null, max_length); - } - SEL_ARG *tree= new (param->mem_root) SEL_ARG(field, min_str, max_str); - DBUG_RETURN(tree); + DBUG_RETURN(get_mm_leaf_for_LIKE(this, param, field, key_part, type, value)); } diff --git a/sql/opt_sargable_left.cc b/sql/opt_sargable_left.cc new file mode 100644 index 00000000000..6ae10e3701e --- /dev/null +++ b/sql/opt_sargable_left.cc @@ -0,0 +1,158 @@ +/* Copyright (c) 2024, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/** + @file + + @brief + This file contains SargableLeft optimization +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include +#include "sql_select.h" + +/* + SargableLeft + ============ + + This optimization makes conditions in forms like + + LEFT(key_col, N) = 'string_const' + SUBSTRING(key_col, 1, N) = 'string_const' + + sargable. The conditions take the first N characters of key_col and + compare them with a string constant. + However, producing index lookup intervals for this collation is complex + due to contractions. + + Contractions + ------------ + A contraction is a property of collation where a sequence of multiple + characters is compared as some other character(s). + For example, in utfmb4_danish_ci, 'AA' is compared as one character 'Å' + which sorts after 'Z': + + MariaDB [test]> select a from t1 order by col1; + +------+ + | col1 | + +------+ + | BA1 | (1) + | BC | + | BZ | + | BAA2 | (2) + +------+ + + Now suppose we're producing lookup ranges for condition + + LEFT(col1, 2)='BA' + + In addition to looking near 'BA' (1), we need to look into the area right + after 'BZ' (2), where we may find 'BAA'. + + Fortunately, this was already implemented for handling LIKE conditions in + form 'key_col LIKE 'BA%'. Each collation provides like_range() call which + produces lookup range in a collation-aware way. + + Differences between LIKE and LEFT= + ---------------------------------- + So can one reduce or even rewrite conditions with LEFT() into LIKE? No, there + are differences. + + First, LIKE does character-by-character comparison, ignoring the collation's + contractions: + + MariaDB [test]> select col1, col1='AA', col1 LIKE 'AA' from t1; + +------+-----------+----------------+ + | col1 | col1='AA' | col1 LIKE 'AA' | + +------+-----------+----------------+ + | AA | 1 | 1 | + | Å | 1 | 0 | + +------+-----------+----------------+ + + (However, index comparison function uses equality's comparison rules. + my_like_range() will produce an index range 'AA' <= col1 <= 'AA'. Reading rows + from it will return 'Å' as well) + + Second, LEFT imposes additional constraints on the length of both parts. For + example: + - LEFT(col,2)='string-longer-than-two-chars' - is false for any value of col. + - LEFT(col,2)='A' is not equivalent to (col LIKE 'A%'), consider col='Ab'. + + Take-aways + ---------- + - SargableLeft makes use of my_like_range() to produce index intervals. + - LEFT(col, N)='foo' + - We ignore the value of N when producing the lookup range (this may make the + range to include rows for which the predicate is false) + = For the SUBSTRING form, we only need to check that M=1 in the + SUBSTRING(col, M, N)='foo'. +*/ + + +/* + @brief Check if this condition is sargable LEFT(key_col, N)='foo', or + similar condition with SUBSTRING(). + + @detail + 'foo' here can be any constant we can compute during optimization, + Only equality conditions are supported. + See SargableLeft above for detals. + + @param field The first argument of LEFT or SUBSTRING if sargable, + otherwise deferenced to NULL + @param value_idx The index of argument that is the prefix string + if sargable, otherwise dereferenced to -1 +*/ + +bool Item_bool_func::with_sargable_substr(Item_field **field, int *value_idx) const +{ + int func_idx, val_idx= -1; + Item **func_args, *real= NULL; + bool ret= false; + enum Functype type; + if (functype() != EQ_FUNC) + goto done; + if (args[0]->type() == FUNC_ITEM) + func_idx= 0; + else if (args[1]->type() == FUNC_ITEM) + func_idx= 1; + else + goto done; + type= ((Item_func *) args[func_idx])->functype(); + if (type != SUBSTR_FUNC && type != LEFT_FUNC) + goto done; + func_args= ((Item_func *) args[func_idx])->arguments(); + real= func_args[0]->real_item(); + val_idx= 1 - func_idx; + if (real->type() == FIELD_ITEM && + args[val_idx]->can_eval_in_optimize() && + (type == LEFT_FUNC || func_args[1]->val_int() == 1)) + { + ret= true; + goto done; + } + real= NULL; + val_idx= -1; +done: + if (field != NULL) + *field= (Item_field *) real; + if (value_idx != NULL) + *value_idx= val_idx; + return ret; +} + + diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 32a53348cd1..8d995ce8faa 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7089,6 +7089,17 @@ Item_bool_func2::add_key_fields_optimize_op(JOIN *join, KEY_FIELD **key_fields, (Item_field*) args[0]->real_item(), equal_func, args + 1, 1, usable_tables, sargables); } + else + { + Item_field *field= NULL; + int value_idx= -1; + /* Handle SUBSTR(key,1,N)='const', 'const'=SUBSTR(key,1,N), etc */ + if (with_sargable_substr(&field, &value_idx)) + { + add_key_equal_fields(join, key_fields, *and_level, this, field, + false, args + value_idx, 1, usable_tables, sargables); + } + } if (is_local_field(args[1])) { add_key_equal_fields(join, key_fields, *and_level, this, diff --git a/storage/spider/mysql-test/spider/bugfix/r/select_with_backquote.result b/storage/spider/mysql-test/spider/bugfix/r/select_with_backquote.result index e6d4ac38862..7fe798b6e39 100644 --- a/storage/spider/mysql-test/spider/bugfix/r/select_with_backquote.result +++ b/storage/spider/mysql-test/spider/bugfix/r/select_with_backquote.result @@ -42,11 +42,17 @@ pkey LEFT(`txt_utf8`, 4) 0 0123 1 1234 2 2345 +SELECT `pkey`, SUBSTR(`txt_utf8`, 1, 4) FROM `auto_test_local`.`tbl_a` ORDER BY SUBSTR(`txt_utf8`, 1, 4) LIMIT 3; +pkey SUBSTR(`txt_utf8`, 1, 4) +0 0123 +1 1234 +2 2345 connection child2_1; SET NAMES utf8; SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %'; argument select t0.`pkey` `pkey`,(left(t0.`txt_utf8` , 4)) `LEFT(``txt_utf8``, 4)` from `auto_test_remote`.`tbl_a` t0 order by (left(`txt_utf8` , 4)) limit 3 +select t0.`pkey` `pkey`,(substr(t0.`txt_utf8` , 1 , 4)) `SUBSTR(``txt_utf8``, 1, 4)` from `auto_test_remote`.`tbl_a` t0 order by (substr(`txt_utf8` , 1 , 4)) limit 3 SELECT argument FROM mysql.general_log WHERE command_type != 'Execute' AND argument LIKE '%select %' SELECT pkey, txt_utf8 FROM tbl_a ORDER BY pkey; pkey txt_utf8 diff --git a/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.test b/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.test index 3713befa3fc..f90cc16e9cd 100644 --- a/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.test +++ b/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.test @@ -55,6 +55,7 @@ TRUNCATE TABLE mysql.general_log; SET NAMES utf8; --disable_ps_protocol SELECT `pkey`, LEFT(`txt_utf8`, 4) FROM `auto_test_local`.`tbl_a` ORDER BY LEFT(`txt_utf8`, 4) LIMIT 3; +SELECT `pkey`, SUBSTR(`txt_utf8`, 1, 4) FROM `auto_test_local`.`tbl_a` ORDER BY SUBSTR(`txt_utf8`, 1, 4) LIMIT 3; --enable_ps_protocol --connection child2_1 diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc index 39b024e861e..5d18246f867 100644 --- a/storage/spider/spd_db_mysql.cc +++ b/storage/spider/spd_db_mysql.cc @@ -5028,6 +5028,8 @@ int spider_db_mbase_util::print_item_func( last_str = SPIDER_SQL_IS_NOT_NULL_STR; last_str_length = SPIDER_SQL_IS_NOT_NULL_LEN; break; + case Item_func::LEFT_FUNC: + case Item_func::SUBSTR_FUNC: case Item_func::UNKNOWN_FUNC: org_func_name= item_func->func_name_cstring(); func_name= org_func_name.str;