MDEV-35616: Add basic optimizer support for virtual column

(Review input addressed)

After this patch, the optimizer can handle virtual column expressions
in WHERE/ON clauses. If the table has an indexed virtual column:

  ALTER TABLE t1
    ADD COLUMN vcol INT AS (col1+1),
    ADD INDEX idx1(vcol);

and the query uses the exact virtual column expression:

  SELECT * FROM t1 WHERE col1+1 <= 100

then the optimizer will be able to use index idx1 for it.

This is achieved by walking the WHERE/ON clauses and replacing instances
of virtual column expression (like "col1+1" above) with virtual column's
Item_field (like "vcol"). The latter can be processed by the optimizer.

Replacement is considered (and done) only in items that are potentially
usable to the range optimizer.
This commit is contained in:
Sergei Petrunia 2024-11-26 14:50:41 +02:00
parent 759df4cc5f
commit 1c2a83179d
17 changed files with 1057 additions and 18 deletions

View file

@ -75,6 +75,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
../sql/opt_rewrite_remove_casefold.cc
../sql/opt_sargable_left.cc
../sql/opt_sum.cc
../sql/opt_vcol_substitution.cc
../sql/parse_file.cc ../sql/procedure.cc ../sql/protocol.cc
../sql/records.cc ../sql/repl_failsafe.cc ../sql/rpl_filter.cc
../sql/rpl_record.cc ../sql/des_key_file.cc

View file

@ -354,20 +354,20 @@ f1 gc
9 10
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 > 7;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 range gc gc 5 NULL 3 Using index condition
SELECT * FROM t1 WHERE f1 + 1 = 7;
f1 gc
6 7
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 = 7;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 const gc gc 5 const 1
SELECT * FROM t1 WHERE f1 + 1 IN (7,5);
f1 gc
4 5
6 7
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 IN(7,5);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 range gc gc 5 NULL 2 Using index condition
SELECT * FROM t1 WHERE f1 + 1 BETWEEN 5 AND 7;
f1 gc
4 5
@ -375,7 +375,7 @@ f1 gc
6 7
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 BETWEEN 5 AND 7;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 range gc gc 5 NULL 3 Using index condition
# Check that expression isn't transformed for a disabled key
SELECT * FROM t1 IGNORE KEY (gc) WHERE f1 + 1 BETWEEN 5 AND 7;
f1 gc
@ -432,7 +432,7 @@ f1 gc
9 10
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 > 7.0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 range gc gc 5 NULL 3 Using index condition
DROP TABLE t1;
# Pick index with proper type
CREATE TABLE t1 (f1 int,
@ -463,7 +463,7 @@ f1 gc_int gc_date
# INT column & index should be picked
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 > 070707;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 18 Using where
1 SIMPLE t1 range gc_int_idx gc_int_idx 5 NULL 4 Using index condition
SELECT * FROM t1 WHERE f1 + 1 > CAST(070707 AS DATE);
f1 gc_int gc_date
101010 101011 2010-10-11
@ -473,7 +473,9 @@ f1 gc_int gc_date
# DATE column & index should be picked
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 > CAST(070707 AS DATE);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 18 Using where
1 SIMPLE t1 ALL gc_int_idx NULL NULL NULL 18 Using where
Warnings:
Note 1105 Cannot use key `gc_int_idx` part[0] for lookup: `test`.`t1`.`gc_int` of type `int` > "cast(70707 as date)" of type `date`
DROP TABLE t1;
#
# BUG#21229846: WL8170: SIGNAL 11 IN JOIN::MAKE_SUM_FUNC_LIST
@ -764,14 +766,14 @@ a b
1 1
EXPLAIN SELECT a, b FROM t WHERE (a XOR b) = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ALL NULL NULL NULL NULL 4 Using where
1 SIMPLE t ref gc_xor gc_xor 5 const 2
SELECT a, b FROM t WHERE (a XOR b) = 1;
a b
0 1
1 0
EXPLAIN SELECT a FROM t WHERE (NOT a) = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ALL NULL NULL NULL NULL 4 Using where
1 SIMPLE t ref gc_not gc_not 5 const 2
SELECT a FROM t WHERE (NOT a) = 1;
a
0

View file

@ -354,20 +354,20 @@ f1 gc
9 10
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 > 7;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 range gc gc 5 NULL 3 Using index condition
SELECT * FROM t1 WHERE f1 + 1 = 7;
f1 gc
6 7
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 = 7;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 const gc gc 5 const 1
SELECT * FROM t1 WHERE f1 + 1 IN (7,5);
f1 gc
4 5
6 7
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 IN(7,5);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 range gc gc 5 NULL 2 Using index condition
SELECT * FROM t1 WHERE f1 + 1 BETWEEN 5 AND 7;
f1 gc
4 5
@ -375,7 +375,7 @@ f1 gc
6 7
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 BETWEEN 5 AND 7;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 range gc gc 5 NULL 3 Using index condition
# Check that expression isn't transformed for a disabled key
SELECT * FROM t1 IGNORE KEY (gc) WHERE f1 + 1 BETWEEN 5 AND 7;
f1 gc
@ -432,7 +432,7 @@ f1 gc
9 10
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 > 7.0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 range gc gc 5 NULL 3 Using index condition
DROP TABLE t1;
# Pick index with proper type
CREATE TABLE t1 (f1 int,
@ -463,7 +463,7 @@ f1 gc_int gc_date
# INT column & index should be picked
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 > 070707;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 18 Using where
1 SIMPLE t1 range gc_int_idx gc_int_idx 5 NULL 4 Using index condition
SELECT * FROM t1 WHERE f1 + 1 > CAST(070707 AS DATE);
f1 gc_int gc_date
101010 101011 2010-10-11
@ -473,7 +473,9 @@ f1 gc_int gc_date
# DATE column & index should be picked
EXPLAIN SELECT * FROM t1 WHERE f1 + 1 > CAST(070707 AS DATE);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 18 Using where
1 SIMPLE t1 ALL gc_int_idx NULL NULL NULL 18 Using where
Warnings:
Note 1105 Cannot use key `gc_int_idx` part[0] for lookup: `test`.`t1`.`gc_int` of type `int` > "cast(70707 as date)" of type `date`
DROP TABLE t1;
#
# BUG#21229846: WL8170: SIGNAL 11 IN JOIN::MAKE_SUM_FUNC_LIST
@ -764,14 +766,14 @@ a b
1 1
EXPLAIN SELECT a, b FROM t WHERE (a XOR b) = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ALL NULL NULL NULL NULL 4 Using where
1 SIMPLE t ref gc_xor gc_xor 5 const 2
SELECT a, b FROM t WHERE (a XOR b) = 1;
a b
0 1
1 0
EXPLAIN SELECT a FROM t WHERE (NOT a) = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ALL NULL NULL NULL NULL 4 Using where
1 SIMPLE t ref gc_not gc_not 5 const 2
SELECT a FROM t WHERE (NOT a) = 1;
a
0

View file

@ -0,0 +1,300 @@
# Check various condition types
create table t1 (a int, vcol1 int as (a+1), index(vcol1));
insert into t1 (a) select seq from seq_1_to_100;
explain select * from t1 where a+1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref vcol1 vcol1 5 const 1
# Try renaming the table
explain select * from t1 as TBL where TBL.a+1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE TBL ref vcol1 vcol1 5 const 1
explain select * from t1 where a+1<=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range vcol1 vcol1 5 NULL 1 Using index condition
explain select * from t1 where a+1<2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range vcol1 vcol1 5 NULL 1 Using index condition
explain select * from t1 where a+1>100;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range vcol1 vcol1 5 NULL 6 Using index condition
explain select * from t1 where a+1>=100;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range vcol1 vcol1 5 NULL 13 Using index condition
explain select * from t1 where a+1 between 10 and 12;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range vcol1 vcol1 5 NULL 2 Using index condition
explain select * from t1 where (a+1) IS NULL;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref vcol1 vcol1 5 const 1 Using index condition
explain select * from t1 force index(vcol1) where (a+1) IS NOT NULL;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range vcol1 vcol1 5 NULL 100 Using index condition
explain select * from t1 where (a+1) in (1,2,3,4);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range vcol1 vcol1 5 NULL 4 Using index condition
# Check UPDATE/DELETE:
explain delete from t1 where a+1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range vcol1 vcol1 5 NULL 1 Using where
explain update t1 set a=a+1 where a+1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range vcol1 vcol1 5 NULL 1 Using where; Using buffer
# Try merged VIEWs:
create view v1 as select * from t1;
explain select * from v1 where a+1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref vcol1 vcol1 5 const 1
create view v2 as select a as A_COL from t1;
explain select * from v2 where A_COL+1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref vcol1 vcol1 5 const 1
drop view v1;
drop view v2;
set names utf8mb4;
select @@collation_connection;
@@collation_connection
utf8mb4_uca1400_ai_ci
# Check VARCHAR
create table t2 (
a varchar(32),
vcol1 varchar(32) as (concat('hello-',a)),
index(vcol1)
);
insert into t2 (a) select seq from seq_1_to_100;
select collation('aaa'), collation(vcol1) from t2 limit 1;
collation('aaa') collation(vcol1)
utf8mb4_uca1400_ai_ci utf8mb4_uca1400_ai_ci
set @tmp_trace=@@optimizer_trace;
set optimizer_trace=1;
# This won't work:
explain select * from t2 where concat('bye-', a)='hello-5';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where
# This will work:
explain select * from t2 where concat('hello-', a)='hello-5';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ref vcol1 vcol1 131 const 1 Using index condition
select
json_detailed(json_extract(trace, '$**.virtual_column_substitution'))
from
information_schema.optimizer_trace;
json_detailed(json_extract(trace, '$**.virtual_column_substitution'))
[
{
"condition": "WHERE",
"resulting_condition": "t2.vcol1 = 'hello-5'"
}
]
# Try also ON expressions
explain
select *
from t1 left join t2 on concat('hello-', t2.a)='hello-5'
where
t1.a+1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref vcol1 vcol1 5 const 1
1 SIMPLE t2 ref vcol1 vcol1 131 const 1 Using where
select
json_detailed(json_extract(trace, '$**.virtual_column_substitution'))
from
information_schema.optimizer_trace;
json_detailed(json_extract(trace, '$**.virtual_column_substitution'))
[
{
"condition": "WHERE",
"resulting_condition": "t1.vcol1 = 2"
},
{
"condition": "ON expression",
"resulting_condition": "t2.vcol1 = 'hello-5'"
}
]
create table t3 (a int);
insert into t3 values (1),(2);
explain
select *
from
t3 left join
(t1 join t2 on concat('hello-', t2.a)='hello-5' and t1.a+1=2)
on t3.a<3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t3 ALL NULL NULL NULL NULL 2
1 SIMPLE t1 ref vcol1 vcol1 5 const 1 Using where
1 SIMPLE t2 ref vcol1 vcol1 131 const 1 Using where
select
json_detailed(json_extract(trace, '$**.virtual_column_substitution'))
from
information_schema.optimizer_trace;
json_detailed(json_extract(trace, '$**.virtual_column_substitution'))
[
{
"condition": "ON expression",
"resulting_condition": "t3.a < 3 and t2.vcol1 = 'hello-5' and t1.vcol1 = 2"
}
]
drop table t1,t2,t3;
set optimizer_trace=@tmp_trace;
#
# Implicit type/charset conversions
#
create table t3 (
a varchar(32) collate utf8mb4_general_ci,
vcol1 int as (concat('100',a)),
vcol2 varchar(32) collate utf8mb4_unicode_ci as (concat('hello-',a)),
index(vcol1),
index(vcol2)
);
insert into t3 (a) select seq from seq_1_to_100;
# Type conversion
explain select * from t3 where concat('100', a)=10010;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t3 ALL NULL NULL NULL NULL 100 Using where
Warnings:
Note 1105 Cannot substitute virtual column expression concat('100',`t3`.`a`) -> vcol1 due to type mismatch
# Character set change
explain select * from t3 where concat('hello-', a)='abcd';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t3 ALL NULL NULL NULL NULL 100 Using where
Warnings:
Note 1105 Cannot substitute virtual column expression concat('hello-',`t3`.`a`) -> vcol2 due to collation mismatch
drop table t3;
# Try JSON_EXTRACT
create table t1 (a int, js1 blob);
insert into t1
select seq, concat('{"size":', seq, ', "color":"hue', seq ,'"}') from seq_1_to_100;
select * from t1 limit 3;
a js1
1 {"size":1, "color":"hue1"}
2 {"size":2, "color":"hue2"}
3 {"size":3, "color":"hue3"}
alter table t1 add size1 int as (cast(json_extract(js1, '$.size') as int));
alter table t1 add index(size1);
explain select * from t1 where cast(json_extract(js1,'$.size') as int)=5 ;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref size1 size1 5 const 1
alter table t1 add
color varchar(100) COLLATE utf8mb3_general_ci
as (json_unquote(json_extract(js1, '$.color')));
alter table t1 add index(color);
select * from t1 limit 3;
a js1 size1 color
1 {"size":1, "color":"hue1"} 1 hue1
2 {"size":2, "color":"hue2"} 2 hue2
3 {"size":3, "color":"hue3"} 3 hue3
# Index is used:
explain select * from t1 where json_unquote(json_extract(js1, '$.color'))='hue5';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref color color 303 const 1 Using index condition
explain select * from t1 where json_unquote(json_extract(js1, '$.color')) IS NULL;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref color color 303 const 1 Using index condition
explain select * from t1 force index(color)
where json_unquote(json_extract(js1, '$.color')) IS NOT NULL;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range color color 303 NULL 100 Using index condition
alter table t1 drop column color;
alter table t1 add
color2 varchar(100)
as (json_unquote(json_extract(js1, '$.color')));
alter table t1 add index(color2);
# Index is not used due to collation mismatch:
explain select * from t1 where json_unquote(json_extract(js1, '$.color'))='hue5';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
Warnings:
Note 1105 Cannot substitute virtual column expression json_unquote(json_extract(`t1`.`js1`,'$.color')) -> color2 due to collation mismatch
drop table t1;
#
# Test interplay with sargable_casefold optimization:
#
create table t1 (
a varchar(100) collate utf8mb3_general_ci,
vcol varchar(100) collate utf8mb3_general_ci as (UPPER(a)),
index(a),
index(vcol)
);
insert into t1 (a) select seq from seq_1_to_100;
# Note that possible_keys doesn't include 'vcol'.
# Sargable_casefold is applied before vcol substitution:
explain select * from t1 where UPPER(a)='abc';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref a a 303 const 1 Using index condition
explain select * from t1 ignore index(vcol) where UPPER(a)='abc';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref a a 303 const 1 Using index condition
explain select * from t1 ignore index(a) where UPPER(a)='abc';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
set optimizer_switch='sargable_casefold=off';
explain select * from t1 ignore index(a) where UPPER(a)='abc';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref vcol vcol 303 const 1 Using index condition
set optimizer_switch=default;
drop table t1;
#
# Test interplay with Sargable YEAR/DATE optimization:
#
create table t1 (
a date,
vcol int as (year(a)),
index(a),
index(vcol)
);
insert into t1 (a) select date_add('2024-01-01', interval (seq*365) day) from seq_1_to_100;
# Note that possible_keys doesn't include 'vcol'.
# Sargable Year is applied before vcol substitution:
explain format=json select * from t1 where year(a)=2025;
EXPLAIN
{
"query_block": {
"select_id": 1,
"cost": 0.002574553,
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "range",
"possible_keys": ["a"],
"key": "a",
"key_length": "4",
"used_key_parts": ["a"],
"loops": 1,
"rows": 1,
"cost": 0.002574553,
"filtered": 100,
"index_condition": "t1.a between '2025-01-01' and '2025-12-31'"
}
}
]
}
}
# Check that vcol would work if Sargable Year didn't disable it:
alter table t1
add vcol2 int as (year(a)+1),
add index(vcol2);
explain format=json select * from t1 where year(a)+1=2025;
EXPLAIN
{
"query_block": {
"select_id": 1,
"cost": 0.002024411,
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["vcol2"],
"key": "vcol2",
"key_length": "5",
"used_key_parts": ["vcol2"],
"ref": ["const"],
"loops": 1,
"rows": 1,
"cost": 0.002024411,
"filtered": 100
}
}
]
}
}
drop table t1;

View file

@ -0,0 +1,8 @@
create table t1 (a int, vcol1 int as (a+1), index(vcol1));
insert into t1 (a) select seq from seq_1_to_100;
SET SESSION debug_dbug="+d,vcol_subst_simulate_oom";
explain select * from t1 where a+1=2;
ERROR HY000: Out of memory.
SET @saved_dbug = @@SESSION.debug_dbug;
SET debug_dbug= @saved_dbug;
drop table t1;

View file

@ -0,0 +1,187 @@
--source include/have_sequence.inc
--source include/not_embedded.inc
--echo # Check various condition types
create table t1 (a int, vcol1 int as (a+1), index(vcol1));
insert into t1 (a) select seq from seq_1_to_100;
explain select * from t1 where a+1=2;
--echo # Try renaming the table
explain select * from t1 as TBL where TBL.a+1=2;
explain select * from t1 where a+1<=2;
explain select * from t1 where a+1<2;
explain select * from t1 where a+1>100;
explain select * from t1 where a+1>=100;
explain select * from t1 where a+1 between 10 and 12;
explain select * from t1 where (a+1) IS NULL;
explain select * from t1 force index(vcol1) where (a+1) IS NOT NULL;
explain select * from t1 where (a+1) in (1,2,3,4);
--echo # Check UPDATE/DELETE:
explain delete from t1 where a+1=2;
explain update t1 set a=a+1 where a+1=2;
--echo # Try merged VIEWs:
create view v1 as select * from t1;
explain select * from v1 where a+1=2;
create view v2 as select a as A_COL from t1;
explain select * from v2 where A_COL+1=2;
drop view v1;
drop view v2;
set names utf8mb4;
select @@collation_connection;
--echo # Check VARCHAR
create table t2 (
a varchar(32),
vcol1 varchar(32) as (concat('hello-',a)),
index(vcol1)
);
insert into t2 (a) select seq from seq_1_to_100;
select collation('aaa'), collation(vcol1) from t2 limit 1;
# Also check optimizer trace coverage
set @tmp_trace=@@optimizer_trace;
set optimizer_trace=1;
--echo # This won't work:
explain select * from t2 where concat('bye-', a)='hello-5';
--echo # This will work:
explain select * from t2 where concat('hello-', a)='hello-5';
--disable_view_protocol
select
json_detailed(json_extract(trace, '$**.virtual_column_substitution'))
from
information_schema.optimizer_trace;
--enable_view_protocol
--echo # Try also ON expressions
explain
select *
from t1 left join t2 on concat('hello-', t2.a)='hello-5'
where
t1.a+1=2;
--disable_view_protocol
select
json_detailed(json_extract(trace, '$**.virtual_column_substitution'))
from
information_schema.optimizer_trace;
--enable_view_protocol
create table t3 (a int);
insert into t3 values (1),(2);
explain
select *
from
t3 left join
(t1 join t2 on concat('hello-', t2.a)='hello-5' and t1.a+1=2)
on t3.a<3;
--disable_view_protocol
select
json_detailed(json_extract(trace, '$**.virtual_column_substitution'))
from
information_schema.optimizer_trace;
--enable_view_protocol
drop table t1,t2,t3;
set optimizer_trace=@tmp_trace;
--echo #
--echo # Implicit type/charset conversions
--echo #
create table t3 (
a varchar(32) collate utf8mb4_general_ci,
vcol1 int as (concat('100',a)),
vcol2 varchar(32) collate utf8mb4_unicode_ci as (concat('hello-',a)),
index(vcol1),
index(vcol2)
);
insert into t3 (a) select seq from seq_1_to_100;
--echo # Type conversion
explain select * from t3 where concat('100', a)=10010;
--echo # Character set change
explain select * from t3 where concat('hello-', a)='abcd';
drop table t3;
--echo # Try JSON_EXTRACT
create table t1 (a int, js1 blob);
insert into t1
select seq, concat('{"size":', seq, ', "color":"hue', seq ,'"}') from seq_1_to_100;
select * from t1 limit 3;
alter table t1 add size1 int as (cast(json_extract(js1, '$.size') as int));
alter table t1 add index(size1);
explain select * from t1 where cast(json_extract(js1,'$.size') as int)=5 ;
#
# JSON_UNQUOTE() returns utf8mb3_unicode_ci, even if JSON_VALID() and other
# functions seem to accept utf8mb4 characters (This is a bug, MDEV-35496)
#
# Without COLLATE clause, the default is utf8mb4_uca1400_ai_ci.
#
alter table t1 add
color varchar(100) COLLATE utf8mb3_general_ci
as (json_unquote(json_extract(js1, '$.color')));
alter table t1 add index(color);
select * from t1 limit 3;
--echo # Index is used:
explain select * from t1 where json_unquote(json_extract(js1, '$.color'))='hue5';
explain select * from t1 where json_unquote(json_extract(js1, '$.color')) IS NULL;
explain select * from t1 force index(color)
where json_unquote(json_extract(js1, '$.color')) IS NOT NULL;
alter table t1 drop column color;
alter table t1 add
color2 varchar(100)
as (json_unquote(json_extract(js1, '$.color')));
alter table t1 add index(color2);
--echo # Index is not used due to collation mismatch:
explain select * from t1 where json_unquote(json_extract(js1, '$.color'))='hue5';
drop table t1;
--echo #
--echo # Test interplay with sargable_casefold optimization:
--echo #
create table t1 (
a varchar(100) collate utf8mb3_general_ci,
vcol varchar(100) collate utf8mb3_general_ci as (UPPER(a)),
index(a),
index(vcol)
);
insert into t1 (a) select seq from seq_1_to_100;
--echo # Note that possible_keys doesn't include 'vcol'.
--echo # Sargable_casefold is applied before vcol substitution:
explain select * from t1 where UPPER(a)='abc';
explain select * from t1 ignore index(vcol) where UPPER(a)='abc';
explain select * from t1 ignore index(a) where UPPER(a)='abc';
set optimizer_switch='sargable_casefold=off';
explain select * from t1 ignore index(a) where UPPER(a)='abc';
set optimizer_switch=default;
drop table t1;
--echo #
--echo # Test interplay with Sargable YEAR/DATE optimization:
--echo #
create table t1 (
a date,
vcol int as (year(a)),
index(a),
index(vcol)
);
insert into t1 (a) select date_add('2024-01-01', interval (seq*365) day) from seq_1_to_100;
--echo # Note that possible_keys doesn't include 'vcol'.
--echo # Sargable Year is applied before vcol substitution:
explain format=json select * from t1 where year(a)=2025;
--echo # Check that vcol would work if Sargable Year didn't disable it:
alter table t1
add vcol2 int as (year(a)+1),
add index(vcol2);
explain format=json select * from t1 where year(a)+1=2025;
drop table t1;

View file

@ -0,0 +1,15 @@
# Checks that EXPLAIN fails with ER_OUT_OF_RESOURCES when the debug keyword
# vcol_subst_simulate_oom is enabled (presumably it makes the virtual column
# substitution code simulate an out-of-memory condition - confirm in the
# server source).
--source include/have_sequence.inc
--source include/have_debug.inc
--source include/not_embedded.inc
create table t1 (a int, vcol1 int as (a+1), index(vcol1));
insert into t1 (a) select seq from seq_1_to_100;
# Enable the failure injection for this session only.
SET SESSION debug_dbug="+d,vcol_subst_simulate_oom";
--error ER_OUT_OF_RESOURCES
explain select * from t1 where a+1=2;
# NOTE(review): @saved_dbug is captured here, i.e. after the debug keyword
# has already been enabled above. Assigning it back therefore restores the
# value that still contains vcol_subst_simulate_oom and does not switch the
# injection off. The save should precede the first SET SESSION debug_dbug;
# changing the order requires re-recording the .result file.
SET @saved_dbug = @@SESSION.debug_dbug;
SET debug_dbug= @saved_dbug;
drop table t1;

View file

@ -118,6 +118,7 @@ SET (SQL_SOURCE
opt_rewrite_remove_casefold.cc
opt_sargable_left.cc
opt_sum.cc
opt_vcol_substitution.cc
../sql-common/pack.c parse_file.cc password.c procedure.cc
protocol.cc records.cc repl_failsafe.cc rpl_filter.cc
session_tracker.cc
@ -188,6 +189,8 @@ SET (SQL_SOURCE
json_table.cc
proxy_protocol.cc backup.cc xa.cc
socketpair.c socketpair.h
opt_vcol_substitution.h
opt_vcol_substitution.cc
${CMAKE_CURRENT_BINARY_DIR}/lex_hash.h
${CMAKE_CURRENT_BINARY_DIR}/lex_token.h
${GEN_SOURCES}

View file

@ -2463,6 +2463,9 @@ public:
bool cache_const_expr_analyzer(uchar **arg);
Item* cache_const_expr_transformer(THD *thd, uchar *arg);
bool vcol_subst_analyzer(uchar **);
virtual Item* vcol_subst_transformer(THD *thd, uchar *arg) { return this; }
virtual Item* propagate_equal_fields(THD*, const Context &, COND_EQUAL *)
{
return this;

View file

@ -626,6 +626,7 @@ public:
}
return clone;
}
Item* vcol_subst_transformer(THD *thd, uchar *arg) override;
};
/**
@ -1075,6 +1076,8 @@ public:
longlong val_int_cmp_int();
longlong val_int_cmp_real();
longlong val_int_cmp_decimal();
Item* vcol_subst_transformer(THD *thd, uchar *arg) override;
};
@ -2687,6 +2690,8 @@ public:
Item *in_predicate_to_equality_transformer(THD *thd, uchar *arg) override;
uint32 max_length_of_left_expr();
Item* varchar_upper_cmp_transformer(THD *thd, uchar *arg) override;
Item* vcol_subst_transformer(THD *thd, uchar *arg) override;
};
class cmp_item_row :public cmp_item
@ -2769,6 +2774,8 @@ public:
return FALSE;
}
bool count_sargable_conds(void *arg) override;
Item* vcol_subst_transformer(THD *thd, uchar *arg) override;
};

View file

@ -128,6 +128,8 @@ public:
BITMAP_BETWEEN= 1ULL << BETWEEN,
BITMAP_IN= 1ULL << IN_FUNC,
BITMAP_MULT_EQUAL= 1ULL << MULT_EQUAL_FUNC,
BITMAP_ISNULL= 1ULL << ISNULL_FUNC,
BITMAP_ISNOTNULL= 1ULL << ISNOTNULL_FUNC,
BITMAP_OTHER= 1ULL << 63,
BITMAP_ALL= 0xFFFFFFFFFFFFFFFFULL,
BITMAP_ANY_EQUALITY= BITMAP_EQ | BITMAP_EQUAL | BITMAP_MULT_EQUAL,

View file

@ -0,0 +1,466 @@
/*
Copyright (c) 2009, 2021, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation // gcc: Class implementation
#endif
#include "mariadb.h"
#include "sql_priv.h"
#include <m_ctype.h>
#include "sql_select.h"
#include "opt_trace.h"
/**
@file
@brief
Virtual Column Substitution feature makes the optimizer recognize usage of
virtual column expressions in the WHERE/ON clauses. If there is an index
on the virtual column, the optimizer is able to construct query plans that
use that index.
*/
/*
== Virtual Column Substitution In a Nutshell ==
Consider a table that defines a virtual column and an index on it:
CREATE TABLE t1 (
json_col BLOB,
...
vcol1 VARCHAR(100) AS (json_extract(json_col, '$.name')), //(*), see below
INDEX idx1(vcol1)
);
And then a query that uses virtual column's expression:
SELECT * FROM t1 WHERE json_extract(json_col, '$.name')='foo'
We'd like this query to use index idx1.
In order to achieve that, we look through potentially sargable conditions
to find the virtual column expression (we only accept exact match) and
replace it with a reference to virtual column field so the query becomes:
SELECT * FROM t1 WHERE vcol1='foo'
Then, the optimizer is able to construct ref access on index idx1.
(*) When extracting JSON fields in the real world, you'll probably want to
use json_unquote().
== Datatypes must match ==
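The type of vcol_field and vcol_expr may not match. Consider
CREATE TABLE t1 (
a varchar(10),
vcol INT as CONCAT(a,'1')
);
and conditions
concat(a,'1')=1.5 vs vcol=1.5.
The first condition compares a string expression with 1.5, the second one
compares an INT column with 1.5, so the two are not equivalent. In such
cases the substitution is not done.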
== The same expression in multiple virtual columns ==
What if there are multiple options to replace:
CREATE TABLE t1 (
col1 int,
...
vcol1 INT as (col1 + 1),
vcol2 INT as (col1 + 1),
...
INDEX idx1(vcol1, ...)
INDEX idx2(vcol2, ...)
);
SELECT * FROM t1 WHERE col1 + 1 = 5;
Currently, we will replace with the first matching column (vcol1), like MySQL
does. Since we rely on the user to define the virtual columns, we can
request that they define one virtual column instead of multiple identical
ones.
*/
/*
  State shared by the analyzer/transformer callbacks while virtual column
  substitution walks one condition.
*/
class Vcol_subst_context
{
 public:
  /* Thread handle the substitution is performed for */
  THD *thd;
  /* Indexed virtual columns that we can try substituting */
  List<Field> vcol_fields;
  /*
    How many times substitution was done. Used to determine whether to print
    the conversion info to the Optimizer Trace
  */
  uint subst_count;

  /*
    subst_count is explicitly zeroed so that the object never carries an
    indeterminate counter, even if it is inspected before the first
    condition has been processed.
  */
  Vcol_subst_context(THD *thd_arg) : thd(thd_arg), subst_count(0) {}
};
/*
  @brief
    Append to vcol_fields every virtual column that is a user-defined key
    part of an index usable by the current query on the given table.

  @return
    FALSE  Ok
    TRUE   Out of memory (List::push_back() failed)
*/
static
bool collect_indexed_vcols_for_table(TABLE *table, List<Field> *vcol_fields)
{
  // TODO: Make use of iterator to loop through keys_in_use_for_query, instead.
  for (uint idx= 0; idx < table->s->keys; idx++)
  {
    // note: we could also support histograms here
    if (table->keys_in_use_for_query.is_set(idx))
    {
      KEY *index= table->key_info + idx;
      for (uint part= 0; part < index->user_defined_key_parts; part++)
      {
        Field *key_field= index->key_part[part].field;
        if (!key_field->vcol_info)
          continue;                     // Regular column, not interesting
        if (vcol_fields->push_back(key_field))
          return TRUE;                  // Out of memory
      }
    }
  }
  return FALSE;                         // Ok
}
/*
  @brief
    Gather the indexed virtual columns of all leaf tables of the JOIN's
    select into vcol_fields.

  @return
    FALSE  Ok
    TRUE   Out of memory
*/
static
bool collect_indexed_vcols_for_join(JOIN *join, List<Field> *vcol_fields)
{
  List_iterator<TABLE_LIST> leaf_it(join->select_lex->leaf_tables);
  for (TABLE_LIST *leaf= leaf_it++; leaf; leaf= leaf_it++)
  {
    /*
      Entries without a TABLE object (non-merged semi-join or something
      like that) are skipped.
    */
    if (leaf->table &&
        collect_indexed_vcols_for_table(leaf->table, vcol_fields))
      return TRUE;                      // Out of memory
  }
  return FALSE;                         // Ok
}
/*
  @brief
    Substitute virtual columns in an Item tree.

  @param ctx       Substitution context; its subst_count is reset here
  @param item      Condition to process
  @param location  Name of the condition ("WHERE", "ON expression") to be
                   written to the Optimizer Trace

  @detail
    The tree is processed with Item::vcol_subst_analyzer and
    Item::vcol_subst_transformer. If the counter in the context is non-zero
    afterwards and tracing is on, the condition is written to the trace.
*/
static void subst_vcols_in_item(Vcol_subst_context *ctx, Item *item,
                                const char *location)
{
  THD *thd= ctx->thd;
  uchar *analyzer_arg= (uchar*) 1;

  ctx->subst_count= 0;
  item->top_level_compile(thd,
                          &Item::vcol_subst_analyzer, &analyzer_arg,
                          &Item::vcol_subst_transformer, (uchar*) ctx);

  if (!ctx->subst_count)
    return;                             // Nothing was replaced, nothing to trace

  if (unlikely(thd->trace_started()))
    trace_condition(thd, location, "virtual_column_substitution", item);
}
/*
  @brief
    Run virtual column substitution on the ON expressions of all elements
    of join_list. Nested joins are processed recursively, before the ON
    expression of the nest itself.
*/
static
void subst_vcols_in_join_list(Vcol_subst_context *ctx,
                              List<TABLE_LIST> *join_list)
{
  List_iterator<TABLE_LIST> it(*join_list);
  for (TABLE_LIST *tbl= it++; tbl; tbl= it++)
  {
    NESTED_JOIN *nest= tbl->nested_join;
    if (nest)
      subst_vcols_in_join_list(ctx, &nest->join_list);

    Item *on_cond= tbl->on_expr;
    if (on_cond)
      subst_vcols_in_item(ctx, on_cond, "ON expression");
  }
}
/*
  @brief
    Do substitution for all conditions in a JOIN: the WHERE clause and the
    ON expressions in the join list. This is the primary entry point.

  @return
    false  Ok (including the case when there is nothing to substitute)
    true   Error (out of memory)
*/
bool substitute_indexed_vcols_for_join(JOIN *join)
{
  Vcol_subst_context ctx(join->thd);

  if (collect_indexed_vcols_for_join(join, &ctx.vcol_fields))
    return true;                        // Out of memory

  if (ctx.vcol_fields.elements)
  {
    if (join->conds)
      subst_vcols_in_item(&ctx, join->conds, "WHERE");

    if (join->join_list)
      subst_vcols_in_join_list(&ctx, join->join_list);

    /* The substitution code reports failures through the THD error state */
    return join->thd->is_error();
  }

  return false;                         // Ok, nothing to do
}
/*
  @brief
    Do substitution for one table and one condition. This is for
    single-table UPDATE/DELETE.

  @param table  Table whose indexed virtual columns are considered
  @param item   The condition; may be NULL, then nothing is done

  @return
    false  Ok (including the case when there is nothing to substitute)
    true   Error (out of memory)
*/
bool substitute_indexed_vcols_for_table(TABLE *table, Item *item)
{
  Vcol_subst_context ctx(table->in_use);

  if (collect_indexed_vcols_for_table(table, &ctx.vcol_fields))
    return true;                        // Out of memory

  if (ctx.vcol_fields.elements)
  {
    if (item)
      subst_vcols_in_item(&ctx, item, "WHERE");

    /* The substitution code reports failures through the THD error state */
    return table->in_use->is_error();
  }

  return false;                         // Ok, nothing to do
}
/*
  @brief
    Find the virtual column in ctx->vcol_fields whose defining expression
    is equal to the passed item.

  @return
    The matching Field, or NULL if there is none. Items that use no table,
    that refer to an outer query, or that use more than one table are
    rejected without comparing.
*/

static Field *is_vcol_expr(Vcol_subst_context *ctx, const Item *item)
{
  const table_map tables= item->used_tables();

  if (tables == 0 ||
      (tables & OUTER_REF_TABLE_BIT) ||
      (tables & (tables - 1)))                  // more than one bit is set
    return NULL;

  List_iterator<Field> vcol_it(ctx->vcol_fields);
  for (Field *vcol= vcol_it++; vcol; vcol= vcol_it++)
  {
    if (vcol->vcol_info->expr->eq(item, true))
      return vcol;
  }
  return NULL;
}
/*
  @brief
    Produce a warning similar to raise_note_cannot_use_key_part().

  @param thd    Thread to push the note to
  @param field  Virtual column that could not be substituted in
  @param expr   Expression that was a candidate for replacement
  @param cause  Short human-readable reason, e.g. "type mismatch"

  @detail
    The printed expression is limited to a prefix of at most 64 bytes, cut
    with Well_formed_prefix so that the cut is made on a character boundary.

    Both string arguments are printed with "%.*s" (precision), so the
    computed lengths actually bound the output; "%*s" would only set a
    minimum field width. The length arguments are cast to int because that
    is what a '*' in a printf-style format consumes from the argument list.
*/

void print_vcol_subst_warning(THD *thd, Field *field, Item *expr,
                              const char *cause)
{
  StringBuffer<128> expr_buffer;
  size_t expr_length;

  expr->print(&expr_buffer, QT_EXPLAIN);
  expr_length= Well_formed_prefix(expr_buffer.charset(),
                                  expr_buffer.ptr(),
                                  MY_MIN(expr_buffer.length(), 64)).length();

  push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
                      ER_UNKNOWN_ERROR,
                      "Cannot substitute virtual column expression %.*s -> "
                      "%.*s due to %s",
                      (int) expr_length, expr_buffer.c_ptr_safe(),
                      (int) field->field_name.length, field->field_name.str,
                      cause);
}
/*
  @brief
    Replace (*vcol_expr_ref) with an Item_field for vcol_field inside
    condition cond, provided the replacement is considered compatible.

  @detail
    The substitution is valid when the condition means the same before and
    after it. The declared type of a virtual column is chosen by the user
    and may have nothing to do with the type of its expression, e.g.

       col3 INT AS (CONCAT(col1, col2))

    so the following is required here:
     - the expression and the column have the same type handler for
       comparison;
     - if the expression can be NULL then the column can be NULL as well;
     - the column's charset is equal either to the expression's collation
       or to the collation the condition compares with.
    These rules are stricter than necessary: there are cases where the
    types differ and the substitution would still be harmless.

    (Note: MySQL calls resolve_type() after the substitution, which may
     update the comparator. That should not be needed here: changing the
     comparator would change the meaning of the condition, which is exactly
     what must not happen.)

    When the check fails and the THD asks for notes about unusable keys, a
    note naming the cause is pushed.

  @return
    None.
    An out-of-memory error cannot be propagated out of Item::compile(), so
    the code that starts the compile() walk for vcol substitution checks
    thd->is_error() afterwards.
*/

static
void subst_vcol_if_compatible(Vcol_subst_context *ctx,
                              Item_bool_func *cond,
                              Item **vcol_expr_ref,
                              Field *vcol_field)
{
  THD *thd= ctx->thd;
  Item *expr= *vcol_expr_ref;
  const char *why_not= NULL;

  if (expr->type_handler_for_comparison() !=
      vcol_field->type_handler_for_comparison())
    why_not= "type mismatch";
  else if (expr->maybe_null() && !vcol_field->maybe_null())
    why_not= "type mismatch";
  else if (expr->collation.collation != vcol_field->charset() &&
           cond->compare_collation() != vcol_field->charset())
    why_not= "collation mismatch";

  if (why_not)
  {
    if (thd->give_notes_for_unusable_keys())
      print_vcol_subst_warning(thd, vcol_field, expr, why_not);
    return;
  }

  Item_field *replacement= new (thd->mem_root) Item_field(thd, vcol_field);
  if (replacement)
  {
    DBUG_ASSERT(replacement->fixed());
    thd->change_item_tree(vcol_expr_ref, replacement);
    ctx->subst_count++;
  }
  /* else: out of memory, the caller will know from thd->is_error() */
}
/*
  @brief
    Do a quick and imprecise check if it makes sense to try the Virtual
    Column Substitution transformation for this item.

  @detail
    For vcol_expr='FOO' the item to be transformed is the comparison item
    (Item_func_eq in this example), not the item representing vcol_expr.

    The walk is allowed to proceed
      1. Inside AND/OR
      2. In a selected list of comparison predicates
*/

bool Item::vcol_subst_analyzer(uchar **)
{
  const ulonglong candidate_funcs=
    Item_func::BITMAP_EQ |
    Item_func::BITMAP_EQUAL |
    Item_func::BITMAP_LT |
    Item_func::BITMAP_GT |
    Item_func::BITMAP_LE |
    Item_func::BITMAP_GE |
    Item_func::BITMAP_BETWEEN |
    Item_func::BITMAP_IN |
    Item_func::BITMAP_ISNULL |
    Item_func::BITMAP_ISNOTNULL;

  switch (type())
  {
  case Item::COND_ITEM:                         // (1)
    return true;
  case Item::FUNC_ITEM:                         // (2)
    return (((Item_func*) this)->bitmap_bit() & candidate_funcs) != 0;
  default:
    return false;
  }
}
/*
  Transformer for two-argument comparisons.

  A substitution is attempted when one argument uses no tables and the
  other one matches the expression of a virtual column from the context.
  The pair (args[0] uses no tables, args[1] is the expression) is tried
  first. The comparison item itself is always returned.
*/
Item* Item_bool_rowready_func2::vcol_subst_transformer(THD *thd, uchar *arg)
{
  DBUG_ASSERT(this->vcol_subst_analyzer(NULL));
  Vcol_subst_context *ctx= (Vcol_subst_context*) arg;
  Field *vcol_field= NULL;
  Item **vcol_expr= NULL;

  for (uint other_idx= 0; other_idx < 2; other_idx++)
  {
    const uint expr_idx= 1 - other_idx;
    if (!args[other_idx]->used_tables() &&
        (vcol_field= is_vcol_expr(ctx, args[expr_idx])))
    {
      vcol_expr= &args[expr_idx];
      break;
    }
  }
  if (!vcol_expr)
    return this; /* No substitution */

  /*
    Debug-only hook: "vcol_subst_simulate_oom" turns on
    "simulate_out_of_memory" around the substitution.
    NOTE(review): the second DBUG_SET removes "vcol_subst_simulate_oom"
    rather than "simulate_out_of_memory" - presumably to make the hook
    fire only once; confirm this is intended.
  */
  DBUG_EXECUTE_IF("vcol_subst_simulate_oom",
                  DBUG_SET("+d,simulate_out_of_memory"););
  subst_vcol_if_compatible(ctx, this, vcol_expr, vcol_field);
  DBUG_EXECUTE_IF("vcol_subst_simulate_oom",
                  DBUG_SET("-d,vcol_subst_simulate_oom"););
  return this;
}
/*
  Transformer for BETWEEN.

  Only args[0] is a candidate for substitution, and only when neither of
  the two bounds (args[1], args[2]) uses any table. The BETWEEN item itself
  is always returned.
*/
Item* Item_func_between::vcol_subst_transformer(THD *thd, uchar *arg)
{
  if (args[1]->used_tables() || args[2]->used_tables())
    return this;

  Vcol_subst_context *ctx= (Vcol_subst_context*) arg;
  Field *vcol_field= is_vcol_expr(ctx, args[0]);
  if (vcol_field)
    subst_vcol_if_compatible(ctx, this, &args[0], vcol_field);
  return this;
}
/*
  Transformer for the NULL predicates.

  The single argument is substituted if it matches the expression of a
  virtual column from the context. The predicate itself is always returned.
*/
Item* Item_func_null_predicate::vcol_subst_transformer(THD *thd, uchar *arg)
{
  Vcol_subst_context *ctx= (Vcol_subst_context*) arg;
  Field *vcol_field= is_vcol_expr(ctx, args[0]);

  if (!vcol_field)
    return this;

  subst_vcol_if_compatible(ctx, this, &args[0], vcol_field);
  return this;
}
/*
  Transformer for IN.

  Only args[0], the left part of IN, is a candidate for substitution. The
  IN item itself is always returned.
*/
Item* Item_func_in::vcol_subst_transformer(THD *thd, uchar *arg)
{
  /* Check that all arguments inside IN() are constants */
  if (compatible_types_scalar_bisection_possible())
  {
    Vcol_subst_context *ctx= (Vcol_subst_context*) arg;
    if (Field *vcol_field= is_vcol_expr(ctx, args[0]))
      subst_vcol_if_compatible(ctx, this, &args[0], vcol_field);
  }
  return this;
}

View file

@ -0,0 +1,29 @@
/*
Copyright (c) 2009, 2021, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
/*
Interface to indexed virtual column substitution module
*/
/*
  Do substitution in one join: process the join's WHERE condition and the
  ON expressions in its join list.

  @return  true on error (out of memory or the THD is in error state),
           false on success or when there is nothing to substitute
*/
bool substitute_indexed_vcols_for_join(JOIN *join);
/*
  Do substitution for one table and condition. This is for single-table
  UPDATE/DELETE.

  @param table  Table whose virtual columns are considered
  @param item   Condition to process; a NULL condition is left alone

  @return  true on error (out of memory or the THD is in error state),
           false on success or when there is nothing to substitute
*/
bool substitute_indexed_vcols_for_table(TABLE *table, Item *item);

View file

@ -515,6 +515,9 @@ bool Sql_cmd_delete::delete_from_single_table(THD *thd)
(uchar *) 0);
}
if (conds && substitute_indexed_vcols_for_table(table, conds))
DBUG_RETURN(1); // Fatal error
#ifdef WITH_PARTITION_STORAGE_ENGINE
if (prune_partitions(thd, table, conds))
{

View file

@ -2413,6 +2413,12 @@ JOIN::optimize_inner()
thd, &Item::varchar_upper_cmp_transformer);
}
if (substitute_indexed_vcols_for_join(this))
{
error= 1;
DBUG_RETURN(1);
}
conds= optimize_cond(this, conds, join_list, ignore_on_expr,
&cond_value, &cond_equal, OPT_LINK_EQUAL_FIELDS);

View file

@ -2719,4 +2719,6 @@ void propagate_new_equalities(THD *thd, Item *cond,
bool dbug_user_var_equals_str(THD *thd, const char *name, const char *value);
#include "opt_vcol_substitution.h"
#endif /* SQL_SELECT_INCLUDED */

View file

@ -470,6 +470,9 @@ bool Sql_cmd_update::update_single_table(THD *thd)
(uchar *) 0);
}
if (conds && substitute_indexed_vcols_for_table(table, conds))
DBUG_RETURN(1); // Fatal error
// Don't count on usage of 'only index' when calculating which key to use
table->covering_keys.clear_all();
transactional_table= table->file->has_transactions_and_rollback();