MDEV-30218: Incorrect optimization for rowid_filtering, correction

Enable use of Rowid Filter optimization with eq_ref access.
Use the following assumptions:
- Assume index-only access cost is 50% of non-index-only access cost.
- Take into account that "Eq_ref access cache" reduces the number of
  lookups eq_ref access will make.
  = This means the number of Rowid Filter checks is reduced also
  = Eq_ref access cost is computed using that assumption (see
    prev_record_reads() call), so we should use it in all cost '
    computations.
This commit is contained in:
Sergei Petrunia 2023-01-26 12:04:28 +03:00
parent dd24fa3063
commit c8f9bb2718
3 changed files with 73 additions and 15 deletions

View file

@ -336,7 +336,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 Using index condition
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 Using where
1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (5%) Using where; Using rowid filter
set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
@ -369,6 +369,14 @@ EXPLAIN
"key_length": "4",
"used_key_parts": ["o_orderkey"],
"ref": ["dbt3_s001.lineitem.l_orderkey"],
"rowid_filter": {
"range": {
"key": "i_o_totalprice",
"used_key_parts": ["o_totalprice"]
},
"rows": 69,
"selectivity_pct": 4.6
},
"rows": 1,
"filtered": 4.6,
"attached_condition": "orders.o_totalprice between 200000 and 230000"
@ -381,7 +389,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 98.00 100.00 100.00 Using index condition
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 4.60 11.22 Using where
1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (5%) 0.11 (10%) 4.60 100.00 Using where; Using rowid filter
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
@ -420,12 +428,25 @@ ANALYZE
"key_length": "4",
"used_key_parts": ["o_orderkey"],
"ref": ["dbt3_s001.lineitem.l_orderkey"],
"rowid_filter": {
"range": {
"key": "i_o_totalprice",
"used_key_parts": ["o_totalprice"]
},
"rows": 69,
"selectivity_pct": 4.6,
"r_rows": 71,
"r_lookups": 96,
"r_selectivity_pct": 10.417,
"r_buffer_size": "REPLACED",
"r_filling_time_ms": "REPLACED"
},
"r_loops": 98,
"rows": 1,
"r_rows": 1,
"r_rows": 0.1122,
"r_total_time_ms": "REPLACED",
"filtered": 4.6,
"r_filtered": 11.224,
"r_filtered": 100,
"attached_condition": "orders.o_totalprice between 200000 and 230000"
}
}
@ -569,7 +590,7 @@ l_quantity > 45 AND
o_totalprice between 180000 and 230000;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE lineitem range|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_shipdate|i_l_quantity 4|9 NULL 509 (12%) Using index condition; Using where; Using rowid filter
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 Using where
1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (9%) Using where; Using rowid filter
set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
@ -613,6 +634,14 @@ EXPLAIN
"key_length": "4",
"used_key_parts": ["o_orderkey"],
"ref": ["dbt3_s001.lineitem.l_orderkey"],
"rowid_filter": {
"range": {
"key": "i_o_totalprice",
"used_key_parts": ["o_totalprice"]
},
"rows": 139,
"selectivity_pct": 9.2667
},
"rows": 1,
"filtered": 9.2667,
"attached_condition": "orders.o_totalprice between 180000 and 230000"
@ -626,7 +655,7 @@ l_quantity > 45 AND
o_totalprice between 180000 and 230000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE lineitem range|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_shipdate|i_l_quantity 4|9 NULL 509 (12%) 60.00 (11%) 11.69 100.00 Using index condition; Using where; Using rowid filter
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 9.27 26.67 Using where
1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (9%) 0.27 (25%) 9.27 100.00 Using where; Using rowid filter
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
@ -681,12 +710,25 @@ ANALYZE
"key_length": "4",
"used_key_parts": ["o_orderkey"],
"ref": ["dbt3_s001.lineitem.l_orderkey"],
"rowid_filter": {
"range": {
"key": "i_o_totalprice",
"used_key_parts": ["o_totalprice"]
},
"rows": 139,
"selectivity_pct": 9.2667,
"r_rows": 144,
"r_lookups": 59,
"r_selectivity_pct": 25.424,
"r_buffer_size": "REPLACED",
"r_filling_time_ms": "REPLACED"
},
"r_loops": 60,
"rows": 1,
"r_rows": 1,
"r_rows": 0.2667,
"r_total_time_ms": "REPLACED",
"filtered": 9.2667,
"r_filtered": 26.667,
"r_filtered": 100,
"attached_condition": "orders.o_totalprice between 180000 and 230000"
}
}

View file

@ -132,7 +132,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where
1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where
1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where
1 PRIMARY t3 eq_ref PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX PRIMARY 34 test.t3.PARENTID 1 Using where
1 PRIMARY t3 ref|filter PRIMARY,FFOLDERID_IDX,CMFLDRPARNT_IDX FFOLDERID_IDX|CMFLDRPARNT_IDX 34|35 test.t3.PARENTID 1 (29%) Using where; Using rowid filter
drop table t1, t2, t3, t4;
CREATE TABLE t1 (a int(10) , PRIMARY KEY (a)) Engine=InnoDB;
INSERT INTO t1 VALUES (1),(2);

View file

@ -7902,7 +7902,27 @@ best_access_path(JOIN *join,
(s->table->file->index_flags(start_key->key,0,1) &
HA_DO_RANGE_FILTER_PUSHDOWN))
{
double rows= record_count * records;
double rows;
if (type == JT_EQ_REF)
{
/*
Treat EQ_REF access in a special way:
1. We have no cost for index-only read. Assume its cost is 50% of
the cost of the full read.
2. A regular ref access will do #record_count lookups, but eq_ref
has "lookup cache" which reduces the number of lookups made.
The estimation code uses prev_record_reads() call to estimate:
tmp = prev_record_reads(join_positions, idx, found_ref);
Set the effective number of rows from "tmp" here.
*/
keyread_tmp= tmp/ 2;
rows= tmp;
}
else
rows= record_count * records;
/*
If we use filter F with selectivity s the the cost of fetching data
@ -7945,10 +7965,6 @@ best_access_path(JOIN *join,
we cannot use filters as the cost calculation below would cause
tmp to become negative. The future resultion is to not limit
cost with worst_seek.
We cannot use filter with JT_EQ_REF as in this case 'tmp' is
number of rows from prev_record_read() and keyread_tmp is 0. These
numbers are not usable with rowid filter code.
*/
double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0);
if (!(records < s->worst_seeks &&
@ -7956,7 +7972,7 @@ best_access_path(JOIN *join,
trace_access_idx.add("rowid_filter_skipped", "worst/max seeks clipping");
else if (access_cost_factor <= 0.0)
trace_access_idx.add("rowid_filter_skipped", "cost_factor <= 0");
else if (type != JT_EQ_REF)
else
{
filter=
table->best_range_rowid_filter_for_partial_join(start_key->key,