MDEV-19474: Histogram statistics are used even with optimizer_use_condition_selectivity=3

The issue here was histogram statistics were being used even when
the level of optimizer_use_condition_selectivity doesn't allow
usage of statistics from histogram.

The histogram statistics are read for a table only when
optimizer_use_condition_selectivity > 3. But the TABLE structure can be
stored in the internal table cache and be reused for the next query.
So in this case the histogram statistics will be available for the next query.

The fix would be to make sure to use the histogram statistics only when
optimizer_use_condition_selectivity > 3.
This commit is contained in:
Varun Gupta 2021-02-09 20:27:21 +05:30
parent e926964cb8
commit a461e4d306
5 changed files with 111 additions and 4 deletions

View file

@ -1910,5 +1910,39 @@ HEX(a) b
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size;
DROP TABLE t1;
#
# MDEV-19474: Histogram statistics are used even with optimizer_use_condition_selectivity=3
#
CREATE TABLE t1(a int);
INSERT INTO t1 values (1),(2),(2),(3),(4);
SET optimizer_use_condition_selectivity=4;
SET histogram_size= 255;
set use_stat_tables='preferably';
ANALYZE TABLE t1 PERSISTENT FOR ALL;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 39.84 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
SET optimizer_use_condition_selectivity=3;
# filtered should show 25 %
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 25.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
FLUSH TABLES;
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 25.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size;
set use_stat_tables= @save_use_stat_tables;
DROP TABLE t1;
# End of 10.2 tests
set @@global.histogram_size=@save_histogram_size;

View file

@ -1920,6 +1920,40 @@ HEX(a) b
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size;
DROP TABLE t1;
#
# MDEV-19474: Histogram statistics are used even with optimizer_use_condition_selectivity=3
#
CREATE TABLE t1(a int);
INSERT INTO t1 values (1),(2),(2),(3),(4);
SET optimizer_use_condition_selectivity=4;
SET histogram_size= 255;
set use_stat_tables='preferably';
ANALYZE TABLE t1 PERSISTENT FOR ALL;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 39.84 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
SET optimizer_use_condition_selectivity=3;
# filtered should show 25 %
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 25.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
FLUSH TABLES;
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 25.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size;
set use_stat_tables= @save_use_stat_tables;
DROP TABLE t1;
# End of 10.2 tests
set @@global.histogram_size=@save_histogram_size;
set optimizer_switch=@save_optimizer_switch_for_selectivity_test;

View file

@ -1302,6 +1302,33 @@ set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivi
set histogram_size=@save_histogram_size;
DROP TABLE t1;
--echo #
--echo # MDEV-19474: Histogram statistics are used even with optimizer_use_condition_selectivity=3
--echo #
CREATE TABLE t1(a int);
INSERT INTO t1 values (1),(2),(2),(3),(4);
SET optimizer_use_condition_selectivity=4;
SET histogram_size= 255;
set use_stat_tables='preferably';
ANALYZE TABLE t1 PERSISTENT FOR ALL;
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
SET optimizer_use_condition_selectivity=3;
--echo # filtered should show 25 %
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
FLUSH TABLES;
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size;
set use_stat_tables= @save_use_stat_tables;
DROP TABLE t1;
--echo # End of 10.2 tests
#

View file

@ -3732,6 +3732,7 @@ double get_column_range_cardinality(Field *field,
if (!table->stats_is_read)
return tab_records;
THD *thd= table->in_use;
double col_nulls= tab_records * col_stats->get_nulls_ratio();
double col_non_nulls= tab_records - col_nulls;
@ -3762,7 +3763,7 @@ double get_column_range_cardinality(Field *field,
col_stats->min_max_values_are_provided())
{
Histogram *hist= &col_stats->histogram;
if (hist->is_available())
if (hist->is_usable(thd))
{
store_key_image_to_rec(field, (uchar *) min_endp->key,
field->key_length());
@ -3806,10 +3807,10 @@ double get_column_range_cardinality(Field *field,
max_mp_pos= 1.0;
Histogram *hist= &col_stats->histogram;
if (!hist->is_available())
sel= (max_mp_pos - min_mp_pos);
else
if (hist->is_usable(thd))
sel= hist->range_selectivity(min_mp_pos, max_mp_pos);
else
sel= (max_mp_pos - min_mp_pos);
res= col_non_nulls * sel;
set_if_bigger(res, col_stats->get_avg_frequency());
}

View file

@ -212,6 +212,17 @@ public:
bool is_available() { return get_size() > 0 && get_values(); }
/*
This function checks that histograms should be usable only when
1) the level of optimizer_use_condition_selectivity > 3
2) histograms have been collected
*/
bool is_usable(THD *thd)
{
return thd->variables.optimizer_use_condition_selectivity > 3 &&
is_available();
}
void set_value(uint i, double val)
{
switch (type) {