MDEV-22851: Engine independent index statistics are incorrect for large tables on Windows

An oveflow was happening on windows because on Windows sizeof(ulong) is 4 bytes
while it is 8 bytes on Linux.
Switched avg_frequency and avg length for column statistics to ulonglong.
Switched avg_frequency for index statistics to ulonglong.
This commit is contained in:
Varun Gupta 2020-07-14 12:09:10 +05:30
parent 67a03b7c94
commit dfdfeecb03
4 changed files with 67 additions and 15 deletions

View file

@ -770,4 +770,33 @@ set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selecti
set @save_optimizer_switch=@@optimizer_switch;
set use_stat_tables=@save_use_stat_tables;
set optimizer_switch=@save_optimizer_switch_for_stat_tables_test;
#
# MDEV-22851: Engine independent index statistics are incorrect for large tables on Windows.
#
CREATE TABLE t1 AS SELECT TABLE_CATALOG FROM INFORMATION_SCHEMA.COLUMNS;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
SET @save_use_stat_tables= @@use_stat_tables;
SET use_stat_tables= preferably;
SELECT count(*) FROM t1;
count(*)
54336
CREATE INDEX idx ON t1(TABLE_CATALOG);
Warnings:
Warning 1071 Specified key was too long; max key length is 767 bytes
ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (TABLE_CATALOG) INDEXES (idx);
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
SELECT * FROM mysql.index_stats where table_name='t1';
db_name table_name index_name prefix_arity avg_frequency
test t1 idx 1 NULL
SELECT * FROM mysql.column_stats where table_name='t1';
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
test t1 TABLE_CATALOG def def 0.0000 3.0000 54336.0000 0 NULL NULL
SET use_stat_tables= @save_use_stat_tables;
DROP TABLE t1;
SET SESSION STORAGE_ENGINE=DEFAULT;

View file

@ -9,4 +9,25 @@ set optimizer_switch='extended_keys=on';
set optimizer_switch=@save_optimizer_switch_for_stat_tables_test;
--echo #
--echo # MDEV-22851: Engine independent index statistics are incorrect for large tables on Windows.
--echo #
CREATE TABLE t1 AS SELECT TABLE_CATALOG FROM INFORMATION_SCHEMA.COLUMNS;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
SET @save_use_stat_tables= @@use_stat_tables;
SET use_stat_tables= preferably;
SELECT count(*) FROM t1;
CREATE INDEX idx ON t1(TABLE_CATALOG);
ANALYZE TABLE t1 PERSISTENT FOR COLUMNS (TABLE_CATALOG) INDEXES (idx);
SELECT * FROM mysql.index_stats where table_name='t1';
SELECT * FROM mysql.column_stats where table_name='t1';
SET use_stat_tables= @save_use_stat_tables;
DROP TABLE t1;
SET SESSION STORAGE_ENGINE=DEFAULT;

View file

@ -2096,8 +2096,8 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
sizeof(Index_statistics) * keys);
uint key_parts= table->s->ext_key_parts;
ulong *idx_avg_frequency= (ulong*) alloc_root(&table->mem_root,
sizeof(ulong) * key_parts);
ulonglong *idx_avg_frequency= (ulonglong*) alloc_root(&table->mem_root,
sizeof(ulonglong) * key_parts);
uint columns= 0;
for (field_ptr= table->field; *field_ptr; field_ptr++)
@ -2142,7 +2142,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
}
}
memset(idx_avg_frequency, 0, sizeof(ulong) * key_parts);
memset(idx_avg_frequency, 0, sizeof(ulonglong) * key_parts);
KEY *key_info, *end;
for (key_info= table->key_info, end= key_info + table->s->keys;
@ -2258,14 +2258,14 @@ static int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share)
}
uint key_parts= table_share->ext_key_parts;
ulong *idx_avg_frequency= table_stats->idx_avg_frequency;
ulonglong *idx_avg_frequency= table_stats->idx_avg_frequency;
if (!idx_avg_frequency)
{
idx_avg_frequency= (ulong*) alloc_root(&stats_cb->mem_root,
sizeof(ulong) * key_parts);
idx_avg_frequency= (ulonglong*) alloc_root(&stats_cb->mem_root,
sizeof(ulonglong) * key_parts);
if (idx_avg_frequency)
{
memset(idx_avg_frequency, 0, sizeof(ulong) * key_parts);
memset(idx_avg_frequency, 0, sizeof(ulonglong) * key_parts);
table_stats->idx_avg_frequency= idx_avg_frequency;
for (key_info= table_share->key_info, end= key_info + keys;
key_info < end;

View file

@ -279,7 +279,9 @@ public:
uchar *min_max_record_buffers; /* Record buffers for min/max values */
Column_statistics *column_stats; /* Array of statistical data for columns */
Index_statistics *index_stats; /* Array of statistical data for indexes */
ulong *idx_avg_frequency; /* Array of records per key for index prefixes */
/* Array of records per key for index prefixes */
ulonglong *idx_avg_frequency;
uchar *histograms; /* Sequence of histograms */
};
@ -331,7 +333,7 @@ private:
CHAR values are stripped of trailing spaces.
Flexible values are stripped of their length prefixes.
*/
ulong avg_length;
ulonglong avg_length;
/*
The ratio N/D multiplied by the scale factor Scale_factor_avg_frequency,
@ -339,7 +341,7 @@ private:
N is the number of rows with not null value in the column,
D the number of distinct values among them
*/
ulong avg_frequency;
ulonglong avg_frequency;
public:
@ -389,12 +391,12 @@ public:
void set_avg_length (double val)
{
avg_length= (ulong) (val * Scale_factor_avg_length);
avg_length= (ulonglong) (val * Scale_factor_avg_length);
}
void set_avg_frequency (double val)
{
avg_frequency= (ulong) (val * Scale_factor_avg_frequency);
avg_frequency= (ulonglong) (val * Scale_factor_avg_frequency);
}
bool min_max_values_are_provided()
@ -433,11 +435,11 @@ private:
in the first k components, and D is the number of distinct
k-component prefixes among them
*/
ulong *avg_frequency;
ulonglong *avg_frequency;
public:
void init_avg_frequency(ulong *ptr) { avg_frequency= ptr; }
void init_avg_frequency(ulonglong *ptr) { avg_frequency= ptr; }
bool avg_frequency_is_inited() { return avg_frequency != NULL; }
@ -448,7 +450,7 @@ public:
void set_avg_frequency(uint i, double val)
{
avg_frequency[i]= (ulong) (val * Scale_factor_avg_frequency);
avg_frequency[i]= (ulonglong) (val * Scale_factor_avg_frequency);
}
};