mariadb/mysql-test/main/analyze.result
Monty a6bf4b5807 MDEV-29693 ANALYZE TABLE still flushes table definition cache when engine-independent statistics is used
This commits enables reloading of engine-independent statistics
without flushing the table from table definition cache.

This is achieved by allowing multiple version of the
TABLE_STATISTICS_CB object and having independent pointers to it in
TABLE and TABLE_SHARE.  The TABLE_STATISTICS_CB object have reference
pointers and are freed when no one is pointing to it anymore.

TABLE's TABLE_STATISTICS_CB pointer is updated to use the
TABLE_SHARE's pointer when read_statistics_for_tables() is called at
the beginning of a query.

Main changes:
- read_statistics_for_table() will allocate an new TABLE_STATISTICS_CB
  object.
- All get_stat_values() functions has a new parameter that tells
  where collected data should be stored. get_stat_values() are not
  using the table_field object anymore to store data.
- All get_stat_values() functions returns 1 if they found any
  data in the statistics tables.

Other things:
- Fixed INSERT DELAYED to not read statistics tables.
- Removed Statistics_state from TABLE_STATISTICS_CB as this is not
  needed anymore as wer are not changing TABLE_SHARE->stats_cb while
  calculating or loading statistics.
- Store values used with store_from_statistical_minmax_field() in
  TABLE_STATISTICS_CB::mem_root. This allowed me to remove the function
  delete_stat_values_for_table_share().
  - Field_blob::store_from_statistical_minmax_field() is implemented
    but is not normally used as we do not yet support EIS statistics
    for blobs. For example Field_blob::update_min() and
    Field_blob::update_max() are not implemented.
    Note that the function can be called if there is an concurrent
    "ALTER TABLE MODIFY field BLOB" running because of a bug in
    ALTER TABLE where it deletes entries from column_stats
    before it has an exclusive lock on the table.
- Use result of field->val_str(&val) as a pointer to the result
  instead of val (safetly fix).
- Allocate memory for collected statistics in THD::mem_root, not in
  in TABLE::mem_root. This could cause the TABLE object to grow if a
  ANALYZE TABLE was run many times on the same table.
  This was done in allocate_statistics_for_table(),
  create_min_max_statistical_fields_for_table() and
  create_min_max_statistical_fields_for_table_share().
- Store in TABLE_STATISTICS_CB::stats_available which statistics was
  found in the statistics tables.
- Removed index_table from class Index_prefix_calc as it was not used.
- Added TABLE_SHARE::LOCK_statistics to ensure we don't load EITS
  in parallel. First thread will load it, others will reuse the
  loaded data.
- Eliminate read_histograms_for_table(). The loading happens within
  read_statistics_for_tables() if histograms are needed.
  One downside is that if we have read statistics without histograms
  before and someone requires histograms, we have to read all statistics
  again (once) from the statistics tables.
  A smaller downside is the need to call alloc_root() for each
  individual histogram. Before we could allocate all the space for
  histograms with a single alloc_root.
- Fixed bug in MyISAM and Aria where they did not properly notice
  that table had changed after analyze table. This was not a problem
  before this patch as then the MyISAM and Aria tables where flushed
  as part of ANALYZE table which did hide this issue.
- Fixed a bug in ANALYZE table where table->records could be seen as 0
  in collect_statistics_for_table(). The effect of this unlikely bug
  was that a full table scan could be done even if
  analyze_sample_percentage was not set to 1.
- Changed multiple mallocs in a row to use multi_alloc_root().
- Added a mutex protection in update_statistics_for_table() to ensure
  that several tables are not updating the statistics at the same time.

Some of the changes in sql_statistics.cc are based on a patch from
Oleg Smirnov <olernov@gmail.com>

Co-authored-by: Oleg Smirnov <olernov@gmail.com>
Co-authored-by: Vicentiu Ciorbaru <cvicentiu@gmail.com>
Reviewer: Sergei Petrunia <sergey@mariadb.com>
2023-08-18 13:28:39 +03:00

101 lines
3.8 KiB
Text

create table t1 (a bigint);
lock tables t1 write;
insert into t1 values(0);
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
unlock tables;
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
drop table t1;
create table t1 (a bigint);
insert into t1 values(0);
lock tables t1 write;
delete from t1;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
unlock tables;
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
drop table t1;
create table t1 (a bigint);
insert into t1 values(0);
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
drop table t1;
create table t1 (a mediumtext, fulltext key key1(a)) charset utf8 collate utf8_general_ci engine myisam;
insert into t1 values ('hello');
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze Warning Engine-independent statistics are not collected for column 'a'
test.t1 analyze status OK
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze Warning Engine-independent statistics are not collected for column 'a'
test.t1 analyze status Table is already up to date
drop table t1;
CREATE TABLE t1 (a int);
prepare stmt1 from "SELECT * FROM t1 PROCEDURE ANALYSE()";
execute stmt1;
Field_name Min_value Max_value Min_length Max_length Empties_or_zeros Nulls Avg_value_or_avg_length Std Optimal_fieldtype
execute stmt1;
Field_name Min_value Max_value Min_length Max_length Empties_or_zeros Nulls Avg_value_or_avg_length Std Optimal_fieldtype
deallocate prepare stmt1;
drop table t1;
create temporary table t1(a int, index(a));
insert into t1 values('1'),('2'),('3'),('4'),('5');
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Ignored
t1 1 a 1 a A 5 NULL NULL YES BTREE NO
drop table t1;
End of 4.1 tests
create table t1(a int);
analyze table t1 extended;
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'extended' at line 1
optimize table t1 extended;
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'extended' at line 1
drop table t1;
End of 5.0 tests
#
# Test analyze of text column (not yet supported)
#
set optimizer_use_condition_selectivity=4;
set histogram_type='single_prec_hb';
set histogram_size=255;
create table t1 (a int not null, t tinytext, tx text);
insert into t1 select seq+1, repeat('X',seq*5), repeat('X',seq*10) from seq_0_to_50;
insert into t1 select seq+100, repeat('X',5), "" from seq_1_to_10;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze Warning Engine-independent statistics are not collected for column 't'
test.t1 analyze Warning Engine-independent statistics are not collected for column 'tx'
test.t1 analyze status OK
explain select count(*) from t1 where t='XXXXXX';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 61 Using where
select column_name, min_value, max_value, hist_size from mysql.column_stats where table_name='t1';
column_name min_value max_value hist_size
a 1 110 255
drop table t1;
set use_stat_tables=default;
set histogram_type=default;
set histogram_size=default;
#
# End of 10.6 tests
#