mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 02:46:29 +01:00 
			
		
		
		
	 ef966af801
			
		
	
	
	ef966af801
	
	
	
		
			
			(Variant 3) (commit in 11.4)
When a derived table has a GROUP BY clause:
  SELECT ...
    FROM  (SELECT ... GROUP BY col1, col2) AS tbl
The optimizer would use inner join's output cardinality as an estimate
of derived table size, ignoring the fact that GROUP BY operation would
produce much fewer groups.
Add code to produce tighter bounds:
- The GROUP BY list is split into per-table lists. If GROUP BY list has
  expressions that refer to multiple tables, we fall back to join output
  cardinality.
- For each table, the first cardinality estimate is join_tab->read_records.
- Then, we try to get a tighter bound by using index statistics.
- If indexes do not cover all GROUP BY columns, we try to use per-column
  EITS statistics.
		
	
			
		
			
				
	
	
		
			213 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			213 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
| #
 | |
| # MDEV-30877: Output cardinality for derived table ignores GROUP BY
 | |
| #
 | |
| create table t1 (
 | |
| groups_20 int,
 | |
| groups_20_2 int,
 | |
| b int,
 | |
| index (groups_20)
 | |
| );
 | |
| insert into t1 select seq/1000, seq/1000, seq from seq_1_to_20000;
 | |
| create table t2 (a int, b int, index(a));
 | |
| insert into t2 select seq, seq from seq_1_to_10;
 | |
| analyze table t1 persistent for all;
 | |
| Table	Op	Msg_type	Msg_text
 | |
| test.t1	analyze	status	Engine-independent statistics collected
 | |
| test.t1	analyze	status	Table is already up to date
 | |
| set optimizer_trace=1;
 | |
| # Case 1: one indexed column
 | |
| explain
 | |
| select *
 | |
| from
 | |
| t2,
 | |
| (select count(*) cnt, groups_20 from t1
 | |
| group by groups_20) TBL
 | |
| where
 | |
| 1;
 | |
| id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 | |
| 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	
 | |
| 1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	20	Using join buffer (flat, BNL join)
 | |
| 2	DERIVED	t1	index	NULL	groups_20	5	NULL	20000	Using index
 | |
| select
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| from
 | |
| information_schema.optimizer_trace;
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| [
 | |
|     {
 | |
|         "join_output_cardinality": 20000,
 | |
|         "estimation": 
 | |
|         [
 | |
|             {
 | |
|                 "table": "t1",
 | |
|                 "steps": 
 | |
|                 [
 | |
|                     {
 | |
|                         "index_name": "groups_20",
 | |
|                         "cardinality": 20.99999895
 | |
|                     }
 | |
|                 ],
 | |
|                 "cardinality": 20.99999895
 | |
|             }
 | |
|         ],
 | |
|         "post_group_cardinality": 20.99999895
 | |
|     }
 | |
| ]
 | |
| # Case 2: one non- indexed column
 | |
| explain
 | |
| select *
 | |
| from
 | |
| t2,
 | |
| (select count(*) cnt, groups_20_2 from t1
 | |
| group by groups_20_2) TBL
 | |
| where
 | |
| 1;
 | |
| id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 | |
| 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	
 | |
| 1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	20	Using join buffer (flat, BNL join)
 | |
| 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20000	Using temporary; Using filesort
 | |
| select
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| from
 | |
| information_schema.optimizer_trace;
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| [
 | |
|     {
 | |
|         "join_output_cardinality": 20000,
 | |
|         "estimation": 
 | |
|         [
 | |
|             {
 | |
|                 "table": "t1",
 | |
|                 "steps": 
 | |
|                 [
 | |
|                     {
 | |
|                         "column": "groups_20_2",
 | |
|                         "cardinality": 20.99999895
 | |
|                     }
 | |
|                 ],
 | |
|                 "cardinality": 20.99999895
 | |
|             }
 | |
|         ],
 | |
|         "post_group_cardinality": 20.99999895
 | |
|     }
 | |
| ]
 | |
| # Case 4: one indexed column, multiple tables
 | |
| create table t3(c int);
 | |
| insert into t3 select seq from seq_1_to_10;
 | |
| explain
 | |
| select *
 | |
| from
 | |
| t2,
 | |
| (select count(*) cnt, groups_20 from t1,t3
 | |
| group by groups_20) TBL;
 | |
| id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 | |
| 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	
 | |
| 1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	20	Using join buffer (flat, BNL join)
 | |
| 2	DERIVED	t3	ALL	NULL	NULL	NULL	NULL	10	Using temporary; Using filesort
 | |
| 2	DERIVED	t1	index	NULL	groups_20	5	NULL	20000	Using index; Using join buffer (flat, BNL join)
 | |
| select
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| from
 | |
| information_schema.optimizer_trace;
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| [
 | |
|     {
 | |
|         "join_output_cardinality": 200000,
 | |
|         "estimation": 
 | |
|         [
 | |
|             {
 | |
|                 "table": "t1",
 | |
|                 "steps": 
 | |
|                 [
 | |
|                     {
 | |
|                         "index_name": "groups_20",
 | |
|                         "cardinality": 20.99999895
 | |
|                     }
 | |
|                 ],
 | |
|                 "cardinality": 20.99999895
 | |
|             }
 | |
|         ],
 | |
|         "post_group_cardinality": 20.99999895
 | |
|     }
 | |
| ]
 | |
| # Case 5: group by two tables
 | |
| explain
 | |
| select *
 | |
| from
 | |
| t2,
 | |
| (select count(*) cnt, groups_20 from t1,t3
 | |
| group by groups_20, t3.c) TBL;
 | |
| id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 | |
| 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	
 | |
| 1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	209	Using join buffer (flat, BNL join)
 | |
| 2	DERIVED	t3	ALL	NULL	NULL	NULL	NULL	10	Using temporary; Using filesort
 | |
| 2	DERIVED	t1	index	NULL	groups_20	5	NULL	20000	Using index; Using join buffer (flat, BNL join)
 | |
| select
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| from
 | |
| information_schema.optimizer_trace;
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| [
 | |
|     {
 | |
|         "join_output_cardinality": 200000,
 | |
|         "estimation": 
 | |
|         [
 | |
|             {
 | |
|                 "table": "t1",
 | |
|                 "steps": 
 | |
|                 [
 | |
|                     {
 | |
|                         "index_name": "groups_20",
 | |
|                         "cardinality": 20.99999895
 | |
|                     }
 | |
|                 ],
 | |
|                 "cardinality": 20.99999895
 | |
|             },
 | |
|             {
 | |
|                 "table": "t3",
 | |
|                 "steps": 
 | |
|                 [],
 | |
|                 "cardinality": 10
 | |
|             }
 | |
|         ],
 | |
|         "post_group_cardinality": 209.9999895
 | |
|     }
 | |
| ]
 | |
| # Now, without an index
 | |
| explain
 | |
| select *
 | |
| from
 | |
| t2,
 | |
| (select count(*) cnt, groups_20 from t1 use index(),t3 group by groups_20) TBL;
 | |
| id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 | |
| 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	10	
 | |
| 1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	20	Using join buffer (flat, BNL join)
 | |
| 2	DERIVED	t3	ALL	NULL	NULL	NULL	NULL	10	Using temporary; Using filesort
 | |
| 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	20000	Using join buffer (flat, BNL join)
 | |
| select
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| from
 | |
| information_schema.optimizer_trace;
 | |
| json_detailed(json_extract(trace, '$**.materialized_output_cardinality'))
 | |
| [
 | |
|     {
 | |
|         "join_output_cardinality": 200000,
 | |
|         "estimation": 
 | |
|         [
 | |
|             {
 | |
|                 "table": "t1",
 | |
|                 "steps": 
 | |
|                 [
 | |
|                     {
 | |
|                         "column": "groups_20",
 | |
|                         "cardinality": 20.99999895
 | |
|                     }
 | |
|                 ],
 | |
|                 "cardinality": 20.99999895
 | |
|             }
 | |
|         ],
 | |
|         "post_group_cardinality": 20.99999895
 | |
|     }
 | |
| ]
 | |
| set optimizer_trace=default;
 | |
| drop table t1, t2, t3;
 |