mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 19:06:14 +01:00 
			
		
		
		
	 15623c7f29
			
		
	
	
	15623c7f29
	
	
	
		
			
			When executing a statement of the form
  SELECT AGGR_FN(DISTINCT c1, c2,..,cn) FROM t1,
where AGGR_FN is an aggregate function such as COUNT(), AVG() or SUM(),
and a unique index exists on table t1 covering some or all of the
columns (c1, c2,..,cn), the retrieved values are inherently unique.
Consequently, the need for de-duplication imposed by the DISTINCT
clause can be eliminated, leading to optimization of aggregation
operations.
This optimization applies under the following conditions:
  - only one table involved in the join (not counting const tables)
  - some arguments of the aggregate function are fields
        (not functions/subqueries)
This optimization extends to queries of the form
  SELECT AGGR_FN(c1, c2,..,cn) GROUP BY cx,..cy
when a unique index covers some or all of the columns
(c1, c2,..cn, cx,..cy)
		
	
			
		
			
				
	
	
		
			322 lines
		
	
	
	
		
			8.2 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			322 lines
		
	
	
	
		
			8.2 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
| #
 | |
| # MDEV-30660 COUNT DISTINCT seems unnecessarily slow when run on a PK
 | |
| #
 | |
| set @save_optimizer_trace = @@optimizer_trace;
 | |
| SET optimizer_trace='enabled=on';
 | |
| CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT NOT NULL);
 | |
| INSERT INTO t1 VALUES (1,1), (2,1), (3,1);
 | |
| # Optimization is applied (aggregator=simple):
 | |
| SELECT COUNT(DISTINCT a) FROM t1;
 | |
| COUNT(DISTINCT a)
 | |
| 3
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t1.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| SELECT AVG(DISTINCT a), SUM(DISTINCT b) FROM t1;
 | |
| AVG(DISTINCT a)	SUM(DISTINCT b)
 | |
| 2.0000	1
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "avg(distinct t1.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     },
 | |
|     {
 | |
|         "function": "sum(distinct t1.b)",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| # Only `a` is unique but it's enough to eliminate DISTINCT:
 | |
| SELECT COUNT(DISTINCT b, a) FROM t1;
 | |
| COUNT(DISTINCT b, a)
 | |
| 3
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t1.b,t1.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| SELECT COUNT(DISTINCT a, a + b) FROM t1;
 | |
| COUNT(DISTINCT a, a + b)
 | |
| 3
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t1.a,t1.a + t1.b)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| SELECT SUM(DISTINCT a), AVG(DISTINCT a), COUNT(DISTINCT a) FROM t1 WHERE a > 1;
 | |
| SUM(DISTINCT a)	AVG(DISTINCT a)	COUNT(DISTINCT a)
 | |
| 5	2.5000	2
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "sum(distinct t1.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     },
 | |
|     {
 | |
|         "function": "avg(distinct t1.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     },
 | |
|     {
 | |
|         "function": "count(distinct t1.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| # Optimization is not applied 'cause function argument is not a field
 | |
| # (aggregator=distinct):
 | |
| SELECT SUM(DISTINCT a + b) FROM t1;
 | |
| SUM(DISTINCT a + b)
 | |
| 9
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "sum(distinct t1.a + t1.b)",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| SELECT COUNT(DISTINCT b) FROM t1;
 | |
| COUNT(DISTINCT b)
 | |
| 1
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t1.b)",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| SELECT AVG(DISTINCT b / a) FROM t1;
 | |
| AVG(DISTINCT b / a)
 | |
| 0.61110000
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "avg(distinct t1.b / t1.a)",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| EXPLAIN SELECT COUNT(DISTINCT (SELECT a)) FROM t1;
 | |
| id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 | |
| 1	PRIMARY	t1	index	NULL	PRIMARY	4	NULL	3	Using index
 | |
| 2	DEPENDENT SUBQUERY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	No tables used
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct (/* select#2 */ select t1.a))",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| CREATE TABLE t2 (a INT);
 | |
| INSERT INTO t2 VALUES (1), (2);
 | |
| # Optimization is not applied 'cause there is more than one table
 | |
| SELECT COUNT(DISTINCT t1.a) FROM t1, t2;
 | |
| COUNT(DISTINCT t1.a)
 | |
| 3
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t1.a)",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| SELECT AVG(DISTINCT t1.a) FROM t1, t2;
 | |
| AVG(DISTINCT t1.a)
 | |
| 2.0000
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "avg(distinct t1.a)",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| # Const tables, optimization is applied
 | |
| SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1) AS t2;
 | |
| COUNT(DISTINCT a)
 | |
| 3
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t1.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| SELECT AVG(DISTINCT t1.a) FROM (SELECT 1 AS a) AS t2, t1, (SELECT 2 AS a) AS t3;
 | |
| AVG(DISTINCT t1.a)
 | |
| 2.0000
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "avg(distinct t1.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1 UNION SELECT 2) AS t2;
 | |
| COUNT(DISTINCT a)
 | |
| 3
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t1.a)",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| # Unique index on two columns
 | |
| CREATE TABLE t3 (a INT NOT NULL, b INT NOT NULL);
 | |
| INSERT INTO t3 VALUES (1,1), (1,2), (1,3), (2,1), (2,2), (3,1), (3,2);
 | |
| CREATE UNIQUE INDEX t3_a_b ON t3 (a, b);
 | |
| # Optimization is applied:
 | |
| SELECT COUNT(DISTINCT a, b) FROM t3;
 | |
| COUNT(DISTINCT a, b)
 | |
| 7
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t3.a,t3.b)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| SELECT COUNT(DISTINCT b, a) FROM t3;
 | |
| COUNT(DISTINCT b, a)
 | |
| 7
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t3.b,t3.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| SELECT COUNT(DISTINCT b, a) FROM t3 WHERE a < 3;
 | |
| COUNT(DISTINCT b, a)
 | |
| 5
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t3.b,t3.a)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| # Optimization is applied to one of the functions:
 | |
| SELECT COUNT(DISTINCT b), SUM(DISTINCT a), SUM(DISTINCT a + b) FROM t3 GROUP BY a;
 | |
| COUNT(DISTINCT b)	SUM(DISTINCT a)	SUM(DISTINCT a + b)
 | |
| 3	1	9
 | |
| 2	2	7
 | |
| 2	3	9
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t3.b)",
 | |
|         "aggregator_type": "simple"
 | |
|     },
 | |
|     {
 | |
|         "function": "sum(distinct t3.a)",
 | |
|         "aggregator_type": "distinct"
 | |
|     },
 | |
|     {
 | |
|         "function": "sum(distinct t3.a + t3.b)",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| # Can't apply optimization 'cause GROUP BY argument is not a field:
 | |
| SELECT COUNT(DISTINCT b) FROM t3 GROUP BY a+b;
 | |
| COUNT(DISTINCT b)
 | |
| 1
 | |
| 2
 | |
| 3
 | |
| 1
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t3.b)",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| # Test merged view
 | |
| CREATE VIEW v1 AS SELECT * FROM t1;
 | |
| # Optimization is applied
 | |
| SELECT COUNT(DISTINCT a, b) FROM v1;
 | |
| COUNT(DISTINCT a, b)
 | |
| 3
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "count(distinct t1.a,t1.b)",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| # GROUP_CONCAT implements non-standard distinct aggregator
 | |
| SELECT GROUP_CONCAT(b) FROM t1;
 | |
| GROUP_CONCAT(b)
 | |
| 1,1,1
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "group_concat(t1.b separator ',')",
 | |
|         "aggregator_type": "simple"
 | |
|     }
 | |
| ]
 | |
| SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
 | |
| GROUP_CONCAT(DISTINCT b)
 | |
| 1
 | |
| SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
 | |
| FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 | |
| JS
 | |
| [
 | |
|     {
 | |
|         "function": "group_concat(distinct t1.b separator ',')",
 | |
|         "aggregator_type": "distinct"
 | |
|     }
 | |
| ]
 | |
| DROP TABLE t1, t2, t3;
 | |
| DROP VIEW v1;
 | |
| SET optimizer_trace = @save_optimizer_trace;
 | |
| #
 | |
| # end of 10.5 tests
 | |
| #
 |