mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 10:56:12 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			439 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			439 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
| #
 | |
| # Tests of cost-based choice between the materialization and in-to-exists
 | |
| # subquery execution strategies (MWL#89)
 | |
| #
 | |
| # The test file is divided into two groups of tests:
 | |
| # A. Typical cases when either of the two strategies is selected:
 | |
| #    1. Subquery in disjunctive WHERE clause of the outer query.
 | |
| #    2. NOT IN subqueries
 | |
| #    3. Subqueries with GROUP BY, HAVING, and aggregate functions
 | |
| #    4. Subqueries in the SELECT and HAVING clauses
 | |
| #    5. Subqueries with UNION
 | |
| # B. Reasonably exhaustive tests of the various combinations of optimizer
 | |
| #    switches, data distribution, available indexes, and typical queries.
 | |
| #
 | |
| 
 | |
| --source include/default_optimizer_switch.inc
 | |
| --source include/default_charset.inc
 | |
| 
 | |
| set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on';
 | |
| #
 | |
| # Test logging to slow log (there was some errors in the log files about
 | |
| # the slow log when running under valgrind, so better to get this tested)
 | |
| #
 | |
| set long_query_time=0.1;
 | |
| 
 | |
| 
 | |
| -- echo TEST GROUP 1:
 | |
| -- echo Typical cases of in-to-exists and materialization subquery strategies
 | |
| -- echo =====================================================================
 | |
| 
 | |
| --disable_warnings
 | |
| drop database if exists world;
 | |
| --enable_warnings
 | |
| 
 | |
| set names utf8;
 | |
| 
 | |
| create database world;
 | |
| use world;
 | |
| 
 | |
| --source include/world_schema.inc
 | |
| --disable_query_log
 | |
| --disable_result_log
 | |
| --disable_warnings
 | |
| --source include/world.inc
 | |
| --enable_warnings
 | |
| --enable_result_log
 | |
| --enable_query_log
 | |
| 
 | |
| -- echo Make the schema and data more diverse by adding more indexes, nullable
 | |
| -- echo columns, and NULL data.
 | |
| create index SurfaceArea on Country(SurfaceArea);
 | |
| create index Language on CountryLanguage(Language);
 | |
| create index CityName on City(Name);
 | |
| alter table City change population population int(11) null default 0;
 | |
| 
 | |
| --disable_cursor_protocol
 | |
| --enable_prepare_warnings
 | |
| select max(id) from City into @max_city_id;
 | |
| --disable_prepare_warnings
 | |
| --enable_cursor_protocol
 | |
| insert into City values (@max_city_id + 1,'Kilifarevo','BGR',NULL);
 | |
| 
 | |
| 
 | |
| SELECT COUNT(*) FROM Country;
 | |
| SELECT COUNT(*) FROM City;
 | |
| SELECT COUNT(*) FROM CountryLanguage;
 | |
| 
 | |
| set @@optimizer_switch = 'in_to_exists=on,semijoin=on,materialization=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on';
 | |
| 
 | |
| -- echo
 | |
| -- echo 1. Subquery in a disjunctive WHERE clause of the outer query.
 | |
| -- echo
 | |
| 
 | |
| -- echo
 | |
| -- echo Q1.1m:
 | |
| -- echo MATERIALIZATION: there are too many rows in the outer query
 | |
| -- echo to be looked up in the inner table.
 | |
| EXPLAIN
 | |
| SELECT Name FROM Country
 | |
| WHERE (Code IN (select Country from City where City.Population > 100000) OR
 | |
|        Name LIKE 'L%') AND
 | |
|       surfacearea > 1000000;
 | |
| 
 | |
| SELECT Name FROM Country
 | |
| WHERE (Code IN (select Country from City where City.Population > 100000) OR
 | |
|        Name LIKE 'L%') AND
 | |
|       surfacearea > 1000000;
 | |
| 
 | |
| -- echo Q1.1e:
 | |
| -- echo IN-EXISTS: the materialization cost is the same as above, but
 | |
| -- echo there are much fewer outer rows to be looked up, thus the
 | |
| -- echo materialization cost is too high to compensate for fast lookups.
 | |
| EXPLAIN
 | |
| SELECT Name FROM Country
 | |
| WHERE (Code IN (select Country from City where City.Population > 100000) OR
 | |
|        Name LIKE 'L%') AND
 | |
|       surfacearea > 10*1000000;
 | |
| 
 | |
| SELECT Name FROM Country
 | |
| WHERE (Code IN (select Country from City where City.Population > 100000) OR
 | |
|        Name LIKE 'L%') AND
 | |
|       surfacearea > 10*1000000;
 | |
| 
 | |
| -- echo
 | |
| -- echo Q1.2m:
 | |
| -- echo MATERIALIZATION: the IN predicate is pushed (attached) to the last table
 | |
| -- echo in the join order (Country, City), therefore there are too many row
 | |
| -- echo combinations to filter by re-executing the subquery for each combination.
 | |
| EXPLAIN
 | |
| SELECT *
 | |
|   FROM Country, City
 | |
|   WHERE City.Country = Country.Code AND
 | |
|         Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
 | |
|         (City.Name IN
 | |
|          (select Language from CountryLanguage where Percentage > 50) OR
 | |
|          City.name LIKE '%Island%');
 | |
| 
 | |
| SELECT *
 | |
|   FROM Country, City
 | |
|   WHERE City.Country = Country.Code AND
 | |
|         Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
 | |
|         (City.Name IN
 | |
|          (select Language from CountryLanguage where Percentage > 50) OR
 | |
|          City.name LIKE '%Island%');
 | |
| 
 | |
| -- echo Q1.2e:
 | |
| -- echo IN_EXISTS: join order is the same, but the left IN operand refers to
 | |
| -- echo only the first table in the join order (Country), so there are much
 | |
| -- echo fewer rows to filter by subquery re-execution.
 | |
| EXPLAIN
 | |
| SELECT *
 | |
|   FROM Country, City
 | |
|   WHERE City.Country = Country.Code AND
 | |
|         Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
 | |
|         (Country.Name IN
 | |
|          (select Language from CountryLanguage where Percentage > 50) OR
 | |
|          Country.name LIKE '%Island%');
 | |
| 
 | |
| SELECT *
 | |
|   FROM Country, City
 | |
|   WHERE City.Country = Country.Code AND
 | |
|         Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
 | |
|         (Country.Name IN
 | |
|          (select Language from CountryLanguage where Percentage > 50) OR
 | |
|          Country.name LIKE '%Island%');
 | |
| 
 | |
| 
 | |
| -- echo
 | |
| -- echo Q1.3:
 | |
| -- echo For the same reasons as in Q2 IN-EXISTS and MATERIALIZATION chosen
 | |
| -- echo for each respective subquery.
 | |
| EXPLAIN
 | |
| SELECT City.Name, Country.Name
 | |
|   FROM City,Country
 | |
|   WHERE City.Country = Country.Code AND
 | |
|         Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
 | |
|         ((Country.Code, Country.Name) IN
 | |
|          (select Country, Language from CountryLanguage where Percentage > 50) AND
 | |
|          Country.Population > 3000000
 | |
|          OR
 | |
|          (Country.Code, City.Name) IN
 | |
|          (select Country, Language from CountryLanguage));
 | |
| 
 | |
| SELECT City.Name, Country.Name
 | |
|   FROM City,Country
 | |
|   WHERE City.Country = Country.Code AND
 | |
|         Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
 | |
|         ((Country.Code, Country.Name) IN
 | |
|          (select Country, Language from CountryLanguage where Percentage > 50) AND
 | |
|          Country.Population > 3000000
 | |
|          OR
 | |
|          (Country.Code, City.Name) IN
 | |
|          (select Country, Language from CountryLanguage));
 | |
| 
 | |
| 
 | |
| -- echo
 | |
| -- echo 2. NOT IN subqueries
 | |
| -- echo
 | |
| 
 | |
| -- echo
 | |
| -- echo Q2.1:
 | |
| -- echo Number of cities that are not capitals in countries with small population.
 | |
| -- echo MATERIALIZATION is 50 times faster because the cost of each subquery
 | |
| -- echo re-execution is much higher than the cost of index lookups into the
 | |
| -- echo materialized subquery.
 | |
| 
 | |
| EXPLAIN
 | |
| select count(*) from City
 | |
| where City.id not in (select capital from Country
 | |
|                       where capital is not null and population < 100000);
 | |
| 
 | |
| -- echo
 | |
| -- echo Q2.2e:
 | |
| -- echo Countries that speak French, but do not speak English
 | |
| -- echo IN-EXISTS because the outer query filters many rows, thus
 | |
| -- echo there are few lookups to make.
 | |
| EXPLAIN
 | |
| SELECT Country.Name
 | |
| FROM Country, CountryLanguage 
 | |
| WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
 | |
|   AND CountryLanguage.Language = 'French'
 | |
|   AND Code = Country;
 | |
| 
 | |
| SELECT Country.Name
 | |
| FROM Country, CountryLanguage 
 | |
| WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
 | |
|   AND CountryLanguage.Language = 'French'
 | |
|   AND Code = Country;
 | |
| 
 | |
| -- echo Q2.2m:
 | |
| -- echo Countries that speak French OR Spanish, but do not speak English
 | |
| -- echo MATERIALIZATION because the outer query filters less rows than Q5-a,
 | |
| -- echo so there are more lookups.
 | |
| 
 | |
| 
 | |
| set statement optimizer_switch='rowid_filter=off' for
 | |
| EXPLAIN
 | |
| SELECT Country.Name
 | |
| FROM Country, CountryLanguage 
 | |
| WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
 | |
|   AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
 | |
|   AND Code = Country;
 | |
| 
 | |
| set statement optimizer_switch='rowid_filter=off' for
 | |
| SELECT Country.Name
 | |
| FROM Country, CountryLanguage 
 | |
| WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
 | |
|   AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
 | |
|   AND Code = Country;
 | |
| 
 | |
| -- echo
 | |
| -- echo Q2.3e:
 | |
| -- echo Not a very meaningful query that tests NOT IN.
 | |
| -- echo IN-EXISTS because the outer query is cheap enough to reexecute many times.
 | |
| EXPLAIN
 | |
| select count(*)
 | |
| from CountryLanguage
 | |
| where (Language, Country) NOT IN
 | |
|       (SELECT City.Name, Country.Code
 | |
|        FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000))
 | |
|       AND Language IN ('English','Spanish');
 | |
| 
 | |
| select count(*)
 | |
| from CountryLanguage
 | |
| where (Language, Country) NOT IN
 | |
|       (SELECT City.Name, Country.Code
 | |
|        FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000))
 | |
|       AND Language IN ('English','Spanish');
 | |
| 
 | |
| -- echo Q2.3m:
 | |
| -- echo MATERIALIZATION with the PARTIAL_MATCH_MERGE strategy, because the HAVING
 | |
| -- echo clause prevents the use of the index on City(Name), and in practice reduces
 | |
| -- echo radically the size of the temp table.
 | |
| EXPLAIN
 | |
| select count(*)
 | |
| from CountryLanguage
 | |
| where (Language, Country) NOT IN
 | |
|       (SELECT City.Name, Country.Code
 | |
|        FROM City LEFT JOIN Country ON (Country = Code)
 | |
|        HAVING City.Name LIKE "Santa%");
 | |
| 
 | |
| select count(*)
 | |
| from CountryLanguage
 | |
| where (Language, Country) NOT IN
 | |
|       (SELECT City.Name, Country.Code
 | |
|        FROM City LEFT JOIN Country ON (Country = Code)
 | |
|        HAVING City.Name LIKE "Santa%");
 | |
| 
 | |
| 
 | |
| -- echo
 | |
| -- echo 3. Subqueries with GROUP BY, HAVING, and aggregate functions
 | |
| -- echo
 | |
| 
 | |
| -- echo Q3.1:
 | |
| -- echo Languages that are spoken in countries with 10 or 11 languages
 | |
| -- echo MATERIALIZATION is about 100 times faster than IN-EXISTS.
 | |
| 
 | |
| EXPLAIN
 | |
| select count(*)
 | |
| from CountryLanguage
 | |
| where
 | |
| (Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
 | |
|                   WHERE Code = Country GROUP BY Code)
 | |
| OR
 | |
| (Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
 | |
|                   WHERE Code = Country GROUP BY Code)
 | |
| order by Country;
 | |
| 
 | |
| select count(*)
 | |
| from CountryLanguage
 | |
| where
 | |
| (Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
 | |
|                   WHERE Code = Country GROUP BY Code)
 | |
| OR
 | |
| (Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
 | |
|                   WHERE Code = Country GROUP BY Code)
 | |
| order by Country;
 | |
| 
 | |
| 
 | |
| -- echo
 | |
| -- echo Q3.2:
 | |
| -- echo Countries whose capital is a city name that names more than one
 | |
| -- echo cities.
 | |
| -- echo MATERIALIZATION because the cost of single subquery execution is
 | |
| -- echo close to that of materializing the subquery.
 | |
| 
 | |
| EXPLAIN
 | |
| select * from Country, City
 | |
| where capital = id and
 | |
|       (City.name in (SELECT name FROM City
 | |
|                      GROUP BY name HAVING Count(*) > 2) OR
 | |
|        capital is null);
 | |
| 
 | |
| select * from Country, City
 | |
| where capital = id and
 | |
|       (City.name in (SELECT name FROM City
 | |
|                      GROUP BY name HAVING Count(*) > 2) OR
 | |
|        capital is null);
 | |
| 
 | |
| -- echo
 | |
| -- echo Q3.3: MATERIALIZATION is 25 times faster than IN-EXISTS
 | |
| 
 | |
| EXPLAIN
 | |
| SELECT Name
 | |
| FROM Country
 | |
| WHERE Country.Code NOT IN
 | |
|       (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
 | |
| 
 | |
| SELECT Name
 | |
| FROM Country
 | |
| WHERE Country.Code NOT IN
 | |
|       (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
 | |
| 
 | |
| 
 | |
| -- echo
 | |
| -- echo 4. Subqueries in the SELECT and HAVING clauses
 | |
| -- echo
 | |
| 
 | |
| -- echo Q4.1m:
 | |
| -- echo Capital information about very big cities
 | |
| -- echo MATERIALIZATION
 | |
| EXPLAIN
 | |
| select Name, City.id in (select capital from Country where capital is not null) as is_capital
 | |
| from City
 | |
| where City.population > 10000000;
 | |
| 
 | |
| select Name, City.id in (select capital from Country where capital is not null) as is_capital
 | |
| from City
 | |
| where City.population > 10000000;
 | |
| 
 | |
| -- echo Q4.1e:
 | |
| -- echo IN-TO-EXISTS after adding an index to make the subquery re-execution
 | |
| -- echo efficient.
 | |
| 
 | |
| create index CountryCapital on Country(capital);
 | |
| 
 | |
| EXPLAIN
 | |
| select Name, City.id in (select capital from Country where capital is not null) as is_capital
 | |
| from City
 | |
| where City.population > 10000000;
 | |
| 
 | |
| select Name, City.id in (select capital from Country where capital is not null) as is_capital
 | |
| from City
 | |
| where City.population > 10000000;
 | |
| 
 | |
| drop index CountryCapital on Country;
 | |
| 
 | |
| -- echo
 | |
| -- echo Q4.2:
 | |
| -- echo MATERIALIZATION
 | |
| # TODO: the cost estimates for subqueries in the HAVING clause need to be changed
 | |
| # to take into account that the subquery predicate is executed #times ~ to the
 | |
| # number of groups, not number of rows
 | |
| EXPLAIN
 | |
| SELECT City.Name, City.Population
 | |
| FROM City JOIN Country ON City.Country = Country.Code
 | |
| GROUP BY City.Name
 | |
| HAVING City.Name IN (select Name from Country where population < 1000000);
 | |
| 
 | |
| SELECT City.Name, City.Population
 | |
| FROM City JOIN Country ON City.Country = Country.Code
 | |
| GROUP BY City.Name
 | |
| HAVING City.Name IN (select Name from Country where population < 1000000);
 | |
| 
 | |
| 
 | |
| -- echo
 | |
| -- echo 5. Subqueries with UNION
 | |
| -- echo
 | |
| 
 | |
| -- echo Q5.1:
 | |
| EXPLAIN
 | |
| SELECT * from City where (Name, 91) in
 | |
| (SELECT Name, round(Population/1000)
 | |
|  FROM City
 | |
|  WHERE Country = "IND" AND Population > 2500000
 | |
| UNION
 | |
|  SELECT Name, round(Population/1000)
 | |
|  FROM City
 | |
|  WHERE Country = "IND" AND Population < 100000);
 | |
| 
 | |
| SELECT * from City where (Name, 91) in
 | |
| (SELECT Name, round(Population/1000)
 | |
|  FROM City
 | |
|  WHERE Country = "IND" AND Population > 2500000
 | |
| UNION
 | |
|  SELECT Name, round(Population/1000)
 | |
|  FROM City
 | |
|  WHERE Country = "IND" AND Population < 100000);
 | |
| 
 | |
| set @@optimizer_switch='default';
 | |
| drop database world;
 | |
| -- echo
 | |
| 
 | |
| 
 | |
| -- echo
 | |
| -- echo TEST GROUP 2:
 | |
| -- echo Tests of various combinations of optimizer switches, types of queries,
 | |
| -- echo available indexes, column nullability, constness of tables/predicates.
 | |
| -- echo =====================================================================
 | |
| 
 | |
| 
 | |
| #TODO From Igor's review:
 | |
| #
 | |
| #2.1 Please add a case when two subqueries  are used in the where clause
 | |
| #(or in select) of a 2-way join.
 | |
| #The first subquery is accessed after the first table, while the second
 | |
| #is accessed after the second table.
 | |
| #
 | |
| #2.2. Please add a test case when one non-correlated subquery contains
 | |
| #another non-correlated subquery.
 | |
| #Consider 4 subcases:
 | |
| #   both subqueries are materialized
 | |
| #   IN_EXIST transformations are applied to both subqueries
 | |
| #   outer subquery is materialized while the inner subquery  is not
 | |
| #(IN_EXIST transformation is applied to it)
 | |
| #   inner subqyery is materialized while the outer subquery  is not (
 | |
| #IN_EXIST transformation is applied to it)
 | |
| 
 | |
| set optimizer_switch=default;
 | 
