1#
2# Tests of cost-based choice between the materialization and in-to-exists
3# subquery execution strategies (MWL#89)
4#
5# The test file is divided into two groups of tests:
6# A. Typical cases when either of the two strategies is selected:
7#    1. Subquery in disjunctive WHERE clause of the outer query.
8#    2. NOT IN subqueries
9#    3. Subqueries with GROUP BY, HAVING, and aggregate functions
10#    4. Subqueries in the SELECT and HAVING clauses
11#    5. Subqueries with UNION
12# B. Reasonably exhaustive tests of the various combinations of optimizer
13#    switches, data distribution, available indexes, and typical queries.
14#
15
16--source include/default_optimizer_switch.inc
17--source include/default_charset.inc
18
19set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on';
20#
21# Test logging to slow log (there was some errors in the log files about
22# the slow log when running under valgrind, so better to get this tested)
23#
24set long_query_time=0.1;
25
26
27-- echo TEST GROUP 1:
28-- echo Typical cases of in-to-exists and materialization subquery strategies
29-- echo =====================================================================
30
31--disable_warnings
32drop database if exists world;
33--enable_warnings
34
35set names utf8;
36
37create database world;
38use world;
39
40--source include/world_schema.inc
41--disable_query_log
42--disable_result_log
43--disable_warnings
44--source include/world.inc
45--enable_warnings
46--enable_result_log
47--enable_query_log
48
49-- echo Make the schema and data more diverse by adding more indexes, nullable
50-- echo columns, and NULL data.
51create index SurfaceArea on Country(SurfaceArea);
52create index Language on CountryLanguage(Language);
53create index CityName on City(Name);
54alter table City change population population int(11) null default 0;
55
56select max(id) from City into @max_city_id;
57insert into City values (@max_city_id + 1,'Kilifarevo','BGR',NULL);
58
59
60SELECT COUNT(*) FROM Country;
61SELECT COUNT(*) FROM City;
62SELECT COUNT(*) FROM CountryLanguage;
63
64set @@optimizer_switch = 'in_to_exists=on,semijoin=on,materialization=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on';
65
66-- echo
67-- echo 1. Subquery in a disjunctive WHERE clause of the outer query.
68-- echo
69
70-- echo
71-- echo Q1.1m:
72-- echo MATERIALIZATION: there are too many rows in the outer query
73-- echo to be looked up in the inner table.
74EXPLAIN
75SELECT Name FROM Country
76WHERE (Code IN (select Country from City where City.Population > 100000) OR
77       Name LIKE 'L%') AND
78      surfacearea > 1000000;
79
80SELECT Name FROM Country
81WHERE (Code IN (select Country from City where City.Population > 100000) OR
82       Name LIKE 'L%') AND
83      surfacearea > 1000000;
84
85-- echo Q1.1e:
86-- echo IN-EXISTS: the materialization cost is the same as above, but
87-- echo there are much fewer outer rows to be looked up, thus the
88-- echo materialization cost is too high to compensate for fast lookups.
89EXPLAIN
90SELECT Name FROM Country
91WHERE (Code IN (select Country from City where City.Population > 100000) OR
92       Name LIKE 'L%') AND
93      surfacearea > 10*1000000;
94
95SELECT Name FROM Country
96WHERE (Code IN (select Country from City where City.Population > 100000) OR
97       Name LIKE 'L%') AND
98      surfacearea > 10*1000000;
99
100-- echo
101-- echo Q1.2m:
102-- echo MATERIALIZATION: the IN predicate is pushed (attached) to the last table
103-- echo in the join order (Country, City), therefore there are too many row
104-- echo combinations to filter by re-executing the subquery for each combination.
105EXPLAIN
106SELECT *
107  FROM Country, City
108  WHERE City.Country = Country.Code AND
109        Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
110        (City.Name IN
111         (select Language from CountryLanguage where Percentage > 50) OR
112         City.name LIKE '%Island%');
113
114SELECT *
115  FROM Country, City
116  WHERE City.Country = Country.Code AND
117        Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
118        (City.Name IN
119         (select Language from CountryLanguage where Percentage > 50) OR
120         City.name LIKE '%Island%');
121
122-- echo Q1.2e:
123-- echo IN_EXISTS: join order is the same, but the left IN operand refers to
124-- echo only the first table in the join order (Country), so there are much
125-- echo fewer rows to filter by subquery re-execution.
126EXPLAIN
127SELECT *
128  FROM Country, City
129  WHERE City.Country = Country.Code AND
130        Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
131        (Country.Name IN
132         (select Language from CountryLanguage where Percentage > 50) OR
133         Country.name LIKE '%Island%');
134
135SELECT *
136  FROM Country, City
137  WHERE City.Country = Country.Code AND
138        Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
139        (Country.Name IN
140         (select Language from CountryLanguage where Percentage > 50) OR
141         Country.name LIKE '%Island%');
142
143
144-- echo
145-- echo Q1.3:
146-- echo For the same reasons as in Q2 IN-EXISTS and MATERIALIZATION chosen
147-- echo for each respective subquery.
148EXPLAIN
149SELECT City.Name, Country.Name
150  FROM City,Country
151  WHERE City.Country = Country.Code AND
152        Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
153        ((Country.Code, Country.Name) IN
154         (select Country, Language from CountryLanguage where Percentage > 50) AND
155         Country.Population > 3000000
156         OR
157         (Country.Code, City.Name) IN
158         (select Country, Language from CountryLanguage));
159
160SELECT City.Name, Country.Name
161  FROM City,Country
162  WHERE City.Country = Country.Code AND
163        Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
164        ((Country.Code, Country.Name) IN
165         (select Country, Language from CountryLanguage where Percentage > 50) AND
166         Country.Population > 3000000
167         OR
168         (Country.Code, City.Name) IN
169         (select Country, Language from CountryLanguage));
170
171
172-- echo
173-- echo 2. NOT IN subqueries
174-- echo
175
176-- echo
177-- echo Q2.1:
178-- echo Number of cities that are not capitals in countries with small population.
179-- echo MATERIALIZATION is 50 times faster because the cost of each subquery
180-- echo re-execution is much higher than the cost of index lookups into the
181-- echo materialized subquery.
182
183EXPLAIN
184select count(*) from City
185where City.id not in (select capital from Country
186                      where capital is not null and population < 100000);
187
188-- echo
189-- echo Q2.2e:
190-- echo Countries that speak French, but do not speak English
191-- echo IN-EXISTS because the outer query filters many rows, thus
192-- echo there are few lookups to make.
193EXPLAIN
194SELECT Country.Name
195FROM Country, CountryLanguage
196WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
197  AND CountryLanguage.Language = 'French'
198  AND Code = Country;
199
200SELECT Country.Name
201FROM Country, CountryLanguage
202WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
203  AND CountryLanguage.Language = 'French'
204  AND Code = Country;
205
206-- echo Q2.2m:
207-- echo Countries that speak French OR Spanish, but do not speak English
208-- echo MATERIALIZATION because the outer query filters less rows than Q5-a,
209-- echo so there are more lookups.
210
211
212set statement optimizer_switch='rowid_filter=off' for
213EXPLAIN
214SELECT Country.Name
215FROM Country, CountryLanguage
216WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
217  AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
218  AND Code = Country;
219
220set statement optimizer_switch='rowid_filter=off' for
221SELECT Country.Name
222FROM Country, CountryLanguage
223WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
224  AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
225  AND Code = Country;
226
227-- echo
228-- echo Q2.3e:
229-- echo Not a very meaningful query that tests NOT IN.
230-- echo IN-EXISTS because the outer query is cheap enough to reexecute many times.
231EXPLAIN
232select count(*)
233from CountryLanguage
234where (Language, Country) NOT IN
235      (SELECT City.Name, Country.Code
236       FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000))
237      AND Language IN ('English','Spanish');
238
239select count(*)
240from CountryLanguage
241where (Language, Country) NOT IN
242      (SELECT City.Name, Country.Code
243       FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000))
244      AND Language IN ('English','Spanish');
245
246-- echo Q2.3m:
247-- echo MATERIALIZATION with the PARTIAL_MATCH_MERGE strategy, because the HAVING
248-- echo clause prevents the use of the index on City(Name), and in practice reduces
249-- echo radically the size of the temp table.
250EXPLAIN
251select count(*)
252from CountryLanguage
253where (Language, Country) NOT IN
254      (SELECT City.Name, Country.Code
255       FROM City LEFT JOIN Country ON (Country = Code)
256       HAVING City.Name LIKE "Santa%");
257
258select count(*)
259from CountryLanguage
260where (Language, Country) NOT IN
261      (SELECT City.Name, Country.Code
262       FROM City LEFT JOIN Country ON (Country = Code)
263       HAVING City.Name LIKE "Santa%");
264
265
266-- echo
267-- echo 3. Subqueries with GROUP BY, HAVING, and aggregate functions
268-- echo
269
270-- echo Q3.1:
271-- echo Languages that are spoken in countries with 10 or 11 languages
272-- echo MATERIALIZATION is about 100 times faster than IN-EXISTS.
273
274EXPLAIN
275select count(*)
276from CountryLanguage
277where
278(Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
279                  WHERE Code = Country GROUP BY Code)
280OR
281(Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
282                  WHERE Code = Country GROUP BY Code)
283order by Country;
284
285select count(*)
286from CountryLanguage
287where
288(Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
289                  WHERE Code = Country GROUP BY Code)
290OR
291(Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
292                  WHERE Code = Country GROUP BY Code)
293order by Country;
294
295
296-- echo
297-- echo Q3.2:
298-- echo Countries whose capital is a city name that names more than one
299-- echo cities.
300-- echo MATERIALIZATION because the cost of single subquery execution is
301-- echo close to that of materializing the subquery.
302
303EXPLAIN
304select * from Country, City
305where capital = id and
306      (City.name in (SELECT name FROM City
307                     GROUP BY name HAVING Count(*) > 2) OR
308       capital is null);
309
310select * from Country, City
311where capital = id and
312      (City.name in (SELECT name FROM City
313                     GROUP BY name HAVING Count(*) > 2) OR
314       capital is null);
315
316-- echo
317-- echo Q3.3: MATERIALIZATION is 25 times faster than IN-EXISTS
318
319EXPLAIN
320SELECT Name
321FROM Country
322WHERE Country.Code NOT IN
323      (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
324
325SELECT Name
326FROM Country
327WHERE Country.Code NOT IN
328      (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
329
330
331-- echo
332-- echo 4. Subqueries in the SELECT and HAVING clauses
333-- echo
334
335-- echo Q4.1m:
336-- echo Capital information about very big cities
337-- echo MATERIALIZATION
338EXPLAIN
339select Name, City.id in (select capital from Country where capital is not null) as is_capital
340from City
341where City.population > 10000000;
342
343select Name, City.id in (select capital from Country where capital is not null) as is_capital
344from City
345where City.population > 10000000;
346
347-- echo Q4.1e:
348-- echo IN-TO-EXISTS after adding an index to make the subquery re-execution
349-- echo efficient.
350
351create index CountryCapital on Country(capital);
352
353EXPLAIN
354select Name, City.id in (select capital from Country where capital is not null) as is_capital
355from City
356where City.population > 10000000;
357
358select Name, City.id in (select capital from Country where capital is not null) as is_capital
359from City
360where City.population > 10000000;
361
362drop index CountryCapital on Country;
363
364-- echo
365-- echo Q4.2:
366-- echo MATERIALIZATION
367# TODO: the cost estimates for subqueries in the HAVING clause need to be changed
368# to take into account that the subquery predicate is executed #times ~ to the
369# number of groups, not number of rows
370EXPLAIN
371SELECT City.Name, City.Population
372FROM City JOIN Country ON City.Country = Country.Code
373GROUP BY City.Name
374HAVING City.Name IN (select Name from Country where population < 1000000);
375
376SELECT City.Name, City.Population
377FROM City JOIN Country ON City.Country = Country.Code
378GROUP BY City.Name
379HAVING City.Name IN (select Name from Country where population < 1000000);
380
381
382-- echo
383-- echo 5. Subqueries with UNION
384-- echo
385
386-- echo Q5.1:
387EXPLAIN
388SELECT * from City where (Name, 91) in
389(SELECT Name, round(Population/1000)
390 FROM City
391 WHERE Country = "IND" AND Population > 2500000
392UNION
393 SELECT Name, round(Population/1000)
394 FROM City
395 WHERE Country = "IND" AND Population < 100000);
396
397SELECT * from City where (Name, 91) in
398(SELECT Name, round(Population/1000)
399 FROM City
400 WHERE Country = "IND" AND Population > 2500000
401UNION
402 SELECT Name, round(Population/1000)
403 FROM City
404 WHERE Country = "IND" AND Population < 100000);
405
406set @@optimizer_switch='default';
407drop database world;
408-- echo
409
410
411-- echo
412-- echo TEST GROUP 2:
413-- echo Tests of various combinations of optimizer switches, types of queries,
414-- echo available indexes, column nullability, constness of tables/predicates.
415-- echo =====================================================================
416
417
418#TODO From Igor's review:
419#
420#2.1 Please add a case when two subqueries  are used in the where clause
421#(or in select) of a 2-way join.
422#The first subquery is accessed after the first table, while the second
423#is accessed after the second table.
424#
425#2.2. Please add a test case when one non-correlated subquery contains
426#another non-correlated subquery.
427#Consider 4 subcases:
428#   both subqueries are materialized
429#   IN_EXIST transformations are applied to both subqueries
430#   outer subquery is materialized while the inner subquery  is not
431#(IN_EXIST transformation is applied to it)
432#   inner subqyery is materialized while the outer subquery  is not (
433#IN_EXIST transformation is applied to it)
434
435set optimizer_switch=default;
436