1# 2# Tests of cost-based choice between the materialization and in-to-exists 3# subquery execution strategies (MWL#89) 4# 5# The test file is divided into two groups of tests: 6# A. Typical cases when either of the two strategies is selected: 7# 1. Subquery in disjunctive WHERE clause of the outer query. 8# 2. NOT IN subqueries 9# 3. Subqueries with GROUP BY, HAVING, and aggregate functions 10# 4. Subqueries in the SELECT and HAVING clauses 11# 5. Subqueries with UNION 12# B. Reasonably exhaustive tests of the various combinations of optimizer 13# switches, data distribution, available indexes, and typical queries. 14# 15 16--source include/default_optimizer_switch.inc 17--source include/default_charset.inc 18 19set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; 20# 21# Test logging to slow log (there was some errors in the log files about 22# the slow log when running under valgrind, so better to get this tested) 23# 24set long_query_time=0.1; 25 26 27-- echo TEST GROUP 1: 28-- echo Typical cases of in-to-exists and materialization subquery strategies 29-- echo ===================================================================== 30 31--disable_warnings 32drop database if exists world; 33--enable_warnings 34 35set names utf8; 36 37create database world; 38use world; 39 40--source include/world_schema.inc 41--disable_query_log 42--disable_result_log 43--disable_warnings 44--source include/world.inc 45--enable_warnings 46--enable_result_log 47--enable_query_log 48 49-- echo Make the schema and data more diverse by adding more indexes, nullable 50-- echo columns, and NULL data. 51create index SurfaceArea on Country(SurfaceArea); 52create index Language on CountryLanguage(Language); 53create index CityName on City(Name); 54alter table City change population population int(11) null default 0; 55 56select max(id) from City into @max_city_id; 57insert into City values (@max_city_id + 1,'Kilifarevo','BGR',NULL); 58 59 60SELECT COUNT(*) FROM Country; 61SELECT COUNT(*) FROM City; 62SELECT COUNT(*) FROM CountryLanguage; 63 64set @@optimizer_switch = 'in_to_exists=on,semijoin=on,materialization=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on'; 65 66-- echo 67-- echo 1. Subquery in a disjunctive WHERE clause of the outer query. 68-- echo 69 70-- echo 71-- echo Q1.1m: 72-- echo MATERIALIZATION: there are too many rows in the outer query 73-- echo to be looked up in the inner table. 74EXPLAIN 75SELECT Name FROM Country 76WHERE (Code IN (select Country from City where City.Population > 100000) OR 77 Name LIKE 'L%') AND 78 surfacearea > 1000000; 79 80SELECT Name FROM Country 81WHERE (Code IN (select Country from City where City.Population > 100000) OR 82 Name LIKE 'L%') AND 83 surfacearea > 1000000; 84 85-- echo Q1.1e: 86-- echo IN-EXISTS: the materialization cost is the same as above, but 87-- echo there are much fewer outer rows to be looked up, thus the 88-- echo materialization cost is too high to compensate for fast lookups. 89EXPLAIN 90SELECT Name FROM Country 91WHERE (Code IN (select Country from City where City.Population > 100000) OR 92 Name LIKE 'L%') AND 93 surfacearea > 10*1000000; 94 95SELECT Name FROM Country 96WHERE (Code IN (select Country from City where City.Population > 100000) OR 97 Name LIKE 'L%') AND 98 surfacearea > 10*1000000; 99 100-- echo 101-- echo Q1.2m: 102-- echo MATERIALIZATION: the IN predicate is pushed (attached) to the last table 103-- echo in the join order (Country, City), therefore there are too many row 104-- echo combinations to filter by re-executing the subquery for each combination. 105EXPLAIN 106SELECT * 107 FROM Country, City 108 WHERE City.Country = Country.Code AND 109 Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND 110 (City.Name IN 111 (select Language from CountryLanguage where Percentage > 50) OR 112 City.name LIKE '%Island%'); 113 114SELECT * 115 FROM Country, City 116 WHERE City.Country = Country.Code AND 117 Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND 118 (City.Name IN 119 (select Language from CountryLanguage where Percentage > 50) OR 120 City.name LIKE '%Island%'); 121 122-- echo Q1.2e: 123-- echo IN_EXISTS: join order is the same, but the left IN operand refers to 124-- echo only the first table in the join order (Country), so there are much 125-- echo fewer rows to filter by subquery re-execution. 126EXPLAIN 127SELECT * 128 FROM Country, City 129 WHERE City.Country = Country.Code AND 130 Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND 131 (Country.Name IN 132 (select Language from CountryLanguage where Percentage > 50) OR 133 Country.name LIKE '%Island%'); 134 135SELECT * 136 FROM Country, City 137 WHERE City.Country = Country.Code AND 138 Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND 139 (Country.Name IN 140 (select Language from CountryLanguage where Percentage > 50) OR 141 Country.name LIKE '%Island%'); 142 143 144-- echo 145-- echo Q1.3: 146-- echo For the same reasons as in Q2 IN-EXISTS and MATERIALIZATION chosen 147-- echo for each respective subquery. 148EXPLAIN 149SELECT City.Name, Country.Name 150 FROM City,Country 151 WHERE City.Country = Country.Code AND 152 Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND 153 ((Country.Code, Country.Name) IN 154 (select Country, Language from CountryLanguage where Percentage > 50) AND 155 Country.Population > 3000000 156 OR 157 (Country.Code, City.Name) IN 158 (select Country, Language from CountryLanguage)); 159 160SELECT City.Name, Country.Name 161 FROM City,Country 162 WHERE City.Country = Country.Code AND 163 Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND 164 ((Country.Code, Country.Name) IN 165 (select Country, Language from CountryLanguage where Percentage > 50) AND 166 Country.Population > 3000000 167 OR 168 (Country.Code, City.Name) IN 169 (select Country, Language from CountryLanguage)); 170 171 172-- echo 173-- echo 2. NOT IN subqueries 174-- echo 175 176-- echo 177-- echo Q2.1: 178-- echo Number of cities that are not capitals in countries with small population. 179-- echo MATERIALIZATION is 50 times faster because the cost of each subquery 180-- echo re-execution is much higher than the cost of index lookups into the 181-- echo materialized subquery. 182 183EXPLAIN 184select count(*) from City 185where City.id not in (select capital from Country 186 where capital is not null and population < 100000); 187 188-- echo 189-- echo Q2.2e: 190-- echo Countries that speak French, but do not speak English 191-- echo IN-EXISTS because the outer query filters many rows, thus 192-- echo there are few lookups to make. 193EXPLAIN 194SELECT Country.Name 195FROM Country, CountryLanguage 196WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English') 197 AND CountryLanguage.Language = 'French' 198 AND Code = Country; 199 200SELECT Country.Name 201FROM Country, CountryLanguage 202WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English') 203 AND CountryLanguage.Language = 'French' 204 AND Code = Country; 205 206-- echo Q2.2m: 207-- echo Countries that speak French OR Spanish, but do not speak English 208-- echo MATERIALIZATION because the outer query filters less rows than Q5-a, 209-- echo so there are more lookups. 210 211 212set statement optimizer_switch='rowid_filter=off' for 213EXPLAIN 214SELECT Country.Name 215FROM Country, CountryLanguage 216WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English') 217 AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish') 218 AND Code = Country; 219 220set statement optimizer_switch='rowid_filter=off' for 221SELECT Country.Name 222FROM Country, CountryLanguage 223WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English') 224 AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish') 225 AND Code = Country; 226 227-- echo 228-- echo Q2.3e: 229-- echo Not a very meaningful query that tests NOT IN. 230-- echo IN-EXISTS because the outer query is cheap enough to reexecute many times. 231EXPLAIN 232select count(*) 233from CountryLanguage 234where (Language, Country) NOT IN 235 (SELECT City.Name, Country.Code 236 FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000)) 237 AND Language IN ('English','Spanish'); 238 239select count(*) 240from CountryLanguage 241where (Language, Country) NOT IN 242 (SELECT City.Name, Country.Code 243 FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000)) 244 AND Language IN ('English','Spanish'); 245 246-- echo Q2.3m: 247-- echo MATERIALIZATION with the PARTIAL_MATCH_MERGE strategy, because the HAVING 248-- echo clause prevents the use of the index on City(Name), and in practice reduces 249-- echo radically the size of the temp table. 250EXPLAIN 251select count(*) 252from CountryLanguage 253where (Language, Country) NOT IN 254 (SELECT City.Name, Country.Code 255 FROM City LEFT JOIN Country ON (Country = Code) 256 HAVING City.Name LIKE "Santa%"); 257 258select count(*) 259from CountryLanguage 260where (Language, Country) NOT IN 261 (SELECT City.Name, Country.Code 262 FROM City LEFT JOIN Country ON (Country = Code) 263 HAVING City.Name LIKE "Santa%"); 264 265 266-- echo 267-- echo 3. Subqueries with GROUP BY, HAVING, and aggregate functions 268-- echo 269 270-- echo Q3.1: 271-- echo Languages that are spoken in countries with 10 or 11 languages 272-- echo MATERIALIZATION is about 100 times faster than IN-EXISTS. 273 274EXPLAIN 275select count(*) 276from CountryLanguage 277where 278(Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country 279 WHERE Code = Country GROUP BY Code) 280OR 281(Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country 282 WHERE Code = Country GROUP BY Code) 283order by Country; 284 285select count(*) 286from CountryLanguage 287where 288(Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country 289 WHERE Code = Country GROUP BY Code) 290OR 291(Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country 292 WHERE Code = Country GROUP BY Code) 293order by Country; 294 295 296-- echo 297-- echo Q3.2: 298-- echo Countries whose capital is a city name that names more than one 299-- echo cities. 300-- echo MATERIALIZATION because the cost of single subquery execution is 301-- echo close to that of materializing the subquery. 302 303EXPLAIN 304select * from Country, City 305where capital = id and 306 (City.name in (SELECT name FROM City 307 GROUP BY name HAVING Count(*) > 2) OR 308 capital is null); 309 310select * from Country, City 311where capital = id and 312 (City.name in (SELECT name FROM City 313 GROUP BY name HAVING Count(*) > 2) OR 314 capital is null); 315 316-- echo 317-- echo Q3.3: MATERIALIZATION is 25 times faster than IN-EXISTS 318 319EXPLAIN 320SELECT Name 321FROM Country 322WHERE Country.Code NOT IN 323 (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1); 324 325SELECT Name 326FROM Country 327WHERE Country.Code NOT IN 328 (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1); 329 330 331-- echo 332-- echo 4. Subqueries in the SELECT and HAVING clauses 333-- echo 334 335-- echo Q4.1m: 336-- echo Capital information about very big cities 337-- echo MATERIALIZATION 338EXPLAIN 339select Name, City.id in (select capital from Country where capital is not null) as is_capital 340from City 341where City.population > 10000000; 342 343select Name, City.id in (select capital from Country where capital is not null) as is_capital 344from City 345where City.population > 10000000; 346 347-- echo Q4.1e: 348-- echo IN-TO-EXISTS after adding an index to make the subquery re-execution 349-- echo efficient. 350 351create index CountryCapital on Country(capital); 352 353EXPLAIN 354select Name, City.id in (select capital from Country where capital is not null) as is_capital 355from City 356where City.population > 10000000; 357 358select Name, City.id in (select capital from Country where capital is not null) as is_capital 359from City 360where City.population > 10000000; 361 362drop index CountryCapital on Country; 363 364-- echo 365-- echo Q4.2: 366-- echo MATERIALIZATION 367# TODO: the cost estimates for subqueries in the HAVING clause need to be changed 368# to take into account that the subquery predicate is executed #times ~ to the 369# number of groups, not number of rows 370EXPLAIN 371SELECT City.Name, City.Population 372FROM City JOIN Country ON City.Country = Country.Code 373GROUP BY City.Name 374HAVING City.Name IN (select Name from Country where population < 1000000); 375 376SELECT City.Name, City.Population 377FROM City JOIN Country ON City.Country = Country.Code 378GROUP BY City.Name 379HAVING City.Name IN (select Name from Country where population < 1000000); 380 381 382-- echo 383-- echo 5. Subqueries with UNION 384-- echo 385 386-- echo Q5.1: 387EXPLAIN 388SELECT * from City where (Name, 91) in 389(SELECT Name, round(Population/1000) 390 FROM City 391 WHERE Country = "IND" AND Population > 2500000 392UNION 393 SELECT Name, round(Population/1000) 394 FROM City 395 WHERE Country = "IND" AND Population < 100000); 396 397SELECT * from City where (Name, 91) in 398(SELECT Name, round(Population/1000) 399 FROM City 400 WHERE Country = "IND" AND Population > 2500000 401UNION 402 SELECT Name, round(Population/1000) 403 FROM City 404 WHERE Country = "IND" AND Population < 100000); 405 406set @@optimizer_switch='default'; 407drop database world; 408-- echo 409 410 411-- echo 412-- echo TEST GROUP 2: 413-- echo Tests of various combinations of optimizer switches, types of queries, 414-- echo available indexes, column nullability, constness of tables/predicates. 415-- echo ===================================================================== 416 417 418#TODO From Igor's review: 419# 420#2.1 Please add a case when two subqueries are used in the where clause 421#(or in select) of a 2-way join. 422#The first subquery is accessed after the first table, while the second 423#is accessed after the second table. 424# 425#2.2. Please add a test case when one non-correlated subquery contains 426#another non-correlated subquery. 427#Consider 4 subcases: 428# both subqueries are materialized 429# IN_EXIST transformations are applied to both subqueries 430# outer subquery is materialized while the inner subquery is not 431#(IN_EXIST transformation is applied to it) 432# inner subqyery is materialized while the outer subquery is not ( 433#IN_EXIST transformation is applied to it) 434 435set optimizer_switch=default; 436