1set hive.auto.convert.join=false; 2set hive.optimize.correlation=false; 3-- When Correlation Optimizer is turned off, 6 MR jobs are needed. 4-- When Correlation Optimizer is turned on, 2 MR jobs are needed. 5-- The first job will evaluate subquery xx, subquery yy, and xx join yy. 6EXPLAIN 7SELECT xx.key, xx.cnt, yy.key, yy.cnt 8FROM 9(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 10JOIN 11(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 12ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt; 13 14SELECT xx.key, xx.cnt, yy.key, yy.cnt 15FROM 16(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 17JOIN 18(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 19ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt; 20 21set hive.optimize.correlation=true; 22EXPLAIN 23SELECT xx.key, xx.cnt, yy.key, yy.cnt 24FROM 25(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 26JOIN 27(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 28ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt; 29 30SELECT xx.key, xx.cnt, yy.key, yy.cnt 31FROM 32(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 33JOIN 34(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 35ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt; 36 37set hive.optimize.correlation=true; 38set hive.auto.convert.join=true; 39-- Enable hive.auto.convert.join. 40EXPLAIN 41SELECT xx.key, xx.cnt, yy.key, yy.cnt 42FROM 43(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 44JOIN 45(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 46ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt; 47 48SELECT xx.key, xx.cnt, yy.key, yy.cnt 49FROM 50(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 51JOIN 52(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 53ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt; 54 55set hive.auto.convert.join=false; 56set hive.optimize.correlation=false; 57-- When Correlation Optimizer is turned off, 3 MR jobs are needed. 58-- When Correlation Optimizer is turned on, 2 MR jobs are needed. 59-- The first job will evaluate subquery yy and xx join yy. 60EXPLAIN 61SELECT xx.key, yy.key, yy.cnt 62FROM src1 xx 63JOIN 64(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy 65ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt; 66 67SELECT xx.key, yy.key, yy.cnt 68FROM src1 xx 69JOIN 70(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy 71ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt; 72 73set hive.optimize.correlation=true; 74EXPLAIN 75SELECT xx.key, yy.key, yy.cnt 76FROM src1 xx 77JOIN 78(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy 79ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt; 80 81SELECT xx.key, yy.key, yy.cnt 82FROM src1 xx 83JOIN 84(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy 85ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt; 86 87set hive.optimize.correlation=false; 88-- When Correlation Optimizer is turned off, 4 MR jobs are needed. 89-- When Correlation Optimizer is turned on, 2 MR jobs are needed. 90-- The first job will evaluate subquery yy and xx join yy. 91EXPLAIN 92SELECT xx.key, yy.key, yy.cnt 93FROM src1 xx 94JOIN 95(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 96ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt; 97 98SELECT xx.key, yy.key, yy.cnt 99FROM src1 xx 100JOIN 101(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 102ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt; 103 104set hive.optimize.correlation=true; 105EXPLAIN 106SELECT xx.key, yy.key, yy.cnt 107FROM src1 xx 108JOIN 109(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 110ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt; 111 112SELECT xx.key, yy.key, yy.cnt 113FROM src1 xx 114JOIN 115(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 116ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt; 117 118set hive.auto.convert.join=false; 119set hive.optimize.correlation=false; 120-- When Correlation Optimizer is turned off, 4 MR jobs are needed. 121-- When Correlation Optimizer is turned on, 2 MR jobs are needed. 122-- The first job will evaluate subquery xx and xx join yy. 123EXPLAIN 124SELECT xx.key, xx.cnt, yy.key 125FROM 126(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 127JOIN src yy 128ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key; 129 130SELECT xx.key, xx.cnt, yy.key 131FROM 132(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 133JOIN src yy 134ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key; 135 136set hive.optimize.correlation=true; 137EXPLAIN 138SELECT xx.key, xx.cnt, yy.key 139FROM 140(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 141JOIN src yy 142ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key; 143 144SELECT xx.key, xx.cnt, yy.key 145FROM 146(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 147JOIN src yy 148ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key; 149 150set hive.optimize.correlation=false; 151-- When Correlation Optimizer is turned off, 4 MR jobs are needed. 152-- When Correlation Optimizer is turned on, 2 MR jobs are needed. 153-- The first job will evaluate subquery xx and xx join yy join zz. 154EXPLAIN 155SELECT xx.key, yy.key, yy.cnt 156FROM src1 xx 157JOIN src zz ON xx.key=zz.key 158JOIN 159(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 160ON zz.key=yy.key 161ORDER BY xx.key, yy.key, yy.cnt; 162 163SELECT xx.key, yy.key, yy.cnt 164FROM src1 xx 165JOIN src zz ON xx.key=zz.key 166JOIN 167(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 168ON zz.key=yy.key 169ORDER BY xx.key, yy.key, yy.cnt; 170 171set hive.optimize.correlation=true; 172-- When Correlation Optimizer is turned off, 4 MR jobs are needed. 173-- When Correlation Optimizer is turned on, 2 MR jobs are needed. 174-- The first job will evaluate subquery yy and xx join yy join zz. 175EXPLAIN 176SELECT xx.key, yy.key, yy.cnt 177FROM src1 xx 178JOIN src zz ON xx.key=zz.key 179JOIN 180(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 181ON zz.key=yy.key 182ORDER BY xx.key, yy.key, yy.cnt; 183 184SELECT xx.key, yy.key, yy.cnt 185FROM src1 xx 186JOIN src zz ON xx.key=zz.key 187JOIN 188(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 189ON zz.key=yy.key 190ORDER BY xx.key, yy.key, yy.cnt; 191 192set hive.optimize.correlation=false; 193-- When Correlation Optimizer is turned off, 4 MR jobs are needed. 194-- When Correlation Optimizer is turned on, 2 MR jobs are needed. 195-- The first job will evaluate subquery yy and xx join yy join zz. 196EXPLAIN 197SELECT xx.key, yy.key, yy.cnt 198FROM src1 xx 199JOIN 200(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 201ON xx.key=yy.key JOIN src zz 202ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt; 203 204SELECT xx.key, yy.key, yy.cnt 205FROM src1 xx 206JOIN 207(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 208ON xx.key=yy.key JOIN src zz 209ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt; 210 211set hive.optimize.correlation=true; 212EXPLAIN 213SELECT xx.key, yy.key, yy.cnt 214FROM src1 xx 215JOIN 216(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 217ON xx.key=yy.key JOIN src zz 218ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt; 219 220SELECT xx.key, yy.key, yy.cnt 221FROM src1 xx 222JOIN 223(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy 224ON xx.key=yy.key JOIN src zz 225ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt; 226 227set hive.optimize.correlation=false; 228-- When Correlation Optimizer is turned off, 6 MR jobs are needed. 229-- When Correlation Optimizer is turned on, 2 MR jobs are needed. 230-- The first job will evaluate subquery tmp and tmp join z. 231EXPLAIN 232SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value 233FROM 234(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2 235 FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx 236 JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy 237 ON (xx.key=yy.key) GROUP BY xx.key) tmp 238JOIN src z ON tmp.key=z.key 239ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value; 240 241SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value 242FROM 243(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2 244 FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx 245 JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy 246 ON (xx.key=yy.key) GROUP BY xx.key) tmp 247JOIN src z ON tmp.key=z.key 248ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value; 249 250set hive.optimize.correlation=true; 251EXPLAIN 252SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value 253FROM 254(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2 255 FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx 256 JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy 257 ON (xx.key=yy.key) GROUP BY xx.key) tmp 258JOIN src z ON tmp.key=z.key 259ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value; 260 261SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value 262FROM 263(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2 264 FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx 265 JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy 266 ON (xx.key=yy.key) GROUP BY xx.key) tmp 267JOIN src z ON tmp.key=z.key 268ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value; 269 270set hive.optimize.correlation=false; 271-- When Correlation Optimizer is turned off, 6 MR jobs are needed. 272-- When Correlation Optimizer is turned on, 4 MR jobs are needed. 273-- 2 MR jobs are used to evaluate yy, 1 MR is used to evaluate xx and xx join yy. 274-- The last MR is used for ordering. 275EXPLAIN 276SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt 277FROM 278(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 279JOIN 280(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy 281ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt; 282 283SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt 284FROM 285(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 286JOIN 287(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy 288ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt; 289 290set hive.optimize.correlation=true; 291EXPLAIN 292SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt 293FROM 294(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 295JOIN 296(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy 297ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt; 298 299SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt 300FROM 301(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 302JOIN 303(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy 304ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt; 305 306set hive.optimize.correlation=true; 307set hive.auto.convert.join=true; 308EXPLAIN 309SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt 310FROM 311(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 312JOIN 313(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy 314ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt; 315 316SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt 317FROM 318(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx 319JOIN 320(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy 321ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt; 322 323