1set hive.auto.convert.join=false;
2set hive.optimize.correlation=false;
3-- When Correlation Optimizer is turned off, 6 MR jobs are needed.
4-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
5-- The first job will evaluate subquery xx, subquery yy, and xx join yy.
6EXPLAIN
7SELECT xx.key, xx.cnt, yy.key, yy.cnt
8FROM
9(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
10JOIN
11(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
12ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt;
13
14SELECT xx.key, xx.cnt, yy.key, yy.cnt
15FROM
16(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
17JOIN
18(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
19ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt;
20
21set hive.optimize.correlation=true;
22EXPLAIN
23SELECT xx.key, xx.cnt, yy.key, yy.cnt
24FROM
25(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
26JOIN
27(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
28ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt;
29
30SELECT xx.key, xx.cnt, yy.key, yy.cnt
31FROM
32(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
33JOIN
34(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
35ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt;
36
37set hive.optimize.correlation=true;
38set hive.auto.convert.join=true;
39-- Enable hive.auto.convert.join.
40EXPLAIN
41SELECT xx.key, xx.cnt, yy.key, yy.cnt
42FROM
43(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
44JOIN
45(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
46ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt;
47
48SELECT xx.key, xx.cnt, yy.key, yy.cnt
49FROM
50(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
51JOIN
52(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
53ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.cnt;
54
55set hive.auto.convert.join=false;
56set hive.optimize.correlation=false;
57-- When Correlation Optimizer is turned off, 3 MR jobs are needed.
58-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
59-- The first job will evaluate subquery yy and xx join yy.
60EXPLAIN
61SELECT xx.key, yy.key, yy.cnt
62FROM src1 xx
63JOIN
64(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy
65ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt;
66
67SELECT xx.key, yy.key, yy.cnt
68FROM src1 xx
69JOIN
70(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy
71ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt;
72
73set hive.optimize.correlation=true;
74EXPLAIN
75SELECT xx.key, yy.key, yy.cnt
76FROM src1 xx
77JOIN
78(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy
79ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt;
80
81SELECT xx.key, yy.key, yy.cnt
82FROM src1 xx
83JOIN
84(SELECT x.key as key, count(1) as cnt FROM src x GROUP BY x.key) yy
85ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt;
86
87set hive.optimize.correlation=false;
88-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
89-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
90-- The first job will evaluate subquery yy and xx join yy.
91EXPLAIN
92SELECT xx.key, yy.key, yy.cnt
93FROM src1 xx
94JOIN
95(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
96ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt;
97
98SELECT xx.key, yy.key, yy.cnt
99FROM src1 xx
100JOIN
101(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
102ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt;
103
104set hive.optimize.correlation=true;
105EXPLAIN
106SELECT xx.key, yy.key, yy.cnt
107FROM src1 xx
108JOIN
109(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
110ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt;
111
112SELECT xx.key, yy.key, yy.cnt
113FROM src1 xx
114JOIN
115(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
116ON xx.key=yy.key ORDER BY xx.key, yy.key, yy.cnt;
117
118set hive.auto.convert.join=false;
119set hive.optimize.correlation=false;
120-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
121-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
122-- The first job will evaluate subquery xx and xx join yy.
123EXPLAIN
124SELECT xx.key, xx.cnt, yy.key
125FROM
126(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
127JOIN src yy
128ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key;
129
130SELECT xx.key, xx.cnt, yy.key
131FROM
132(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
133JOIN src yy
134ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key;
135
136set hive.optimize.correlation=true;
137EXPLAIN
138SELECT xx.key, xx.cnt, yy.key
139FROM
140(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
141JOIN src yy
142ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key;
143
144SELECT xx.key, xx.cnt, yy.key
145FROM
146(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
147JOIN src yy
148ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key;
149
150set hive.optimize.correlation=false;
151-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
152-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
153-- The first job will evaluate subquery xx and xx join yy join zz.
154EXPLAIN
155SELECT xx.key, yy.key, yy.cnt
156FROM src1 xx
157JOIN src zz ON xx.key=zz.key
158JOIN
159(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
160ON zz.key=yy.key
161ORDER BY xx.key, yy.key, yy.cnt;
162
163SELECT xx.key, yy.key, yy.cnt
164FROM src1 xx
165JOIN src zz ON xx.key=zz.key
166JOIN
167(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
168ON zz.key=yy.key
169ORDER BY xx.key, yy.key, yy.cnt;
170
171set hive.optimize.correlation=true;
172-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
173-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
174-- The first job will evaluate subquery yy and xx join yy join zz.
175EXPLAIN
176SELECT xx.key, yy.key, yy.cnt
177FROM src1 xx
178JOIN src zz ON xx.key=zz.key
179JOIN
180(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
181ON zz.key=yy.key
182ORDER BY xx.key, yy.key, yy.cnt;
183
184SELECT xx.key, yy.key, yy.cnt
185FROM src1 xx
186JOIN src zz ON xx.key=zz.key
187JOIN
188(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
189ON zz.key=yy.key
190ORDER BY xx.key, yy.key, yy.cnt;
191
192set hive.optimize.correlation=false;
193-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
194-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
195-- The first job will evaluate subquery yy and xx join yy join zz.
196EXPLAIN
197SELECT xx.key, yy.key, yy.cnt
198FROM src1 xx
199JOIN
200(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
201ON xx.key=yy.key JOIN src zz
202ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt;
203
204SELECT xx.key, yy.key, yy.cnt
205FROM src1 xx
206JOIN
207(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
208ON xx.key=yy.key JOIN src zz
209ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt;
210
211set hive.optimize.correlation=true;
212EXPLAIN
213SELECT xx.key, yy.key, yy.cnt
214FROM src1 xx
215JOIN
216(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
217ON xx.key=yy.key JOIN src zz
218ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt;
219
220SELECT xx.key, yy.key, yy.cnt
221FROM src1 xx
222JOIN
223(SELECT x.key as key, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key) yy
224ON xx.key=yy.key JOIN src zz
225ON yy.key=zz.key ORDER BY xx.key, yy.key, yy.cnt;
226
227set hive.optimize.correlation=false;
228-- When Correlation Optimizer is turned off, 6 MR jobs are needed.
229-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
230-- The first job will evaluate subquery tmp and tmp join z.
231EXPLAIN
232SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value
233FROM
234(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2
235 FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx
236 JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy
237 ON (xx.key=yy.key) GROUP BY xx.key) tmp
238JOIN src z ON tmp.key=z.key
239ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value;
240
241SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value
242FROM
243(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2
244 FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx
245 JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy
246 ON (xx.key=yy.key) GROUP BY xx.key) tmp
247JOIN src z ON tmp.key=z.key
248ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value;
249
250set hive.optimize.correlation=true;
251EXPLAIN
252SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value
253FROM
254(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2
255 FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx
256 JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy
257 ON (xx.key=yy.key) GROUP BY xx.key) tmp
258JOIN src z ON tmp.key=z.key
259ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value;
260
261SELECT tmp.key, tmp.sum1, tmp.sum2, z.key, z.value
262FROM
263(SELECT xx.key as key, sum(xx.cnt) as sum1, sum(yy.cnt) as sum2
264 FROM (SELECT x.key as key, count(*) AS cnt FROM src x group by x.key) xx
265 JOIN (SELECT y.key as key, count(*) AS cnt FROM src1 y group by y.key) yy
266 ON (xx.key=yy.key) GROUP BY xx.key) tmp
267JOIN src z ON tmp.key=z.key
268ORDER BY tmp.key, tmp.sum1, tmp.sum2, z.key, z.value;
269
270set hive.optimize.correlation=false;
271-- When Correlation Optimizer is turned off, 6 MR jobs are needed.
272-- When Correlation Optimizer is turned on, 4 MR jobs are needed.
273-- 2 MR jobs are used to evaluate yy, 1 MR is used to evaluate xx and xx join yy.
274-- The last MR is used for ordering.
275EXPLAIN
276SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
277FROM
278(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
279JOIN
280(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
281ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt;
282
283SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
284FROM
285(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
286JOIN
287(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
288ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt;
289
290set hive.optimize.correlation=true;
291EXPLAIN
292SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
293FROM
294(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
295JOIN
296(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
297ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt;
298
299SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
300FROM
301(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
302JOIN
303(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
304ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt;
305
306set hive.optimize.correlation=true;
307set hive.auto.convert.join=true;
308EXPLAIN
309SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
310FROM
311(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
312JOIN
313(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
314ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt;
315
316SELECT xx.key, xx.cnt, yy.key, yy.value, yy.cnt
317FROM
318(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
319JOIN
320(SELECT x.key as key, x.value as value, count(1) as cnt FROM src x JOIN src y ON (x.key = y.key) group by x.key, x.value) yy
321ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key, yy.value, yy.cnt;
322
323