1 /* Copyright (C) 2014 InfiniDB, Inc.
2    Copyright (C) 2016 MariaDB Corporation
3 
4    This program is free software; you can redistribute it and/or
5    modify it under the terms of the GNU General Public License
6    as published by the Free Software Foundation; version 2 of
7    the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17    MA 02110-1301, USA. */
18 
19 /***********************************************************************
20 *   $Id: ha_from_sub.cpp 6377 2010-03-22 20:18:47Z zzhu $
21 *
22 *
23 ***********************************************************************/
24 /** @file */
25 /** class FromSubSelect definition */
26 
27 //#define NDEBUG
28 #include <my_config.h>
29 #include <cassert>
30 #include <map>
31 using namespace std;
32 
33 #include "idb_mysql.h"
34 
35 #include "parsetree.h"
36 #include "simplefilter.h"
37 #include "logicoperator.h"
38 #include "constantcolumn.h"
39 #include "simplecolumn.h"
40 using namespace execplan;
41 
42 #include "ha_subquery.h"
43 
44 namespace cal_impl_if
45 {
46 
derivedTableOptimization(THD * thd,SCSEP & csep)47 void derivedTableOptimization(THD* thd, SCSEP& csep)
48 {
49     // @bug5634. replace the unused column with ConstantColumn from derived table column list,
50     // ExeMgr will not project ConstantColumn. Only count for local derived column.
51     // subquery may carry main query derived table list for column reference, those
52     // derived tables are not checked for optimization in this scope.
53     CalpontSelectExecutionPlan::SelectList derivedTbList = csep->derivedTableList();
54 
55     // @bug6156. Skip horizontal optimization for no table union.
56     bool horizontalOptimization = true;
57 
58     for (uint i = 0; i < derivedTbList.size(); i++)
59     {
60         CalpontSelectExecutionPlan* plan = reinterpret_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
61         CalpontSelectExecutionPlan::ReturnedColumnList cols = plan->returnedCols();
62         vector<CalpontSelectExecutionPlan::ReturnedColumnList> unionColVec;
63 
64         // only do vertical optimization for union all
65         // @bug6134. Also skip the vertical optimization for select distinct
66         // because all columns need to be projected to check the distinctness.
67         bool verticalOptimization = false;
68 
69         if (plan->distinctUnionNum() == 0 && !plan->distinct())
70         {
71             verticalOptimization = true;
72 
73             for (uint j = 0; j < plan->unionVec().size(); j++)
74             {
75                 unionColVec.push_back(
76                     reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols());
77             }
78         }
79 
80         if (plan->tableList().empty())
81             horizontalOptimization = false;
82 
83         for (uint j = 0; j < plan->unionVec().size(); j++)
84         {
85             if (reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->tableList().empty())
86             {
87                 horizontalOptimization = false;
88                 break;
89             }
90         }
91 
92         if (verticalOptimization)
93         {
94             int64_t val = 1;
95 
96             // TODO MCOL-4543 Only project those columns from the subquery
97             // which are referenced in the outer select. So for example,
98             // if a table t contains 10 columns c1 ... c10 :
99             // "select count(c2) from (select * from t) q;"
100             // with p being the subquery execution plan, p->columnMap()
101             // and p->returnedCols() should both be of size 1, instead
102             // of 10, with entries for c2 in each.
103             //
104             // We are currently performing a dumb optimization:
105             // Instead of just referencing c2, we are referencing (c1,c2)
106             // for the above query. This is due to complexity associated
107             // with modifying ReturnedColumn::colPosition()
108             // (from a value of 1 to a value of 0) of the outer query
109             // which references c2. So essentially, if c2 is replaced by c10
110             // in the above query, we fallback to projecting all 10 columns
111             // of the subquery in ExeMgr.
112             // This will be addressed in future.
113             CalpontSelectExecutionPlan::ReturnedColumnList nonConstCols;
114             vector<CalpontSelectExecutionPlan::ReturnedColumnList> nonConstUnionColVec(unionColVec.size());
115 
116             int64_t lastNonConstIndex = -1;
117 
118             for (int64_t i = cols.size() - 1; i >= 0; i--)
119             {
120                 //if (cols[i]->derivedTable().empty())
121                 if (cols[i]->refCount() == 0)
122                 {
123                     if (cols[i]->derivedRefCol())
124                         cols[i]->derivedRefCol()->decRefCount();
125 
126                     if (lastNonConstIndex == -1)
127                     {
128                         SimpleColumn* sc = dynamic_cast<SimpleColumn*>(cols[i].get());
129 
130                         if (sc && (plan->columnMap().count(sc->columnName()) == 1))
131                         {
132                             plan->columnMap().erase(sc->columnName());
133                         }
134                     }
135                     else
136                     {
137                         cols[i].reset(new ConstantColumn(val));
138                         (reinterpret_cast<ConstantColumn*>(cols[i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
139                     }
140 
141                     for (uint j = 0; j < unionColVec.size(); j++)
142                     {
143                         if (lastNonConstIndex == -1)
144                         {
145                             CalpontSelectExecutionPlan* unionSubPlan =
146                                 reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
147 
148                             SimpleColumn* sc = dynamic_cast<SimpleColumn*>(unionSubPlan->returnedCols()[i].get());
149 
150                             if (sc && (unionSubPlan->columnMap().count(sc->columnName()) == 1))
151                             {
152                                 unionSubPlan->columnMap().erase(sc->columnName());
153                             }
154                         }
155                         else
156                         {
157                             unionColVec[j][i].reset(new ConstantColumn(val));
158                             (reinterpret_cast<ConstantColumn*>(unionColVec[j][i].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
159                         }
160                     }
161                 }
162                 else if (lastNonConstIndex == -1)
163                 {
164                     lastNonConstIndex = i;
165                 }
166             }
167 
168             if (lastNonConstIndex == -1)
169             {
170                 // None of the subquery columns are referenced, just use the first one
171                 if (!cols.empty())
172                 {
173                     cols[0].reset(new ConstantColumn(val));
174                     (reinterpret_cast<ConstantColumn*>(cols[0].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
175                     nonConstCols.push_back(cols[0]);
176 
177                     for (uint j = 0; j < unionColVec.size(); j++)
178                     {
179                         unionColVec[j][0].reset(new ConstantColumn(val));
180                         (reinterpret_cast<ConstantColumn*>(unionColVec[j][0].get()))->timeZone(thd->variables.time_zone->get_name()->ptr());
181                         nonConstUnionColVec[j].push_back(unionColVec[j][0]);
182                     }
183                 }
184             }
185             else
186             {
187                 nonConstCols.assign(cols.begin(), cols.begin() + lastNonConstIndex + 1);
188 
189                 for (uint j = 0; j < unionColVec.size(); j++)
190                 {
191                     nonConstUnionColVec[j].assign(unionColVec[j].begin(), unionColVec[j].begin() + lastNonConstIndex + 1);
192                 }
193             }
194 
195             // set back
196             plan->returnedCols(nonConstCols);
197 
198             for (uint j = 0; j < unionColVec.size(); j++)
199             {
200                 CalpontSelectExecutionPlan* unionSubPlan =
201                     reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
202                 unionSubPlan->returnedCols(nonConstUnionColVec[j]);
203             }
204         }
205     }
206 
207     /*
208      * @bug5635. Move filters that only belongs to a derived table to inside the derived table.
209      * 1. parse tree walk to populate derivedTableFilterMap and set null candidate on the tree.
210      * 2. remove the null filters
211      * 3. and the filters of derivedTableFilterMap and append to the WHERE filter of the derived table
212      *
213      * Note:
214      * 1. Subquery filters is ignored because derived table can not be in subquery
215      * 2. While walking tree, whenever a single derive simplefilter is encountered,
216      * this filter is pushed to the corresponding stack
217      * 2. Whenever an OR operator is encountered, all the filter stack of
218      * that OR involving derived table are emptied and null candidate of each
219      * stacked filter needs to be reset (not null)
220      */
221     ParseTree* pt = csep->filters();
222     map<string, ParseTree*> derivedTbFilterMap;
223 
224     if (horizontalOptimization && pt)
225     {
226         pt->walk(setDerivedTable);
227         setDerivedFilter(thd, pt, derivedTbFilterMap, derivedTbList);
228         csep->filters(pt);
229     }
230 
231     // AND the filters of individual stack to the derived table filter tree
232     // @todo union filters.
233     // @todo outer join complication
234     map<string, ParseTree*>::iterator mapIt;
235 
236     for (uint i = 0; i < derivedTbList.size(); i++)
237     {
238         CalpontSelectExecutionPlan* plan = reinterpret_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
239         CalpontSelectExecutionPlan::ReturnedColumnList derivedColList = plan->returnedCols();
240         mapIt = derivedTbFilterMap.find(plan->derivedTbAlias());
241 
242         if (mapIt != derivedTbFilterMap.end())
243         {
244             // replace all derived column of this filter with real column from
245             // derived table projection list.
246             ParseTree* mainFilter = new ParseTree();
247             mainFilter->copyTree(*(mapIt->second));
248             replaceRefCol(mainFilter, derivedColList);
249             ParseTree* derivedFilter = plan->filters();
250 
251             if (derivedFilter)
252             {
253                 LogicOperator* op = new LogicOperator("and");
254                 ParseTree* filter = new ParseTree(op);
255                 filter->left(derivedFilter);
256                 filter->right(mainFilter);
257                 plan->filters(filter);
258             }
259             else
260             {
261                 plan->filters(mainFilter);
262             }
263 
264             // union filter handling
265             for (uint j = 0; j < plan->unionVec().size(); j++)
266             {
267                 CalpontSelectExecutionPlan* unionPlan =
268                     reinterpret_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get());
269                 CalpontSelectExecutionPlan::ReturnedColumnList unionColList = unionPlan->returnedCols();
270                 ParseTree* mainFilterForUnion = new ParseTree();
271                 mainFilterForUnion->copyTree(*(mapIt->second));
272                 replaceRefCol(mainFilterForUnion, unionColList);
273                 ParseTree* unionFilter = unionPlan->filters();
274 
275                 if (unionFilter)
276                 {
277                     LogicOperator* op = new LogicOperator("and");
278                     ParseTree* filter = new ParseTree(op);
279                     filter->left(unionFilter);
280                     filter->right(mainFilterForUnion);
281                     unionPlan->filters(filter);
282                 }
283                 else
284                 {
285                     unionPlan->filters(mainFilterForUnion);
286                 }
287             }
288         }
289     }
290 
291     // clean derivedTbFilterMap because all the filters are copied
292     for ( mapIt = derivedTbFilterMap.begin(); mapIt != derivedTbFilterMap.end(); ++mapIt )
293         delete (*mapIt).second;
294 
295     // recursively process the nested derived table
296     for (uint i = 0; i < csep->subSelectList().size(); i++)
297     {
298         SCSEP subselect(boost::dynamic_pointer_cast<CalpontSelectExecutionPlan>(csep->subSelectList()[i]));
299         derivedTableOptimization(thd, subselect);
300     }
301 }
302 
setDerivedTable(execplan::ParseTree * n)303 void setDerivedTable(execplan::ParseTree* n)
304 {
305     ParseTree* lhs = n->left();
306     ParseTree* rhs = n->right();
307 
308     Operator* op = dynamic_cast<Operator*>(n->data());
309 
310     // if logic operator then lhs and rhs can't be both null
311     if (op)
312     {
313         if (!lhs || lhs->derivedTable() == "*")
314         {
315             n->derivedTable(rhs ? rhs->derivedTable() : "*");
316         }
317         else if (!rhs || rhs->derivedTable() == "*")
318         {
319             n->derivedTable(lhs->derivedTable());
320         }
321         else if (lhs->derivedTable() == rhs->derivedTable())
322         {
323             n->derivedTable(lhs->derivedTable());
324         }
325         else
326         {
327             n->derivedTable("");
328         }
329     }
330     else
331     {
332         n->data()->setDerivedTable();
333         n->derivedTable(n->data()->derivedTable());
334     }
335 }
336 
setDerivedFilter(THD * thd,ParseTree * & n,map<string,ParseTree * > & filterMap,CalpontSelectExecutionPlan::SelectList & derivedTbList)337 ParseTree* setDerivedFilter(THD* thd, ParseTree*& n,
338                             map<string, ParseTree*>& filterMap,
339                             CalpontSelectExecutionPlan::SelectList& derivedTbList)
340 {
341     if (!(n->derivedTable().empty()))
342     {
343         // @todo replace virtual column of n to real column
344         // all simple columns should belong to the same derived table
345         CalpontSelectExecutionPlan* csep = NULL;
346 
347         for (uint i = 0; i < derivedTbList.size(); i++)
348         {
349             CalpontSelectExecutionPlan* plan = dynamic_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get());
350 
351             if (plan->derivedTbAlias() == n->derivedTable())
352             {
353                 csep = plan;
354                 break;
355             }
356         }
357 
358         // should never be null; if null then give up optimization.
359         if (!csep)
360             return n;
361 
362         // 2. push the filter to the derived table filter stack, or 'and' with
363         // the filters in the stack
364         map<string, ParseTree*>::iterator mapIter = filterMap.find(n->derivedTable());
365 
366         if ( mapIter == filterMap.end())
367         {
368             filterMap.insert(pair<string, ParseTree*>(n->derivedTable(), n));
369         }
370         else
371         {
372             ParseTree* pt = new ParseTree(new LogicOperator("and"));
373             pt->left(mapIter->second);
374             pt->right(n);
375             mapIter->second = pt;
376         }
377 
378         int64_t val = 1;
379         n = new ParseTree(new ConstantColumn(val));
380         (dynamic_cast<ConstantColumn*>(n->data()))->timeZone(thd->variables.time_zone->get_name()->ptr());
381     }
382     else
383     {
384         Operator* op = dynamic_cast<Operator*>(n->data());
385 
386         if (op && (op->op() == OP_OR || op->op() == OP_XOR))
387         {
388             return n;
389         }
390         else
391         {
392             ParseTree* lhs = n->left();
393             ParseTree* rhs = n->right();
394 
395             if (lhs)
396                 n->left(setDerivedFilter(thd, lhs, filterMap, derivedTbList));
397 
398             if (rhs)
399                 n->right(setDerivedFilter(thd, rhs, filterMap, derivedTbList));
400         }
401     }
402 
403     return n;
404 }
405 
FromSubQuery(gp_walk_info & gwip)406 FromSubQuery::FromSubQuery(gp_walk_info& gwip) : SubQuery(gwip)
407 {}
408 
FromSubQuery(gp_walk_info & gwip,SELECT_LEX * sub)409 FromSubQuery::FromSubQuery(gp_walk_info& gwip,
410     SELECT_LEX* sub) :
411         SubQuery(gwip),
412         fFromSub(sub)
413 {}
414 
~FromSubQuery()415 FromSubQuery::~FromSubQuery()
416 {}
417 
transform()418 SCSEP FromSubQuery::transform()
419 {
420     assert (fFromSub);
421     SCSEP csep(new CalpontSelectExecutionPlan());
422     csep->sessionID(fGwip.sessionid);
423     csep->location(CalpontSelectExecutionPlan::FROM);
424     csep->subType (CalpontSelectExecutionPlan::FROM_SUBS);
425 
426     // gwi for the sub query
427     gp_walk_info gwi;
428     gwi.thd = fGwip.thd;
429     gwi.subQuery = this;
430     gwi.viewName = fGwip.viewName;
431     csep->derivedTbAlias(fAlias); // always lower case
432     csep->derivedTbView(fGwip.viewName.alias, lower_case_table_names);
433 
434     if (getSelectPlan(gwi, *fFromSub, csep, false) != 0)
435     {
436         fGwip.fatalParseError = true;
437 
438         if (!gwi.parseErrorText.empty())
439             fGwip.parseErrorText = gwi.parseErrorText;
440         else
441             fGwip.parseErrorText = "Error occured in FromSubQuery::transform()";
442 
443         csep.reset();
444         return csep;
445     }
446 
447     fGwip.subselectList.push_back(csep);
448     return csep;
449 }
450 
451 }
452