1 /* Copyright (C) 2014 InfiniDB, Inc.
2    Copyright (C) 2019 MariaDB Corporation
3 
4    This program is free software; you can redistribute it and/or
5    modify it under the terms of the GNU General Public License
6    as published by the Free Software Foundation; version 2 of
7    the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17    MA 02110-1301, USA. */
18 
19 //  $Id: jlf_common.h 9702 2013-07-17 19:08:07Z xlou $
20 
21 
22 /** @file jlf_common.h
23  *
24  */
25 #ifndef JLF_COMMON_H__
26 #define JLF_COMMON_H__
27 
28 #include <map>
29 #include <set>
30 #include <stack>
31 #include <string>
32 #include <vector>
33 
34 #include <boost/shared_ptr.hpp>
35 #include <boost/uuid/uuid.hpp>
36 
37 #include "calpontexecutionplan.h"
38 #include "calpontselectexecutionplan.h"
39 #include "calpontsystemcatalog.h"
40 #include "simplecolumn.h"
41 
42 #include "dbrm.h"
43 
44 #include "joblist.h"
45 #include "jobstep.h"
46 #include "groupconcat.h"
47 #include "jl_logger.h"
48 
49 #include "resourcemanager.h"
50 #include "rowgroup.h"
51 
// Forward declarations of execplan classes; SimpleColumn is taken by pointer
// by several declarations further down in this header.
namespace execplan
{
class SimpleColumn;
class AggregateColumn;
}
58 
59 
60 namespace joblist
61 {
// Terminal escape sequences used to bracket error messages printed to the screen.
const std::string boldStart("\033[0;1m");
const std::string boldStop("\033[0;39m");

// Values for JobInfo::constantCol (initialised to CONST_COL_NONE).
// NOTE(review): judging by the names - no constant column, some constant
// columns present, only constant columns - confirm against the users.
const int8_t CONST_COL_NONE  = 0;
const int8_t CONST_COL_EXIST = 1;
const int8_t CONST_COL_ONLY  = 2;

// All expressions are treated as belonging to a "virtual" table EXPRESSION,
// addressed as (CNX_EXP_TABLE_ID, expression).
// CNX_EXP_TABLE_ID (999) is never used for a user table or column, so it
// cannot conflict with anything in a query.
const int32_t CNX_EXP_TABLE_ID = 999;
73 
74 struct TupleInfo
75 {
76     TupleInfo(uint32_t w = 0, uint32_t o = 0, uint32_t k = -1, uint32_t t = -1, uint32_t s = 0, uint32_t p = 0,
77               execplan::CalpontSystemCatalog::ColDataType dt = execplan::CalpontSystemCatalog::BIT, uint32_t csn = 8) :
widthTupleInfo78         width(w), oid(o), key(k), tkey(t), scale(s), precision(p), dtype(dt), csNum(csn) { }
~TupleInfoTupleInfo79     ~TupleInfo() { }
80 
81     uint32_t width;
82     uint32_t oid;
83     uint32_t key;
84     uint32_t tkey;
85     uint32_t scale;
86     uint32_t precision;
87     execplan::CalpontSystemCatalog::ColDataType dtype;
88     uint32_t csNum; // For collations
89 };
90 
91 // for compound join
92 struct JoinData
93 {
94     int64_t fJoinId;
95     std::vector<uint32_t> fLeftKeys;
96     std::vector<uint32_t> fRightKeys;
97     std::vector<JoinType> fTypes; // joblisttypes.h: INNER, LEFTOUTER, RIGHTOUTER
98     bool fTypeless;
99 
JoinDataJoinData100     JoinData() : fJoinId(-1), fTypeless(false) {}
101 };
102 
103 typedef std::stack<JobStepVector> JobStepVectorStack;
104 typedef std::map<execplan::CalpontSystemCatalog::OID, execplan::CalpontSystemCatalog::OID> DictOidToColOidMap;
105 typedef std::vector<TupleInfo> TupleInfoVector;
106 typedef std::map<uint32_t, TupleInfo> TupleInfoMap;
107 
108 //for subquery support
109 struct UniqId
110 {
111     int         fId;     // OID for real table, sequence # for subquery
112 //	std::string fName;   // name (table alias + [column name, if column])
113     std::string fTable;  // table name (table alias)
114     std::string fSchema; // schema name
115     std::string fView;   // view name
116     uint32_t    fPseudo; // pseudo type
117 //	uint64_t	fEngine; // InfiniDB == 0
118     uint64_t    fSubId;  // subquery ID
119 
UniqIdUniqId120     UniqId() : fId(-1), fSubId(-1) {}
121     UniqId(int i, const std::string& t, const std::string& s, const std::string& v,
122            uint32_t pi = 0, uint64_t l = -1) :
fIdUniqId123         fId(i), fTable(t), fSchema(s), fView(v), fPseudo(pi), fSubId(l) {}
124     UniqId(const execplan::SimpleColumn* sc);
125     UniqId(int o, const execplan::SimpleColumn* sc);
126 
127     std::string toString() const;
128 };
129 bool operator < (const struct UniqId& x, const struct UniqId& y);
130 bool operator == (const struct UniqId& x, const struct UniqId& y);
131 typedef std::map<UniqId, uint32_t> TupleKeyMap;
132 
133 //typedef vector<SRCP> RetColsVector;
134 typedef execplan::CalpontSelectExecutionPlan::ReturnedColumnList RetColsVector;
135 
136 //join data between table pairs
137 typedef std::map<std::pair<uint32_t, uint32_t>, JoinData> TableJoinMap;
138 
139 struct TupleKeyInfo
140 {
141     uint32_t nextKey;
142     TupleKeyMap tupleKeyMap;
143     std::vector<UniqId> tupleKeyVec;
144     std::vector<std::string> tupleKeyToName;
145     std::vector<bool> crossEngine;
146 
147     // TODO: better organize these structs
148     std::map<uint32_t, execplan::CalpontSystemCatalog::OID> tupleKeyToTableOid;
149     std::map<uint32_t, execplan::CalpontSystemCatalog::ColType> colType;
150     std::map<uint32_t, execplan::CalpontSystemCatalog::ColType> token2DictTypeMap;
151     std::map<uint32_t, std::string> keyName;
152     std::map<uint32_t, uint32_t> colKeyToTblKey;
153     std::map<uint32_t, uint32_t> dictKeyMap;    // map token key to dictionary key
154     DictOidToColOidMap dictOidToColOid;         // map dictionary OID to column OID
155     std::map<uint32_t, uint32_t> pseudoType;    // key to pseudo column type
156     std::set<uint32_t> functionJoinKeys;        // key used in function join
157     TupleInfoMap tupleInfoMap;
158 
TupleKeyInfoTupleKeyInfo159     TupleKeyInfo() : nextKey(0) {}
160 };
161 
162 
163 //------------------------------------------------------------------------------
164 /** @brief This struct maintains state for the query processing
165  *
166  */
167 //------------------------------------------------------------------------------
168 struct JobInfo
169 {
JobInfoJobInfo170     JobInfo(ResourceManager* r) :
171         rm(r),
172         sessionId(0),
173         txnId(0),
174         statementId(0),
175         maxBuckets(rm->getHjMaxBuckets()),
176         maxElems(rm->getHjMaxElems()),
177         flushInterval(rm->getJLFlushInterval()),
178         fifoSize(rm->getJlFifoSize()),
179         fifoSizeLargeSideHj(rm->getHjFifoSizeLargeSide()),
180         scanLbidReqLimit(rm->getJlScanLbidReqLimit()),
181         scanLbidReqThreshold(rm->getJlScanLbidReqThreshold()),
182         tempSaveSize(rm->getScTempSaveSize()),
183         logger(new Logger()),
184         traceFlags(0),
185         tupleDLMaxSize(rm->getTwMaxSize()),
186         tupleMaxBuckets(rm->getTwMaxBuckets()),
187         projectingTableOID(0),
188         isExeMgr(false),
189         trace(false),
190         tryTuples(false),
191         constantCol(CONST_COL_NONE),
192         hasDistinct(false),
193         hasAggregation(false),
194         limitStart(0),
195         limitCount(-1),
196         joinNum(0),
197         subLevel(0),
198         subNum(0),
199         subId(0),
200         pJobInfo(NULL),
201         constantFalse(false),
202         cntStarPos(-1),
203         stringScanThreshold(1),
204         wfqLimitStart(0),
205         wfqLimitCount(-1)
206     { }
207     ResourceManager* rm;
208     uint32_t  sessionId;
209     uint32_t  txnId;
210     BRM::QueryContext  verId;
211     uint32_t  statementId;
212     std::string  queryType;
213     boost::shared_ptr<execplan::CalpontSystemCatalog> csc;
214     int       maxBuckets;
215     uint64_t  maxElems;
216     JobStepVectorStack stack;
217     uint32_t  flushInterval;
218     uint32_t  fifoSize;
219     uint32_t  fifoSizeLargeSideHj;
220     //...joblist does not use scanLbidReqLimit and SdanLbidReqThreshold.
221     //...They are actually used by pcolscan and pdictionaryscan, but
222     //...we have joblist get and report the values here since they
223     //...are global to the job.
224     uint32_t  scanLbidReqLimit;
225     uint32_t  scanLbidReqThreshold;
226     uint32_t  tempSaveSize;
227     SPJL      logger;
228     uint32_t  traceFlags;
229     uint64_t  tupleDLMaxSize;
230     uint32_t  tupleMaxBuckets;
231     SErrorInfo errorInfo;
232     execplan::CalpontSystemCatalog::OID* projectingTableOID; // DeliveryWSDLs get a reference to this
233     bool      isExeMgr;
234     bool      trace;
235     bool      tryTuples;
236     int8_t    constantCol;
237     TupleInfoVector pjColList;
238 
239     // aggregation
240     bool       hasDistinct;
241     bool       hasAggregation;
242     std::vector<uint32_t>                  groupByColVec;
243     std::vector<uint32_t>                  distinctColVec;
244     std::vector<uint32_t>                  expressionVec;
245     std::vector<std::pair<uint32_t, int> > returnedColVec;
246 
247     // order by and limit
248     std::vector<std::pair<uint32_t, bool> > orderByColVec;
249     uint64_t                                limitStart;
250     uint64_t                                limitCount;
251     uint32_t                                orderByThreads;
252 
253     // tupleInfo
254     boost::shared_ptr<TupleKeyInfo> keyInfo;
255 
256     // skip dictionary step if the real string is not necessary to projected.
257     // In most case, the string is used for return or comparison, so default is false.
258     //     when setting to false, no need to check: false overwrites true;
259     //     When setting to true, need check: true cannot overwrite false.
260     std::map<uint32_t, bool> tokenOnly;
261 
262     // unique ID list of the tables in from clause
263     std::vector<uint32_t> tableList;
264 
265     // table join map
266     TableJoinMap tableJoinMap;
267 
268     // for expression
269     JobStepVector crossTableExpressions;
270     JobStepVector returnedExpressions;
271 
272     // @bug3683, function join
273     std::vector<JobStep*>  functionJoins;   // store expressions can be converted to joins
274 
275     // for function on aggregation
276     RetColsVector deliveredCols;            // columns to be sent to connector
277     RetColsVector nonConstCols;             // none constant columns
278     RetColsVector nonConstDelCols;          // delivered none constant columns
279     RetColsVector projectionCols;           // columns for projection
280     std::multimap<execplan::ReturnedColumn*, execplan::ReturnedColumn*> cloneAggregateColMap;
281     std::vector<std::pair<int, int> > aggEidIndexList;
282 
283     // table pairs with incompatible join which is treated as expression
284     std::map<uint32_t, uint32_t> incompatibleJoinMap;
285 
286     // bug 1573 & 3391, having
287     SJSTEP         havingStep;
288     JobStepVector  havingStepVec;
289 
290     // bug 2634, 5311 and 5374, outjoin and predicates
291     std::set<uint32_t> outerOnTable;
292     std::set<uint32_t> tableHasIsNull;
293     JobStepVector  outerJoinExpressions;
294 
295     // bug 3759, join in order
296     // mixed outer join
297     std::map<int, uint64_t> tableSize;
298     int64_t joinNum;
299 
300     // for subquery
301     boost::shared_ptr<int> subCount;      // # of subqueries in the query statement
302     int                    subLevel;      // subquery level
303     int                    subNum;        // # of subqueries @ level n
304     int                    subId;         // id of current subquery
305     JobInfo*               pJobInfo;      // jobinfo of outer query
306     bool                   constantFalse; // has constant false filter
307     std::string            subAlias;      // unique alias to identify the subquery
308     JobStepVector          correlateSteps;
309     JobStepVector          selectAndFromSubs;
310     std::set<uint64_t>     returnColSet;
311     std::map<UniqId, execplan::CalpontSystemCatalog::ColType> vtableColTypes;
312 
313     // step to process orderby, limit and fill in constants
314     SJSTEP annexStep;
315 
316     // @bug3475, aggregate constant column <position, aggregate column>
317     std::map<uint64_t, execplan::SRCP> constAggregate;
318     int64_t cntStarPos;  // position of count(*)
319 
320     // @bug3321, dictionary scan setting, HWM = stringScanThreshold -1
321     uint64_t stringScanThreshold;
322 
323     // @bug3362, group_concat
324     RetColsVector   groupConcatCols;
325     GroupConcatInfo groupConcatInfo;
326 
327     // @bug3736, column map
328     std::map<uint32_t, std::vector<uint32_t> > columnMap;
329 
330     // @bug3438, joblist for trace/stats
331     JobList* jobListPtr;  // just reference, NOT delete by JobInfo
332 
333     // WORKAROUND for join FE limitation (join Id to expression tables map)
334     std::map<uint32_t, std::set<uint32_t> > joinFeTableMap;
335 
336     uint32_t stringTableThreshold;
337 
338     // @bug4531, Window Function support
339     RetColsVector windowCols;
340     RetColsVector windowExps;
341     RetColsVector windowDels;
342     std::set<uint64_t> windowSet;
343     RetColsVector wfqOrderby;
344     uint64_t      wfqLimitStart;
345     uint64_t      wfqLimitCount;
346     // workaround for expression of windowfunction in IN/EXISTS sub-query
347     //std::map<uint32_t, RetColsVector>  exprWinfuncListMap;
348 
349     // Flag to tell us we are in local PM only query mode
350     uint32_t localQuery;
351 
352     boost::uuids::uuid uuid;
353 
354     // @bug4021, column map for all pseudo column queries
355     std::map<uint64_t, execplan::SRCP> tableColMap;
356     std::set<uint64_t> pseudoColTable;
357 
358     /* Disk-based join vars */
359     boost::shared_ptr<int64_t> smallSideUsage;
360     boost::shared_ptr<int64_t> umMemLimit;
361     int64_t smallSideLimit;    // need to get these from a session var in execplan
362     int64_t largeSideLimit;
363     uint64_t partitionSize;
364     bool isDML;
365     std::string timeZone;
366 
367     // This is for tracking any dynamically allocated ParseTree objects
368     // in simpleScalarFilterToParseTree() for later deletion in
369     // ~csep() or csep.unserialize()
370     std::vector<execplan::ParseTree*> dynamicParseTreeVec;
371 
372 private:
373     //defaults okay
374     //JobInfo(const JobInfo& rhs);
375     //JobInfo& operator=(const JobInfo& rhs);
376 };
377 
378 
379 //------------------------------------------------------------------------------
380 // namespace scoped functions
381 //------------------------------------------------------------------------------
382 
383 /** @brief Returns the table alias for the specified column
384  *
385  */
386 std::string extractTableAlias(const execplan::SimpleColumn* sc);
387 
388 /** @brief Returns the table alias for the specified column
389  *
390  */
391 std::string extractTableAlias(const execplan::SSC& sc);
392 
393 /** @brief Returns OID associated with colType if it is a dictionary column
394  *
395  */
396 execplan::CalpontSystemCatalog::OID isDictCol(const execplan::CalpontSystemCatalog::ColType& colType);
397 
398 /** @brief Determines if colType is a character column
399  *
400  */
401 bool isCharCol(const execplan::CalpontSystemCatalog::ColType& colType);
402 
403 /** @brief Returns OID associated with a table
404  *
405  */
406 execplan::CalpontSystemCatalog::OID tableOid(const execplan::SimpleColumn* sc,
407         boost::shared_ptr<execplan::CalpontSystemCatalog> cat);
408 
409 /** @brief Returns the unique ID to be used in tupleInfo
410  *
411  */
412 uint32_t getTupleKey(JobInfo& jobInfo,
413                      const execplan::SimpleColumn* sc,
414                      bool add = false);
415 uint32_t getTableKey(const JobInfo& jobInfo,
416                      execplan::CalpontSystemCatalog::OID tableOid,
417                      const std::string& alias,
418                      const std::string& schema,
419                      const std::string& view);
420 uint32_t getTupleKey(JobInfo& jobInfo,
421                      const execplan::SRCP& srcp,
422                      bool add = false);
423 uint32_t getTableKey(const JobInfo& jobInfo,
424                      uint32_t cid);
425 uint32_t getTableKey(JobInfo& jobInfo,
426                      JobStep* js);
427 
428 void updateTableKey(uint32_t cid,
429                     uint32_t tid,
430                     JobInfo& jobInfo);
431 
432 uint32_t getExpTupleKey(const JobInfo& jobInfo,
433                         uint64_t eid,
434                         bool cr = false);
435 
436 uint32_t makeTableKey(JobInfo& jobInfo,
437                       const execplan::SimpleColumn* sc);
438 uint32_t makeTableKey(JobInfo& jobInfo,
439                       execplan::CalpontSystemCatalog::OID tableOid,
440                       const std::string& tbl_name,
441                       const std::string& tbl_alias,
442                       const std::string& sch_name,
443                       const std::string& vw_name,
444                       uint64_t engine = 0);
445 
446 
447 /** @brief Returns the tupleInfo associate with the (table, column) key pair
448  *
449  */
450 TupleInfo getTupleInfo(uint32_t columnKey, const JobInfo& jobInfo);
451 
452 /** @brief set tuple info for simple column
453  *
454  */
455 TupleInfo setTupleInfo(const execplan::CalpontSystemCatalog::ColType& ct,
456                        execplan::CalpontSystemCatalog::OID col_oid,
457                        JobInfo& jobInfo,
458                        execplan::CalpontSystemCatalog::OID tbl_oid,
459                        const execplan::SimpleColumn* sc,
460                        const std::string& alias);
461 
462 /** @brief set tuple info for expressions
463  *
464  */
465 TupleInfo setExpTupleInfo(const execplan::CalpontSystemCatalog::ColType& ct,
466                           uint64_t expressionId,
467                           const std::string& alias,
468                           JobInfo& jobInfo,
469                           bool rc = false);
470 
471 TupleInfo setExpTupleInfo(const execplan::ReturnedColumn* rc, JobInfo& jobInfo);
472 
473 /** @brief add an aggregate column info
474  *
475  */
476 void addAggregateColumn(execplan::ReturnedColumn*, int, RetColsVector&, JobInfo&);
477 
478 void makeJobSteps(execplan::CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
479                   JobStepVector& querySteps, JobStepVector& projectSteps,
480                   DeliveredTableMap& deliverySteps);
481 
482 void makeUnionJobSteps(execplan::CalpontSelectExecutionPlan* csep, JobInfo& jobInfo,
483                        JobStepVector& querySteps, JobStepVector&, DeliveredTableMap& deliverySteps);
484 
485 void updateDerivedColumn(JobInfo&, execplan::SimpleColumn*,
486                          execplan::CalpontSystemCatalog::ColType&);
487 
488 bool filterWithDictionary(execplan::CalpontSystemCatalog::OID dictOid, uint64_t n);
489 
490 bool compatibleColumnTypes(const execplan::CalpontSystemCatalog::ColType& ct1,
491                            const execplan::CalpontSystemCatalog::ColType& ct2,
492                            bool  forJoin = true);
493 bool compatibleColumnTypes(const execplan::CalpontSystemCatalog::ColDataType& dt1, uint32_t scale1,
494                            const execplan::CalpontSystemCatalog::ColDataType& dt2, uint32_t scale2,
495                            bool  forJoin = true);
496 
497 
} // end of namespace joblist
499 
500 #endif
501