1 /* Copyright (C) 2014 InfiniDB, Inc.
2    Copyright (C) 2019 MariaDB Corporation
3 
4    This program is free software; you can redistribute it and/or
5    modify it under the terms of the GNU General Public License
6    as published by the Free Software Foundation; version 2 of
7    the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17    MA 02110-1301, USA. */
18 
19 /***********************************************************************
20 *   $Id: aggregatecolumn.cpp 9679 2013-07-11 22:32:03Z zzhu $
21 *
22 *
23 ***********************************************************************/
24 #include <sstream>
25 #include <cstring>
26 using namespace std;
27 
28 #include <boost/algorithm/string/case_conv.hpp>
29 using namespace boost;
30 
31 #include "bytestream.h"
32 using namespace messageqcpp;
33 
34 #include "rowgroup.h"
35 using namespace rowgroup;
36 
37 #include "joblisttypes.h"
38 using namespace joblist;
39 
40 #include "aggregatecolumn.h"
41 #include "simplefilter.h"
42 #include "constantfilter.h"
43 #include "arithmeticcolumn.h"
44 #include "functioncolumn.h"
45 #include "objectreader.h"
46 
47 namespace execplan
48 {
49 
getAggCols(execplan::ParseTree * n,void * obj)50 void getAggCols(execplan::ParseTree* n, void* obj)
51 {
52     vector<AggregateColumn*>* list = reinterpret_cast< vector<AggregateColumn*>*>(obj);
53     TreeNode* tn = n->data();
54     AggregateColumn* sc = dynamic_cast<AggregateColumn*>(tn);
55     FunctionColumn* fc = dynamic_cast<FunctionColumn*>(tn);
56     ArithmeticColumn* ac = dynamic_cast<ArithmeticColumn*>(tn);
57     SimpleFilter* sf = dynamic_cast<SimpleFilter*>(tn);
58     ConstantFilter* cf = dynamic_cast<ConstantFilter*>(tn);
59 
60     if (sc)
61     {
62         list->push_back(sc);
63     }
64     else if (fc)
65     {
66         fc->hasAggregate();
67         list->insert(list->end(), fc->aggColumnList().begin(), fc->aggColumnList().end());
68     }
69     else if (ac)
70     {
71         ac->hasAggregate();
72         list->insert(list->end(), ac->aggColumnList().begin(), ac->aggColumnList().end());
73     }
74     else if (sf)
75     {
76         sf->hasAggregate();
77         list->insert(list->end(), sf->aggColumnList().begin(), sf->aggColumnList().end());
78     }
79     else if (cf)
80     {
81         cf->hasAggregate();
82         list->insert(list->end(), cf->aggColumnList().begin(), cf->aggColumnList().end());
83     }
84 }
85 
86 /**
87  * Constructors/Destructors
88  */
AggregateColumn()89 AggregateColumn::AggregateColumn():
90     fAggOp(NOOP),
91     fAsc(false)
92 {
93 }
94 
AggregateColumn(const uint32_t sessionID)95 AggregateColumn::AggregateColumn(const uint32_t sessionID):
96     ReturnedColumn(sessionID),
97     fAggOp(NOOP),
98     fAsc(false)
99 {
100 }
101 
102 // deprecated constructor. use function name as string
AggregateColumn(const string & functionName,const string & content,const uint32_t sessionID)103 AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID):
104     ReturnedColumn(sessionID),
105     fFunctionName(functionName),
106     fAggOp(NOOP),
107     fAsc(false),
108     fData(functionName + "(" + content + ")")
109 {
110     // TODO: need to handle distinct
111     SRCP srcp(new ArithmeticColumn(content));
112     fAggParms.push_back(srcp);
113 }
114 
/**
 * Copy-style constructor that also takes a session id.
 *
 * @param rhs       column to copy from
 * @param sessionID forwarded, together with rhs, to the ReturnedColumn base constructor
 *
 * Copies the function name, aggregate operation, table alias, ascending flag,
 * data string, constant-column handle, time zone, alias and parameter list.
 * The SRCP handles are copied as handles; the columns they refer to are not cloned here.
 */
AggregateColumn::AggregateColumn(const AggregateColumn& rhs, const uint32_t sessionID)
    : ReturnedColumn(rhs, sessionID)
    , fFunctionName(rhs.fFunctionName)
    , fAggOp(rhs.fAggOp)
    , fTableAlias(rhs.tableAlias())
    , fAsc(rhs.asc())
    , fData(rhs.data())
    , fConstCol(rhs.fConstCol)
    , fTimeZone(rhs.timeZone())
{
    fAggParms = rhs.fAggParms;
    fAlias = rhs.alias();
}
128 
129 /**
130  * Methods
131  */
132 
toString() const133 const string AggregateColumn::toString() const
134 {
135     ostringstream output;
136     output << "AggregateColumn " << data() << endl;
137     output << "func/distinct: " << (int)fAggOp << "/" << fDistinct << endl;
138     output << "expressionId=" << fExpressionId << endl;
139 
140     if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl;
141 
142     if (fAggParms.size() == 0)
143         output << "No arguments";
144     else
145         for (uint32_t i = 0; i < fAggParms.size(); ++i)
146         {
147             output << *(fAggParms[i]) << " ";
148         }
149 
150     output << endl;
151 
152     if (fConstCol)
153         output << *fConstCol;
154 
155     return output.str();
156 }
157 
operator <<(ostream & output,const AggregateColumn & rhs)158 ostream& operator<<(ostream& output, const AggregateColumn& rhs)
159 {
160     output << rhs.toString();
161     return output;
162 }
163 
serialize(messageqcpp::ByteStream & b) const164 void AggregateColumn::serialize(messageqcpp::ByteStream& b) const
165 {
166     CalpontSelectExecutionPlan::ReturnedColumnList::const_iterator rcit;
167     b << (uint8_t) ObjectReader::AGGREGATECOLUMN;
168     ReturnedColumn::serialize(b);
169     b << fFunctionName;
170     b << static_cast<uint8_t>(fAggOp);
171 
172     b << static_cast<uint32_t>(fAggParms.size());
173 
174     for (uint32_t i = 0; i < fAggParms.size(); ++i)
175     {
176         fAggParms[i]->serialize(b);
177     }
178 
179     b << static_cast<uint32_t>(fGroupByColList.size());
180 
181     for (rcit = fGroupByColList.begin(); rcit != fGroupByColList.end(); ++rcit)
182         (*rcit)->serialize(b);
183 
184     b << static_cast<uint32_t>(fProjectColList.size());
185 
186     for (rcit = fProjectColList.begin(); rcit != fProjectColList.end(); ++rcit)
187         (*rcit)->serialize(b);
188 
189     b << fData;
190     b << fTimeZone;
191     //b << fAlias;
192     b << fTableAlias;
193     b << static_cast<ByteStream::doublebyte>(fAsc);
194 
195     if (fConstCol.get() == 0)
196         b << (uint8_t) ObjectReader::NULL_CLASS;
197     else
198         fConstCol->serialize(b);
199 }
200 
unserialize(messageqcpp::ByteStream & b)201 void AggregateColumn::unserialize(messageqcpp::ByteStream& b)
202 {
203     messageqcpp::ByteStream::quadbyte size;
204     messageqcpp::ByteStream::quadbyte i;
205     ReturnedColumn* rc;
206 
207     ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN);
208     fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end());
209     fProjectColList.erase(fProjectColList.begin(), fProjectColList.end());
210     fAggParms.erase(fAggParms.begin(), fAggParms.end());
211     ReturnedColumn::unserialize(b);
212     b >> fFunctionName;
213     b >> fAggOp;
214 
215     b >> size;
216 
217     for (i = 0; i < size; i++)
218     {
219         rc = dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b));
220         SRCP srcp(rc);
221         fAggParms.push_back(srcp);
222     }
223 
224     b >> size;
225 
226     for (i = 0; i < size; i++)
227     {
228         rc = dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b));
229         SRCP srcp(rc);
230         fGroupByColList.push_back(srcp);
231     }
232 
233     b >> size;
234 
235     for (i = 0; i < size; i++)
236     {
237         rc = dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b));
238         SRCP srcp(rc);
239         fProjectColList.push_back(srcp);
240     }
241 
242     b >> fData;
243     b >> fTimeZone;
244     //b >> fAlias;
245     b >> fTableAlias;
246     b >> reinterpret_cast< ByteStream::doublebyte&>(fAsc);
247     fConstCol.reset(dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b)));
248 }
249 
operator ==(const AggregateColumn & t) const250 bool AggregateColumn::operator==(const AggregateColumn& t) const
251 {
252     const ReturnedColumn* rc1, *rc2;
253     AggParms::const_iterator it, it2;
254 
255     rc1 = static_cast<const ReturnedColumn*>(this);
256     rc2 = static_cast<const ReturnedColumn*>(&t);
257 
258     if (*rc1 != *rc2)
259         return false;
260 
261     if (fFunctionName != t.fFunctionName)
262         return false;
263 
264     if (fAggOp == COUNT_ASTERISK && t.fAggOp == COUNT_ASTERISK)
265         return true;
266 
267     if (fAggOp != t.fAggOp)
268         return false;
269 
270     if (aggParms().size() != t.aggParms().size())
271     {
272         return false;
273     }
274 
275     for (it = fAggParms.begin(), it2 = t.fAggParms.begin();
276             it != fAggParms.end();
277             ++it, ++it2)
278     {
279         if (**it != **it2)
280             return false;
281     }
282 
283     if (fTableAlias != t.fTableAlias)
284         return false;
285 
286     if (fData != t.fData)
287         return false;
288 
289     if (fAsc != t.fAsc)
290         return false;
291 
292     if ((fConstCol.get() != NULL && t.fConstCol.get() == NULL) ||
293             (fConstCol.get() == NULL && t.fConstCol.get() != NULL) ||
294             (fConstCol.get() != NULL && t.fConstCol.get() != NULL &&
295              *(fConstCol.get()) != t.fConstCol.get()))
296         return false;
297 
298     if (fTimeZone != t.fTimeZone)
299         return false;
300 
301     return true;
302 }
303 
operator ==(const TreeNode * t) const304 bool AggregateColumn::operator==(const TreeNode* t) const
305 {
306     const AggregateColumn* ac;
307 
308     ac = dynamic_cast<const AggregateColumn*>(t);
309 
310     if (ac == NULL)
311         return false;
312 
313     return *this == *ac;
314 }
315 
operator !=(const AggregateColumn & t) const316 bool AggregateColumn::operator!=(const AggregateColumn& t) const
317 {
318     return !(*this == t);
319 }
320 
operator !=(const TreeNode * t) const321 bool AggregateColumn::operator!=(const TreeNode* t) const
322 {
323     return !(*this == t);
324 }
325 
hasAggregate()326 bool AggregateColumn::hasAggregate()
327 {
328     fAggColumnList.push_back(this);
329     return true;
330 }
331 
evaluate(Row & row,bool & isNull)332 void AggregateColumn::evaluate(Row& row, bool& isNull)
333 {
334     switch (fResultType.colDataType)
335     {
336         case CalpontSystemCatalog::DATE:
337             if (row.equals<4>(DATENULL, fInputIndex))
338                 isNull = true;
339             else
340                 fResult.intVal = row.getUintField<4>(fInputIndex);
341 
342             break;
343 
344         case CalpontSystemCatalog::DATETIME:
345             if (row.equals<8>(DATETIMENULL, fInputIndex))
346                 isNull = true;
347             else
348                 fResult.intVal = row.getUintField<8>(fInputIndex);
349 
350             break;
351 
352         case CalpontSystemCatalog::TIMESTAMP:
353             if (row.equals<8>(TIMESTAMPNULL, fInputIndex))
354                 isNull = true;
355             else
356                 fResult.intVal = row.getUintField<8>(fInputIndex);
357 
358             break;
359 
360         case CalpontSystemCatalog::TIME:
361             if (row.equals<8>(TIMENULL, fInputIndex))
362                 isNull = true;
363             else
364                 fResult.intVal = row.getIntField<8>(fInputIndex);
365 
366             break;
367 
368         case CalpontSystemCatalog::CHAR:
369         case CalpontSystemCatalog::VARCHAR:
370         case CalpontSystemCatalog::STRINT:
371         case CalpontSystemCatalog::TEXT:
372             switch (row.getColumnWidth(fInputIndex))
373             {
374                 case 1:
375                     if (row.equals<1>(CHAR1NULL, fInputIndex))
376                         isNull = true;
377                     else
378                         fResult.origIntVal = row.getUintField<1>(fInputIndex);
379 
380                     break;
381 
382                 case 2:
383                     if (row.equals<2>(CHAR2NULL, fInputIndex))
384                         isNull = true;
385                     else
386                         fResult.origIntVal = row.getUintField<2>(fInputIndex);
387 
388                     break;
389 
390                 case 3:
391                 case 4:
392                     if (row.equals<4>(CHAR4NULL, fInputIndex))
393                         isNull = true;
394                     else
395                         fResult.origIntVal = row.getUintField<4>(fInputIndex);
396 
397                     break;
398 
399                 case 5:
400                 case 6:
401                 case 7:
402                 case 8:
403                     if (row.equals<8>(CHAR8NULL, fInputIndex))
404                         isNull = true;
405                     else
406                         fResult.origIntVal = row.getUintField<8>(fInputIndex);
407 
408                     break;
409 
410                 default:
411                     if (row.equals(CPNULLSTRMARK, fInputIndex))
412                         isNull = true;
413                     else
414                         fResult.strVal = row.getStringField(fInputIndex);
415 
416                     // stringColVal is padded with '\0' to colWidth so can't use str.length()
417                     if (strlen(fResult.strVal.c_str()) == 0)
418                         isNull = true;
419 
420                     break;
421             }
422 
423             if (fResultType.colDataType == CalpontSystemCatalog::STRINT)
424                 fResult.intVal = uint64ToStr(fResult.origIntVal);
425             else
426                 fResult.intVal = atoll((char*)&fResult.origIntVal);
427 
428             break;
429 
430         case CalpontSystemCatalog::BIGINT:
431             if (row.equals<8>(BIGINTNULL, fInputIndex))
432                 isNull = true;
433             else
434                 fResult.intVal = row.getIntField<8>(fInputIndex);
435 
436             break;
437 
438         case CalpontSystemCatalog::UBIGINT:
439             if (row.equals<8>(UBIGINTNULL, fInputIndex))
440                 isNull = true;
441             else
442                 fResult.uintVal = row.getUintField<8>(fInputIndex);
443 
444             break;
445 
446         case CalpontSystemCatalog::INT:
447         case CalpontSystemCatalog::MEDINT:
448             if (row.equals<4>(INTNULL, fInputIndex))
449                 isNull = true;
450             else
451                 fResult.intVal = row.getIntField<4>(fInputIndex);
452 
453             break;
454 
455         case CalpontSystemCatalog::UINT:
456         case CalpontSystemCatalog::UMEDINT:
457             if (row.equals<4>(UINTNULL, fInputIndex))
458                 isNull = true;
459             else
460                 fResult.uintVal = row.getUintField<4>(fInputIndex);
461 
462             break;
463 
464         case CalpontSystemCatalog::SMALLINT:
465             if (row.equals<2>(SMALLINTNULL, fInputIndex))
466                 isNull = true;
467             else
468                 fResult.intVal = row.getIntField<2>(fInputIndex);
469 
470             break;
471 
472         case CalpontSystemCatalog::USMALLINT:
473             if (row.equals<2>(USMALLINTNULL, fInputIndex))
474                 isNull = true;
475             else
476                 fResult.uintVal = row.getUintField<2>(fInputIndex);
477 
478             break;
479 
480         case CalpontSystemCatalog::TINYINT:
481             if (row.equals<1>(TINYINTNULL, fInputIndex))
482                 isNull = true;
483             else
484                 fResult.intVal = row.getIntField<1>(fInputIndex);
485 
486             break;
487 
488         case CalpontSystemCatalog::UTINYINT:
489             if (row.equals<1>(UTINYINTNULL, fInputIndex))
490                 isNull = true;
491             else
492                 fResult.uintVal = row.getUintField<1>(fInputIndex);
493 
494             break;
495 
496         //In this case, we're trying to load a double output column with float data. This is the
497         // case when you do sum(floatcol), e.g.
498         case CalpontSystemCatalog::FLOAT:
499         case CalpontSystemCatalog::UFLOAT:
500             if (row.equals<4>(FLOATNULL, fInputIndex))
501                 isNull = true;
502             else
503                 fResult.floatVal = row.getFloatField(fInputIndex);
504 
505             break;
506 
507         case CalpontSystemCatalog::DOUBLE:
508         case CalpontSystemCatalog::UDOUBLE:
509             if (row.equals<8>(DOUBLENULL, fInputIndex))
510                 isNull = true;
511             else
512                 fResult.doubleVal = row.getDoubleField(fInputIndex);
513 
514             break;
515 
516         case CalpontSystemCatalog::LONGDOUBLE:
517             if (row.equals(LONGDOUBLENULL, fInputIndex))
518                 isNull = true;
519             else
520                 fResult.longDoubleVal = row.getLongDoubleField(fInputIndex);
521 
522             break;
523 
524         case CalpontSystemCatalog::DECIMAL:
525         case CalpontSystemCatalog::UDECIMAL:
526             switch (fResultType.colWidth)
527             {
528                 case 1:
529                     if (row.equals<1>(TINYINTNULL, fInputIndex))
530                         isNull = true;
531                     else
532                     {
533                         fResult.decimalVal.value = row.getIntField<1>(fInputIndex);
534                         fResult.decimalVal.scale = (unsigned)fResultType.scale;
535                     }
536 
537                     break;
538 
539                 case 2:
540                     if (row.equals<2>(SMALLINTNULL, fInputIndex))
541                         isNull = true;
542                     else
543                     {
544                         fResult.decimalVal.value = row.getIntField<2>(fInputIndex);
545                         fResult.decimalVal.scale = (unsigned)fResultType.scale;
546                     }
547 
548                     break;
549 
550                 case 4:
551                     if (row.equals<4>(INTNULL, fInputIndex))
552                         isNull = true;
553                     else
554                     {
555                         fResult.decimalVal.value = row.getIntField<4>(fInputIndex);
556                         fResult.decimalVal.scale = (unsigned)fResultType.scale;
557                     }
558 
559                     break;
560 
561                 default:
562                     if (row.equals<8>(BIGINTNULL, fInputIndex))
563                         isNull = true;
564                     else
565                     {
566                         fResult.decimalVal.value = (int64_t)row.getUintField<8>(fInputIndex);
567                         fResult.decimalVal.scale = (unsigned)fResultType.scale;
568                     }
569 
570                     break;
571             }
572 
573             break;
574 
575         case CalpontSystemCatalog::VARBINARY:
576         case CalpontSystemCatalog::BLOB:
577             isNull = true;
578             break;
579 
580         default:	// treat as int64
581             if (row.equals<8>(BIGINTNULL, fInputIndex))
582                 isNull = true;
583             else
584                 fResult.intVal = row.getUintField<8>(fInputIndex);
585 
586             break;
587     }
588 }
589 
590 /*static*/
agname2num(const string & agname)591 AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname)
592 {
593     /*
594     		NOOP = 0,
595     		COUNT_ASTERISK,
596     		COUNT,
597     		SUM,
598     		AVG,
599     		MIN,
600     		MAX,
601     		CONSTANT,
602     		DISTINCT_COUNT,
603     		DISTINCT_SUM,
604     		DISTINCT_AVG,
605     		STDDEV_POP,
606     		STDDEV_SAMP,
607     		VAR_POP,
608     		VAR_SAMP,
609     		BIT_AND,
610     		BIT_OR,
611     		BIT_XOR,
612     		GROUP_CONCAT
613     */
614     string lfn(agname);
615     algorithm::to_lower(lfn);
616 
617     if (lfn == "count(*)")
618         return COUNT_ASTERISK;
619 
620     if (lfn == "count")
621         return COUNT;
622 
623     if (lfn == "sum")
624         return SUM;
625 
626     if (lfn == "avg")
627         return AVG;
628 
629     if (lfn == "min")
630         return MIN;
631 
632     if (lfn == "max")
633         return MAX;
634 
635     if (lfn == "std")
636         return STDDEV_POP;
637 
638     if (lfn == "stddev_pop")
639         return STDDEV_POP;
640 
641     if (lfn == "stddev_samp")
642         return STDDEV_SAMP;
643 
644     if (lfn == "stddev")
645         return STDDEV_POP;
646 
647     if (lfn == "var_pop")
648         return VAR_POP;
649 
650     if (lfn == "var_samp")
651         return VAR_SAMP;
652 
653     if (lfn == "variance")
654         return VAR_POP;
655 
656     return NOOP;
657 }
658 
659 } // namespace execplan
660 
661