1 /* Copyright (C) 2014 InfiniDB, Inc.
2 Copyright (C) 2019 MariaDB Corporation
3
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; version 2 of
7 the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 MA 02110-1301, USA. */
18
19 /***********************************************************************
20 * $Id: aggregatecolumn.cpp 9679 2013-07-11 22:32:03Z zzhu $
21 *
22 *
23 ***********************************************************************/
24 #include <sstream>
25 #include <cstring>
26 using namespace std;
27
28 #include <boost/algorithm/string/case_conv.hpp>
29 using namespace boost;
30
31 #include "bytestream.h"
32 using namespace messageqcpp;
33
34 #include "rowgroup.h"
35 using namespace rowgroup;
36
37 #include "joblisttypes.h"
38 using namespace joblist;
39
40 #include "aggregatecolumn.h"
41 #include "simplefilter.h"
42 #include "constantfilter.h"
43 #include "arithmeticcolumn.h"
44 #include "functioncolumn.h"
45 #include "objectreader.h"
46
47 namespace execplan
48 {
49
getAggCols(execplan::ParseTree * n,void * obj)50 void getAggCols(execplan::ParseTree* n, void* obj)
51 {
52 vector<AggregateColumn*>* list = reinterpret_cast< vector<AggregateColumn*>*>(obj);
53 TreeNode* tn = n->data();
54 AggregateColumn* sc = dynamic_cast<AggregateColumn*>(tn);
55 FunctionColumn* fc = dynamic_cast<FunctionColumn*>(tn);
56 ArithmeticColumn* ac = dynamic_cast<ArithmeticColumn*>(tn);
57 SimpleFilter* sf = dynamic_cast<SimpleFilter*>(tn);
58 ConstantFilter* cf = dynamic_cast<ConstantFilter*>(tn);
59
60 if (sc)
61 {
62 list->push_back(sc);
63 }
64 else if (fc)
65 {
66 fc->hasAggregate();
67 list->insert(list->end(), fc->aggColumnList().begin(), fc->aggColumnList().end());
68 }
69 else if (ac)
70 {
71 ac->hasAggregate();
72 list->insert(list->end(), ac->aggColumnList().begin(), ac->aggColumnList().end());
73 }
74 else if (sf)
75 {
76 sf->hasAggregate();
77 list->insert(list->end(), sf->aggColumnList().begin(), sf->aggColumnList().end());
78 }
79 else if (cf)
80 {
81 cf->hasAggregate();
82 list->insert(list->end(), cf->aggColumnList().begin(), cf->aggColumnList().end());
83 }
84 }
85
86 /**
87 * Constructors/Destructors
88 */
AggregateColumn()89 AggregateColumn::AggregateColumn():
90 fAggOp(NOOP),
91 fAsc(false)
92 {
93 }
94
AggregateColumn(const uint32_t sessionID)95 AggregateColumn::AggregateColumn(const uint32_t sessionID):
96 ReturnedColumn(sessionID),
97 fAggOp(NOOP),
98 fAsc(false)
99 {
100 }
101
102 // deprecated constructor. use function name as string
AggregateColumn(const string & functionName,const string & content,const uint32_t sessionID)103 AggregateColumn::AggregateColumn(const string& functionName, const string& content, const uint32_t sessionID):
104 ReturnedColumn(sessionID),
105 fFunctionName(functionName),
106 fAggOp(NOOP),
107 fAsc(false),
108 fData(functionName + "(" + content + ")")
109 {
110 // TODO: need to handle distinct
111 SRCP srcp(new ArithmeticColumn(content));
112 fAggParms.push_back(srcp);
113 }
114
/**
 * Copy-style constructor that also rebinds the copy to a session.
 *
 * @param rhs        column to copy from.
 * @param sessionID  forwarded, together with rhs, to the ReturnedColumn base.
 *
 * The aggregate parameter list is copied as a container of shared pointers, so
 * the copy shares the parameter objects with rhs.
 * NOTE(review): fGroupByColList / fProjectColList are not mentioned here, so
 * they are not copied by this constructor -- confirm that is intended.
 */
AggregateColumn::AggregateColumn(const AggregateColumn& rhs, const uint32_t sessionID)
    : ReturnedColumn(rhs, sessionID)
    , fFunctionName(rhs.fFunctionName)
    , fAggOp(rhs.fAggOp)
    , fTableAlias(rhs.tableAlias())
    , fAsc(rhs.asc())
    , fData(rhs.data())
    , fConstCol(rhs.fConstCol)
    , fTimeZone(rhs.timeZone())
{
    fAlias = rhs.alias();
    fAggParms = rhs.fAggParms;
}
128
129 /**
130 * Methods
131 */
132
toString() const133 const string AggregateColumn::toString() const
134 {
135 ostringstream output;
136 output << "AggregateColumn " << data() << endl;
137 output << "func/distinct: " << (int)fAggOp << "/" << fDistinct << endl;
138 output << "expressionId=" << fExpressionId << endl;
139
140 if (fAlias.length() > 0) output << "/Alias: " << fAlias << endl;
141
142 if (fAggParms.size() == 0)
143 output << "No arguments";
144 else
145 for (uint32_t i = 0; i < fAggParms.size(); ++i)
146 {
147 output << *(fAggParms[i]) << " ";
148 }
149
150 output << endl;
151
152 if (fConstCol)
153 output << *fConstCol;
154
155 return output.str();
156 }
157
operator <<(ostream & output,const AggregateColumn & rhs)158 ostream& operator<<(ostream& output, const AggregateColumn& rhs)
159 {
160 output << rhs.toString();
161 return output;
162 }
163
serialize(messageqcpp::ByteStream & b) const164 void AggregateColumn::serialize(messageqcpp::ByteStream& b) const
165 {
166 CalpontSelectExecutionPlan::ReturnedColumnList::const_iterator rcit;
167 b << (uint8_t) ObjectReader::AGGREGATECOLUMN;
168 ReturnedColumn::serialize(b);
169 b << fFunctionName;
170 b << static_cast<uint8_t>(fAggOp);
171
172 b << static_cast<uint32_t>(fAggParms.size());
173
174 for (uint32_t i = 0; i < fAggParms.size(); ++i)
175 {
176 fAggParms[i]->serialize(b);
177 }
178
179 b << static_cast<uint32_t>(fGroupByColList.size());
180
181 for (rcit = fGroupByColList.begin(); rcit != fGroupByColList.end(); ++rcit)
182 (*rcit)->serialize(b);
183
184 b << static_cast<uint32_t>(fProjectColList.size());
185
186 for (rcit = fProjectColList.begin(); rcit != fProjectColList.end(); ++rcit)
187 (*rcit)->serialize(b);
188
189 b << fData;
190 b << fTimeZone;
191 //b << fAlias;
192 b << fTableAlias;
193 b << static_cast<ByteStream::doublebyte>(fAsc);
194
195 if (fConstCol.get() == 0)
196 b << (uint8_t) ObjectReader::NULL_CLASS;
197 else
198 fConstCol->serialize(b);
199 }
200
unserialize(messageqcpp::ByteStream & b)201 void AggregateColumn::unserialize(messageqcpp::ByteStream& b)
202 {
203 messageqcpp::ByteStream::quadbyte size;
204 messageqcpp::ByteStream::quadbyte i;
205 ReturnedColumn* rc;
206
207 ObjectReader::checkType(b, ObjectReader::AGGREGATECOLUMN);
208 fGroupByColList.erase(fGroupByColList.begin(), fGroupByColList.end());
209 fProjectColList.erase(fProjectColList.begin(), fProjectColList.end());
210 fAggParms.erase(fAggParms.begin(), fAggParms.end());
211 ReturnedColumn::unserialize(b);
212 b >> fFunctionName;
213 b >> fAggOp;
214
215 b >> size;
216
217 for (i = 0; i < size; i++)
218 {
219 rc = dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b));
220 SRCP srcp(rc);
221 fAggParms.push_back(srcp);
222 }
223
224 b >> size;
225
226 for (i = 0; i < size; i++)
227 {
228 rc = dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b));
229 SRCP srcp(rc);
230 fGroupByColList.push_back(srcp);
231 }
232
233 b >> size;
234
235 for (i = 0; i < size; i++)
236 {
237 rc = dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b));
238 SRCP srcp(rc);
239 fProjectColList.push_back(srcp);
240 }
241
242 b >> fData;
243 b >> fTimeZone;
244 //b >> fAlias;
245 b >> fTableAlias;
246 b >> reinterpret_cast< ByteStream::doublebyte&>(fAsc);
247 fConstCol.reset(dynamic_cast<ReturnedColumn*>(ObjectReader::createTreeNode(b)));
248 }
249
operator ==(const AggregateColumn & t) const250 bool AggregateColumn::operator==(const AggregateColumn& t) const
251 {
252 const ReturnedColumn* rc1, *rc2;
253 AggParms::const_iterator it, it2;
254
255 rc1 = static_cast<const ReturnedColumn*>(this);
256 rc2 = static_cast<const ReturnedColumn*>(&t);
257
258 if (*rc1 != *rc2)
259 return false;
260
261 if (fFunctionName != t.fFunctionName)
262 return false;
263
264 if (fAggOp == COUNT_ASTERISK && t.fAggOp == COUNT_ASTERISK)
265 return true;
266
267 if (fAggOp != t.fAggOp)
268 return false;
269
270 if (aggParms().size() != t.aggParms().size())
271 {
272 return false;
273 }
274
275 for (it = fAggParms.begin(), it2 = t.fAggParms.begin();
276 it != fAggParms.end();
277 ++it, ++it2)
278 {
279 if (**it != **it2)
280 return false;
281 }
282
283 if (fTableAlias != t.fTableAlias)
284 return false;
285
286 if (fData != t.fData)
287 return false;
288
289 if (fAsc != t.fAsc)
290 return false;
291
292 if ((fConstCol.get() != NULL && t.fConstCol.get() == NULL) ||
293 (fConstCol.get() == NULL && t.fConstCol.get() != NULL) ||
294 (fConstCol.get() != NULL && t.fConstCol.get() != NULL &&
295 *(fConstCol.get()) != t.fConstCol.get()))
296 return false;
297
298 if (fTimeZone != t.fTimeZone)
299 return false;
300
301 return true;
302 }
303
operator ==(const TreeNode * t) const304 bool AggregateColumn::operator==(const TreeNode* t) const
305 {
306 const AggregateColumn* ac;
307
308 ac = dynamic_cast<const AggregateColumn*>(t);
309
310 if (ac == NULL)
311 return false;
312
313 return *this == *ac;
314 }
315
operator !=(const AggregateColumn & t) const316 bool AggregateColumn::operator!=(const AggregateColumn& t) const
317 {
318 return !(*this == t);
319 }
320
operator !=(const TreeNode * t) const321 bool AggregateColumn::operator!=(const TreeNode* t) const
322 {
323 return !(*this == t);
324 }
325
hasAggregate()326 bool AggregateColumn::hasAggregate()
327 {
328 fAggColumnList.push_back(this);
329 return true;
330 }
331
evaluate(Row & row,bool & isNull)332 void AggregateColumn::evaluate(Row& row, bool& isNull)
333 {
334 switch (fResultType.colDataType)
335 {
336 case CalpontSystemCatalog::DATE:
337 if (row.equals<4>(DATENULL, fInputIndex))
338 isNull = true;
339 else
340 fResult.intVal = row.getUintField<4>(fInputIndex);
341
342 break;
343
344 case CalpontSystemCatalog::DATETIME:
345 if (row.equals<8>(DATETIMENULL, fInputIndex))
346 isNull = true;
347 else
348 fResult.intVal = row.getUintField<8>(fInputIndex);
349
350 break;
351
352 case CalpontSystemCatalog::TIMESTAMP:
353 if (row.equals<8>(TIMESTAMPNULL, fInputIndex))
354 isNull = true;
355 else
356 fResult.intVal = row.getUintField<8>(fInputIndex);
357
358 break;
359
360 case CalpontSystemCatalog::TIME:
361 if (row.equals<8>(TIMENULL, fInputIndex))
362 isNull = true;
363 else
364 fResult.intVal = row.getIntField<8>(fInputIndex);
365
366 break;
367
368 case CalpontSystemCatalog::CHAR:
369 case CalpontSystemCatalog::VARCHAR:
370 case CalpontSystemCatalog::STRINT:
371 case CalpontSystemCatalog::TEXT:
372 switch (row.getColumnWidth(fInputIndex))
373 {
374 case 1:
375 if (row.equals<1>(CHAR1NULL, fInputIndex))
376 isNull = true;
377 else
378 fResult.origIntVal = row.getUintField<1>(fInputIndex);
379
380 break;
381
382 case 2:
383 if (row.equals<2>(CHAR2NULL, fInputIndex))
384 isNull = true;
385 else
386 fResult.origIntVal = row.getUintField<2>(fInputIndex);
387
388 break;
389
390 case 3:
391 case 4:
392 if (row.equals<4>(CHAR4NULL, fInputIndex))
393 isNull = true;
394 else
395 fResult.origIntVal = row.getUintField<4>(fInputIndex);
396
397 break;
398
399 case 5:
400 case 6:
401 case 7:
402 case 8:
403 if (row.equals<8>(CHAR8NULL, fInputIndex))
404 isNull = true;
405 else
406 fResult.origIntVal = row.getUintField<8>(fInputIndex);
407
408 break;
409
410 default:
411 if (row.equals(CPNULLSTRMARK, fInputIndex))
412 isNull = true;
413 else
414 fResult.strVal = row.getStringField(fInputIndex);
415
416 // stringColVal is padded with '\0' to colWidth so can't use str.length()
417 if (strlen(fResult.strVal.c_str()) == 0)
418 isNull = true;
419
420 break;
421 }
422
423 if (fResultType.colDataType == CalpontSystemCatalog::STRINT)
424 fResult.intVal = uint64ToStr(fResult.origIntVal);
425 else
426 fResult.intVal = atoll((char*)&fResult.origIntVal);
427
428 break;
429
430 case CalpontSystemCatalog::BIGINT:
431 if (row.equals<8>(BIGINTNULL, fInputIndex))
432 isNull = true;
433 else
434 fResult.intVal = row.getIntField<8>(fInputIndex);
435
436 break;
437
438 case CalpontSystemCatalog::UBIGINT:
439 if (row.equals<8>(UBIGINTNULL, fInputIndex))
440 isNull = true;
441 else
442 fResult.uintVal = row.getUintField<8>(fInputIndex);
443
444 break;
445
446 case CalpontSystemCatalog::INT:
447 case CalpontSystemCatalog::MEDINT:
448 if (row.equals<4>(INTNULL, fInputIndex))
449 isNull = true;
450 else
451 fResult.intVal = row.getIntField<4>(fInputIndex);
452
453 break;
454
455 case CalpontSystemCatalog::UINT:
456 case CalpontSystemCatalog::UMEDINT:
457 if (row.equals<4>(UINTNULL, fInputIndex))
458 isNull = true;
459 else
460 fResult.uintVal = row.getUintField<4>(fInputIndex);
461
462 break;
463
464 case CalpontSystemCatalog::SMALLINT:
465 if (row.equals<2>(SMALLINTNULL, fInputIndex))
466 isNull = true;
467 else
468 fResult.intVal = row.getIntField<2>(fInputIndex);
469
470 break;
471
472 case CalpontSystemCatalog::USMALLINT:
473 if (row.equals<2>(USMALLINTNULL, fInputIndex))
474 isNull = true;
475 else
476 fResult.uintVal = row.getUintField<2>(fInputIndex);
477
478 break;
479
480 case CalpontSystemCatalog::TINYINT:
481 if (row.equals<1>(TINYINTNULL, fInputIndex))
482 isNull = true;
483 else
484 fResult.intVal = row.getIntField<1>(fInputIndex);
485
486 break;
487
488 case CalpontSystemCatalog::UTINYINT:
489 if (row.equals<1>(UTINYINTNULL, fInputIndex))
490 isNull = true;
491 else
492 fResult.uintVal = row.getUintField<1>(fInputIndex);
493
494 break;
495
496 //In this case, we're trying to load a double output column with float data. This is the
497 // case when you do sum(floatcol), e.g.
498 case CalpontSystemCatalog::FLOAT:
499 case CalpontSystemCatalog::UFLOAT:
500 if (row.equals<4>(FLOATNULL, fInputIndex))
501 isNull = true;
502 else
503 fResult.floatVal = row.getFloatField(fInputIndex);
504
505 break;
506
507 case CalpontSystemCatalog::DOUBLE:
508 case CalpontSystemCatalog::UDOUBLE:
509 if (row.equals<8>(DOUBLENULL, fInputIndex))
510 isNull = true;
511 else
512 fResult.doubleVal = row.getDoubleField(fInputIndex);
513
514 break;
515
516 case CalpontSystemCatalog::LONGDOUBLE:
517 if (row.equals(LONGDOUBLENULL, fInputIndex))
518 isNull = true;
519 else
520 fResult.longDoubleVal = row.getLongDoubleField(fInputIndex);
521
522 break;
523
524 case CalpontSystemCatalog::DECIMAL:
525 case CalpontSystemCatalog::UDECIMAL:
526 switch (fResultType.colWidth)
527 {
528 case 1:
529 if (row.equals<1>(TINYINTNULL, fInputIndex))
530 isNull = true;
531 else
532 {
533 fResult.decimalVal.value = row.getIntField<1>(fInputIndex);
534 fResult.decimalVal.scale = (unsigned)fResultType.scale;
535 }
536
537 break;
538
539 case 2:
540 if (row.equals<2>(SMALLINTNULL, fInputIndex))
541 isNull = true;
542 else
543 {
544 fResult.decimalVal.value = row.getIntField<2>(fInputIndex);
545 fResult.decimalVal.scale = (unsigned)fResultType.scale;
546 }
547
548 break;
549
550 case 4:
551 if (row.equals<4>(INTNULL, fInputIndex))
552 isNull = true;
553 else
554 {
555 fResult.decimalVal.value = row.getIntField<4>(fInputIndex);
556 fResult.decimalVal.scale = (unsigned)fResultType.scale;
557 }
558
559 break;
560
561 default:
562 if (row.equals<8>(BIGINTNULL, fInputIndex))
563 isNull = true;
564 else
565 {
566 fResult.decimalVal.value = (int64_t)row.getUintField<8>(fInputIndex);
567 fResult.decimalVal.scale = (unsigned)fResultType.scale;
568 }
569
570 break;
571 }
572
573 break;
574
575 case CalpontSystemCatalog::VARBINARY:
576 case CalpontSystemCatalog::BLOB:
577 isNull = true;
578 break;
579
580 default: // treat as int64
581 if (row.equals<8>(BIGINTNULL, fInputIndex))
582 isNull = true;
583 else
584 fResult.intVal = row.getUintField<8>(fInputIndex);
585
586 break;
587 }
588 }
589
590 /*static*/
agname2num(const string & agname)591 AggregateColumn::AggOp AggregateColumn::agname2num(const string& agname)
592 {
593 /*
594 NOOP = 0,
595 COUNT_ASTERISK,
596 COUNT,
597 SUM,
598 AVG,
599 MIN,
600 MAX,
601 CONSTANT,
602 DISTINCT_COUNT,
603 DISTINCT_SUM,
604 DISTINCT_AVG,
605 STDDEV_POP,
606 STDDEV_SAMP,
607 VAR_POP,
608 VAR_SAMP,
609 BIT_AND,
610 BIT_OR,
611 BIT_XOR,
612 GROUP_CONCAT
613 */
614 string lfn(agname);
615 algorithm::to_lower(lfn);
616
617 if (lfn == "count(*)")
618 return COUNT_ASTERISK;
619
620 if (lfn == "count")
621 return COUNT;
622
623 if (lfn == "sum")
624 return SUM;
625
626 if (lfn == "avg")
627 return AVG;
628
629 if (lfn == "min")
630 return MIN;
631
632 if (lfn == "max")
633 return MAX;
634
635 if (lfn == "std")
636 return STDDEV_POP;
637
638 if (lfn == "stddev_pop")
639 return STDDEV_POP;
640
641 if (lfn == "stddev_samp")
642 return STDDEV_SAMP;
643
644 if (lfn == "stddev")
645 return STDDEV_POP;
646
647 if (lfn == "var_pop")
648 return VAR_POP;
649
650 if (lfn == "var_samp")
651 return VAR_SAMP;
652
653 if (lfn == "variance")
654 return VAR_POP;
655
656 return NOOP;
657 }
658
659 } // namespace execplan
660
661