1 /* Copyright (C) 2014 InfiniDB, Inc.
2 
3    This program is free software; you can redistribute it and/or
4    modify it under the terms of the GNU General Public License
5    as published by the Free Software Foundation; version 2 of
6    the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16    MA 02110-1301, USA. */
17 
18 /****************************************************************************
19 * $Id: dataconvert.h 3693 2013-04-05 16:11:30Z chao $
20 *
21 *
22 ****************************************************************************/
23 /** @file */
24 
25 #ifndef DATACONVERT_H
26 #define DATACONVERT_H
27 
28 #include <unistd.h>
29 #include <string>
30 #include <boost/any.hpp>
31 #include <vector>
32 #ifdef _MSC_VER
33 #include <winsock2.h>
34 #include <ws2tcpip.h>
35 #include <stdio.h>
36 #else
37 #include <netinet/in.h>
38 #endif
39 
40 #ifdef __linux__
41 #define POSIX_REGEX
42 #endif
43 
44 #ifdef POSIX_REGEX
45 #include <regex.h>
46 #else
47 #include <boost/regex.hpp>
48 #endif
49 
50 #include "calpontsystemcatalog.h"
51 #include "columnresult.h"
52 #include "exceptclasses.h"
53 
54 // remove this block if the htonll is defined in library
55 #ifdef __linux__
56 #include <endian.h>
57 #if __BYTE_ORDER == __BIG_ENDIAN       // 4312
// Big-endian host: host byte order already equals network byte order,
// so the value is handed back unchanged.
inline uint64_t htonll(uint64_t n)
{
    const uint64_t networkOrder = n;
    return networkOrder;
}
62 #elif __BYTE_ORDER == __LITTLE_ENDIAN  // 1234
// Little-endian host: run each 32-bit half through htonl() and exchange
// the two halves.
inline uint64_t htonll(uint64_t n)
{
    const uint32_t lowHalf  = static_cast<uint32_t>(n & 0xFFFFFFFFLLU);
    const uint32_t highHalf = static_cast<uint32_t>((n & 0xFFFFFFFF00000000LLU) >> 32);

    const uint64_t upperResult = static_cast<uint64_t>(htonl(lowHalf)) << 32;
    const uint64_t lowerResult = htonl(highHalf);

    return upperResult | lowerResult;
}
67 #else  // __BYTE_ORDER == __PDP_ENDIAN    3412
68 inline uint64_t htonll(uint64_t n);
69 // don't know 34127856 or 78563412, hope never be required to support this byte order.
70 #endif
71 #else //!__linux__
72 #if _MSC_VER < 1600
// No byte-order information is consulted here: a little-endian host is
// assumed, so the 32-bit halves are converted with htonl() and swapped.
inline uint64_t htonll(uint64_t n)
{
    const uint32_t lo = static_cast<uint32_t>(n & 0xFFFFFFFFULL);
    const uint32_t hi = static_cast<uint32_t>((n & 0xFFFFFFFF00000000ULL) >> 32);

    uint64_t swapped = htonl(lo);
    swapped <<= 32;
    swapped |= htonl(hi);
    return swapped;
}
78 #endif //_MSC_VER
79 #endif //__linux__
80 
81 // this method evalutes the uint64 that stores a char[] to expected value
uint64ToStr(uint64_t n)82 inline uint64_t uint64ToStr(uint64_t n)
83 {
84     return htonll(n);
85 }
86 
87 
88 #if defined(_MSC_VER) && defined(xxxDATACONVERT_DLLEXPORT)
89 #define EXPORT __declspec(dllexport)
90 #else
91 #define EXPORT
92 #endif
93 
// Table of powers of ten: IDB_pow[i] holds 10^i for i = 0 .. 18.
const int64_t IDB_pow[19] =
{
    1LL,                    // 10^0
    10LL,                   // 10^1
    100LL,                  // 10^2
    1000LL,                 // 10^3
    10000LL,                // 10^4
    100000LL,               // 10^5
    1000000LL,              // 10^6
    10000000LL,             // 10^7
    100000000LL,            // 10^8
    1000000000LL,           // 10^9
    10000000000LL,          // 10^10
    100000000000LL,         // 10^11
    1000000000000LL,        // 10^12
    10000000000000LL,       // 10^13
    100000000000000LL,      // 10^14
    1000000000000000LL,     // 10^15
    10000000000000000LL,    // 10^16
    100000000000000000LL,   // 10^17
    1000000000000000000LL   // 10^18
};
116 
117 
// Unit sizes used by the calendar arithmetic below.
const int32_t SECS_PER_MIN   = 60;
const int32_t MINS_PER_HOUR  = 60;
const int32_t HOURS_PER_DAY  = 24;
const int32_t DAYS_PER_WEEK  = 7;
const int32_t MONS_PER_YEAR  = 12;
const int32_t DAYS_PER_NYEAR = 365;   // non-leap year
const int32_t DAYS_PER_LYEAR = 366;   // leap year

// Derived unit sizes.
const int32_t SECS_PER_HOUR = SECS_PER_MIN * MINS_PER_HOUR;
const int32_t SECS_PER_DAY  = SECS_PER_HOUR * HOURS_PER_DAY;

// Limits applied to timestamp values and to their broken-down year.
const int32_t EPOCH_YEAR          = 1970;
const int32_t MIN_TIMESTAMP_YEAR  = 1969;
const int32_t MAX_TIMESTAMP_YEAR  = 2038;
const int32_t MIN_TIMESTAMP_VALUE = 0;
const int32_t MAX_TIMESTAMP_VALUE = 0x7FFFFFFF;   // (1 << 31) - 1
132 
133 
134 namespace dataconvert
135 {
136 
/** @brief identifies the format a date/time value is expected in
 */
enum CalpontDateTimeFormat
{
    // date format is: "YYYY-MM-DD"
    CALPONTDATE_ENUM = 1,
    // date format is: "YYYY-MM-DD HH:MI:SS"
    CALPONTDATETIME_ENUM,
    // presumably a time-only format; its layout is not documented in this header
    CALPONTTIME_ENUM
};
143 
144 /** @brief a structure that represents a timestamp in broken down
145  *  representation
146  */
147 struct MySQLTime
148 {
149     unsigned int year, month, day, hour, minute, second;
150     unsigned long second_part;
151     CalpontDateTimeFormat time_type;
resetMySQLTime152     void reset()
153     {
154         year = month = day = 0;
155         hour = minute = second = second_part = 0;
156         time_type = CALPONTDATETIME_ENUM;
157     }
158 };
159 
/**
 * This function converts the timezone represented as a string
 * in the format "+HH:MM" or "-HH:MM" to a signed offset in seconds
 * Most of this code is taken from tztime.cc:str_to_offset
 *
 * @param str    start of the timezone text (only `length` characters are read)
 * @param length number of characters available at `str`
 * @param offset receives the offset in seconds; written only on success
 * @return false (0) on success; true (1) when the text is malformed or the
 *         offset is outside the accepted range (-12:59 to +13:00)
 */
inline
bool timeZoneToOffset(const char *str, std::string::size_type length, long *offset)
{
    const char *end = str + length;

    if (length < 4)
        return true;

    bool negative;

    if (*str == '+')
        negative = false;
    else if (*str == '-')
        negative = true;
    else
        return true;
    str++;

    // Hours. Digits are tested with explicit range comparisons: passing a
    // plain (possibly negative) char to isdigit() is undefined behaviour.
    unsigned long hours = 0;

    while (str < end && *str >= '0' && *str <= '9')
    {
        hours = hours * 10 + (unsigned long) (*str - '0');

        // No offset above 13 hours passes the range check below, so give up
        // right away; this also keeps a long run of digits from wrapping
        // the accumulator around to a small, seemingly valid value.
        if (hours > 13)
            return true;
        str++;
    }

    // A ':' followed by at least one more character is required.
    if (str + 1 >= end || *str != ':')
        return true;
    str++;

    // Minutes, with the same wrap-around protection.
    unsigned long minutes = 0;

    while (str < end && *str >= '0' && *str <= '9')
    {
        minutes = minutes * 10 + (unsigned long) (*str - '0');

        if (minutes > 59)
            return true;
        str++;
    }

    // Trailing characters after the minutes are not allowed.
    if (str != end)
        return true;

    long offset_tmp = (long) (hours * 60L + minutes) * 60L;

    if (negative)
        offset_tmp = -offset_tmp;

    /*
      Check if offset is in range prescribed by standard
      (from -12:59 to 13:00).
    */
    if (offset_tmp < -13 * 3600L + 1 || offset_tmp > 13 * 3600L)
        return true;

    *offset = offset_tmp;

    return false;
}
226 
227 const int32_t year_lengths[2] =
228 {
229   DAYS_PER_NYEAR, DAYS_PER_LYEAR
230 };
231 
232 const unsigned int mon_lengths[2][MONS_PER_YEAR]=
233 {
234   { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
235   { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
236 };
237 
238 const unsigned int mon_starts[2][MONS_PER_YEAR]=
239 {
240   { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 },
241   { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 }
242 };
243 
// Returns year/4 - year/100 + year/400 (integer division), i.e. the count
// of leap years under the every-4 / not-every-100 / every-400 rule.
inline int32_t leapsThruEndOf(int32_t year)
{
    const int32_t everyFourth       = year / 4;
    const int32_t everyHundredth    = year / 100;
    const int32_t everyFourHundredth = year / 400;

    return (everyFourth - everyHundredth + everyFourHundredth);
}
248 
// True when `year` is divisible by 400, or divisible by 4 but not by 100.
inline bool isLeapYear ( int year)
{
    const bool divisibleBy400 = ( year % 400 == 0 );
    const bool divisibleBy4Only = ( year % 4 == 0 ) && ( year % 100 != 0 );

    return divisibleBy400 || divisibleBy4Only;
}
259 
260 static uint32_t daysInMonth[13] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0};
261 
getDaysInMonth(uint32_t month,int year)262 inline uint32_t getDaysInMonth(uint32_t month, int year)
263 {
264     if (month < 1 || month > 12)
265         return 0;
266 
267     uint32_t days = daysInMonth[month - 1];
268 
269     if ((month == 2) && isLeapYear(year))
270         days++;
271 
272     return days;
273 }
274 
275 inline
isDateValid(int day,int month,int year)276 bool isDateValid ( int day, int month, int year)
277 {
278     bool valid = true;
279 
280     if ( day == 0 && month == 0 && year == 0 )
281     {
282         return true;
283     }
284 
285     int daycheck = getDaysInMonth( month, year );
286 
287     if ( ( year < 1000 ) || ( year > 9999 ) )
288         valid = false;
289     else if ( month < 1 || month > 12 )
290         valid = false;
291     else if ( day < 1 || day > daycheck )
292         valid = false;
293 
294     return ( valid );
295 }
296 
// Checks the time-of-day part of a datetime: hour in 0..24 (24 is
// accepted), minute and second in 0..59, microSecond in 0..999999.
inline
bool isDateTimeValid ( int hour, int minute, int second, int microSecond)
{
    const bool hourOk   = ( hour >= 0 && hour <= 24 );
    const bool minuteOk = ( minute >= 0 && minute < 60 );
    const bool secondOk = ( second >= 0 && second < 60 );
    const bool microOk  = ( microSecond >= 0 && microSecond <= 999999 );

    return hourOk && minuteOk && secondOk && microOk;
}
318 
// Checks a time value: hour in -838..838, minute and second in 0..59,
// microSecond in 0..999999.
inline
bool isTimeValid ( int hour, int minute, int second, int microSecond)
{
    if ( hour < -838 || hour > 838 )
        return false;

    if ( minute < 0 || minute >= 60 )
        return false;

    if ( second < 0 || second >= 60 )
        return false;

    if ( microSecond < 0 || microSecond > 999999 )
        return false;

    return true;
}
340 
341 inline
isTimestampValid(uint64_t second,uint64_t microsecond)342 bool isTimestampValid ( uint64_t second, uint64_t microsecond )
343 {
344     bool valid = false;
345 
346     // MariaDB server currently sets the upper limit on timestamp to
347     // 0x7FFFFFFF. So enforce the same restriction here.
348     // TODO: We however store the seconds portion of the timestamp in
349     // 44 bits, so change this limit when the server supports higher values.
350     if ( second <= MAX_TIMESTAMP_VALUE )
351     {
352         if ( microsecond <= 999999 )
353         {
354             valid = true;
355         }
356     }
357 
358     return valid;
359 }
360 
/**
 * @brief converts a timestamp (seconds in UTC since Epoch)
 * to broken-down representation. Most of this code is taken
 * from sec_to_TIME and Time_zone_system::gmt_sec_to_TIME
 * functions in tztime.cc in the server
 *
 * A value of 0 seconds, or a timeZone string that timeZoneToOffset()
 * rejects, leaves `time` reset (all fields zero).
 *
 * @param seconds the value to be converted
 * @param time the broken-down representation of the timestamp
 * @param timeZone a string with the server timezone of the machine
 * which initiated the query; either "SYSTEM" or a "+HH:MM"/"-HH:MM" offset
 */
inline void gmtSecToMySQLTime(int64_t seconds, MySQLTime& time,
                              const std::string& timeZone)
{
    // Zero seconds is reported as the all-zero value, not as 1970-01-01.
    if (seconds == 0)
    {
        time.reset();
        return;
    }

    if (timeZone == "SYSTEM")
    {
        // Let the C library apply the local timezone of this process.
        struct tm tmp_tm;
        time_t tmp_t = (time_t)seconds;
        localtime_r(&tmp_t, &tmp_tm);
        time.second_part = 0;
        time.year = (int) ((tmp_tm.tm_year + 1900) % 10000);
        time.month = (int) tmp_tm.tm_mon + 1;
        time.day = (int) tmp_tm.tm_mday;
        time.hour = (int) tmp_tm.tm_hour;
        time.minute = (int) tmp_tm.tm_min;
        time.second = (int) tmp_tm.tm_sec;
        time.time_type = CALPONTDATETIME_ENUM;
        // Seconds values of 60 and 61 are clamped to 59.
        if (time.second == 60 || time.second == 61)
            time.second = 59;
    }
    else
    {
        // Explicit offset: timeZoneToOffset() returns true on failure.
        long offset;
        if (timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
        {
            time.reset();
            return;
        }

        int64_t days;
        int32_t rem;
        int32_t y;
        int32_t yleap;
        const unsigned int *ip;

        // Split into whole days since the epoch and seconds within the day.
        days = (int64_t) (seconds / SECS_PER_DAY);
        rem = (int32_t) (seconds % SECS_PER_DAY);

        // Apply the offset, then bring rem back into [0, SECS_PER_DAY),
        // carrying into / borrowing from the day count.
        rem += offset;
        while (rem < 0)
        {
            rem += SECS_PER_DAY;
            days--;
        }
        while (rem >= SECS_PER_DAY)
        {
            rem -= SECS_PER_DAY;
            days++;
        }
        time.hour = (unsigned int) (rem / SECS_PER_HOUR);
        rem = rem % SECS_PER_HOUR;
        time.minute = (unsigned int) (rem / SECS_PER_MIN);
        time.second = (unsigned int) (rem % SECS_PER_MIN);

        // Find the year: estimate with 365-day years, then correct the day
        // count by the leap days between the old and the new estimate.
        // yleap is assigned inside the loop condition; the loop can only
        // exit after that assignment has been evaluated.
        y = EPOCH_YEAR;
        while (days < 0 || days >= (int64_t) (year_lengths[yleap = isLeapYear(y)]))
        {
            int32_t newy;

            newy = y + days / DAYS_PER_NYEAR;
            if (days < 0)
                newy--;
            days -= (newy - y) * DAYS_PER_NYEAR +
                    leapsThruEndOf(newy - 1) -
                    leapsThruEndOf(y - 1);
            y = newy;
        }
        time.year = y;

        // days is now the 0-based day of the year; walk the month lengths
        // of this (leap or non-leap) year to find month and day of month.
        ip = mon_lengths[yleap];
        for (time.month = 0; days >= (int64_t) ip[time.month]; time.month++)
            days -= (int64_t) ip[time.month];
        time.month++;
        time.day = (unsigned int) (days + 1);

        time.second_part = 0;
        time.time_type = CALPONTDATETIME_ENUM;
    }
}
456 
457 /**
458  * @brief function that provides a rough estimate if a broken-down
459  * representation of timestamp is in range
460  *
461  * @param t the broken-down representation of timestamp
462  */
validateTimestampRange(const MySQLTime & t)463 inline bool validateTimestampRange(const MySQLTime& t)
464 {
465   if ((t.year > MAX_TIMESTAMP_YEAR || t.year < MIN_TIMESTAMP_YEAR) ||
466       (t.year == MAX_TIMESTAMP_YEAR && (t.month > 1 || t.day > 19)))
467     return false;
468 
469   return true;
470 }
471 
472 inline
secSinceEpoch(int year,int month,int day,int hour,int min,int sec)473 int64_t secSinceEpoch(int year, int month, int day, int hour, int min, int sec)
474 {
475     int64_t days = (year - EPOCH_YEAR) * DAYS_PER_NYEAR +
476                    leapsThruEndOf(year - 1) -
477                    leapsThruEndOf(EPOCH_YEAR - 1);
478     days += mon_starts[isLeapYear(year)][month - 1];
479     days += day - 1;
480 
481     return ((days * HOURS_PER_DAY + hour) * MINS_PER_HOUR + min) *
482            SECS_PER_MIN + sec;
483 }
484 
485 // This is duplicate of funchelpers.h:calc_mysql_daynr,
486 // with one additional function parameter
calc_mysql_daynr(uint32_t year,uint32_t month,uint32_t day,bool & isValid)487 inline uint32_t calc_mysql_daynr( uint32_t year, uint32_t month, uint32_t day, bool& isValid )
488 {
489     int temp;
490     int y = year;
491     long delsum;
492 
493     if ( !isDateValid( day, month, year ) )
494     {
495         isValid = false;
496         return 0;
497     }
498 
499     delsum = (long) (365 * y + 31 * ((int) month - 1) + (int) day);
500 
501     if (month <= 2)
502         y--;
503     else
504         delsum -= (long) ((int) month * 4 + 23) / 10;
505 
506     temp = (int) ((y / 100 + 1) * 3) / 4;
507 
508     return delsum + (int) y / 4 - temp;
509 }
510 
511 /**
512  * @brief converts a timestamp from broken-down representation
513  * to seconds since UTC epoch
514  *
515  * @param time the broken-down representation of the timestamp
516    @param timeZone a string with the server timezone of the machine
517    which initiated the query
518  */
mySQLTimeToGmtSec(const MySQLTime & time,const std::string & timeZone,bool & isValid)519 inline int64_t mySQLTimeToGmtSec(const MySQLTime& time,
520                                  const std::string& timeZone, bool& isValid)
521 {
522     int64_t seconds;
523 
524     if (!validateTimestampRange(time))
525     {
526         isValid = false;
527         return 0;
528     }
529 
530     if (timeZone == "SYSTEM")
531     {
532         // This is mirror of code in func_unix_timestamp.cpp
533         uint32_t loop;
534         time_t tmp_t = 0;
535         int shift = 0;
536         struct tm* l_time, tm_tmp;
537         int64_t diff;
538         localtime_r(&tmp_t, &tm_tmp);
539         // Get the system timezone offset at 0 seconds since epoch
540         int64_t my_time_zone = tm_tmp.tm_gmtoff;
541         int day = time.day;
542 
543         if ((time.year == MAX_TIMESTAMP_YEAR) && (time.month == 1) && (day > 4))
544         {
545             day -= 2;
546             shift = 2;
547         }
548 
549         tmp_t = (time_t)(((calc_mysql_daynr(time.year, time.month, day, isValid) -
550                            719528) * 86400L + (int64_t)time.hour * 3600L +
551                           (int64_t)(time.minute * 60 + time.second)) - (time_t)my_time_zone);
552         if (!isValid)
553             return 0;
554 
555         localtime_r(&tmp_t, &tm_tmp);
556         l_time = &tm_tmp;
557 
558         for (loop = 0; loop < 2 && (time.hour != (uint32_t) l_time->tm_hour ||
559                                     time.minute != (uint32_t) l_time->tm_min ||
560                                     time.second != (uint32_t)l_time->tm_sec); loop++)
561         {
562             int days = day - l_time->tm_mday;
563 
564             if (days < -1)
565                 days = 1; /* Month has wrapped */
566             else if (days > 1)
567                 days = -1;
568 
569             diff = (3600L * (int64_t) (days * 24 + ((int) time.hour - (int) l_time->tm_hour)) +
570                     (int64_t) (60 * ((int) time.minute - (int) l_time->tm_min)) +
571                     (int64_t) ((int) time.second - (int) l_time->tm_sec));
572             tmp_t += (time_t) diff;
573             localtime_r(&tmp_t, &tm_tmp);
574             l_time = &tm_tmp;
575         }
576 
577         if (loop == 2 && time.hour != (uint32_t)l_time->tm_hour)
578         {
579             int days = day - l_time->tm_mday;
580 
581             if (days < -1)
582                 days = 1; /* Month has wrapped */
583             else if (days > 1)
584                 days = -1;
585 
586             diff = (3600L * (int64_t) (days * 24 + ((int) time.hour - (int) l_time->tm_hour)) +
587                     (int64_t) (60 * ((int) time.minute - (int) l_time->tm_min)) +
588                     (int64_t) ((int) time.second - (int) l_time->tm_sec));
589 
590             if (diff == 3600)
591                 tmp_t += 3600 - time.minute * 60 - time.second;	/* Move to next hour */
592             else if (diff == -3600)
593                 tmp_t -= time.minute * 60 + time.second;	/* Move to previous hour */
594         }
595 
596 
597         /* shift back, if we were dealing with boundary dates */
598         tmp_t += shift * 86400L;
599 
600         seconds = (int64_t)tmp_t;
601     }
602     else
603     {
604         long offset;
605         if (timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
606         {
607             isValid = false;
608             return 0;
609         }
610         seconds = secSinceEpoch(time.year, time.month, time.day,
611                                 time.hour, time.minute, time.second) - offset;
612     }
613 
614     /* make sure we have legit timestamps (i.e. we didn't over/underflow anywhere above) */
615     if (seconds >= MIN_TIMESTAMP_VALUE && seconds <= MAX_TIMESTAMP_VALUE)
616         return seconds;
617 
618     isValid = false;
619     return 0;
620 
621 }
622 
623 
/** @brief a date packed into bit fields: 6 spare bits, 6 bits of day,
 *  4 bits of month and 16 bits of year
 */
struct Date
{
    unsigned spare  : 6;
    unsigned day    : 6;
    unsigned month  : 4;
    unsigned year   : 16;

    // Default value is the NULL column value, 0xFFFFFFFE.
    Date( )
        : spare(0x3E), day(0x3F), month(0xF), year(0xFFFF)
    {}

    // Construct a Date from a 64 bit integer Calpont date: day comes from
    // bits 6-11, month from bits 12-15, year from bit 16 upwards.
    Date(uint64_t val)
        : spare(0x3E),
          day((val >> 6) & 0x3F),
          month((val >> 12) & 0xF),
          year((val >> 16))
    {}

    // Construct using passed in parameters, no value checking
    Date(unsigned y, unsigned m, unsigned d)
        : spare(0x3E), day(d), month(m), year(y)
    {}

    int32_t convertToMySQLint() const;
};
643 
644 inline
convertToMySQLint()645 int32_t Date::convertToMySQLint() const
646 {
647     return (int32_t) (year * 10000) + (month * 100) + day;
648 }
649 
/** @brief a datetime packed into bit fields: 20 bits of msecond, 6 bits
 *  each of second, minute, hour and day, 4 bits of month, 16 bits of year
 */
struct DateTime
{
    unsigned msecond : 20;
    unsigned second  : 6;
    unsigned minute  : 6;
    unsigned hour    : 6;
    unsigned day     : 6;
    unsigned month   : 4;
    unsigned year    : 16;

    // Default value is the NULL column value, 0xFFFFFFFFFFFFFFFE.
    DateTime( )
        : msecond(0xFFFFE), second(0x3F), minute(0x3F), hour(0x3F),
          day(0x3F), month(0xF), year(0xFFFF)
    {}

    // Construct a DateTime from a 64 bit integer Calpont datetime.
    DateTime(uint64_t val)
        : msecond(val & 0xFFFFF),
          second((val >> 20) & 0x3F),
          minute((val >> 26) & 0x3F),
          hour((val >> 32) & 0x3F),
          day((val >> 38) & 0x3F),
          month((val >> 44) & 0xF),
          year(val >> 48)
    {}

    // Construct using passed in parameters, no value checking
    DateTime(unsigned y, unsigned m, unsigned d, unsigned h, unsigned min, unsigned sec, unsigned msec)
        : msecond(msec), second(sec), minute(min), hour(h),
          day(d), month(m), year(y)
    {}

    int64_t convertToMySQLint() const;
    void    reset();
};
676 
677 inline
convertToMySQLint()678 int64_t DateTime::convertToMySQLint() const
679 {
680     return (int64_t) (year * 10000000000LL) + (month * 100000000) + (day * 1000000) + (hour * 10000) + (minute * 100) + second;
681 }
682 
683 inline
reset()684 void    DateTime::reset()
685 {
686     msecond = 0xFFFFE;
687     second  = 0x3F;
688     minute  = 0x3F;
689     hour    = 0x3F;
690     day     = 0x3F;
691     month   = 0xF;
692     year    = 0xFFFF;
693 }
694 
/** @brief a time packed into signed bit fields
 *  range: -838:59:59 ~ 838:59:59
 */
struct Time
{
    signed msecond : 24;
    signed second  : 8;
    signed minute  : 8;
    signed hour    : 12;
    signed day     : 11;
    signed is_neg  : 1;

    // Default value is the NULL column value, 0xFFFFFFFFFFFFFFFE.
    Time()
        : msecond (0xFFFFFE), second (0xFF), minute (0xFF),
          hour (0xFFF), day (0x7FF), is_neg (1)
    {}

    // Construct a Time from a 64 bit integer InfiniDB time.
    Time(int64_t val)
        : msecond(val & 0xffffff),
          second((val >> 24) & 0xff),
          minute((val >> 32) & 0xff),
          hour((val >> 40) & 0xfff),
          day((val >> 52) & 0x7ff),
          is_neg(val >> 63)
    {}

    // Construct from individual fields, no value checking. The negative
    // flag is raised when `neg` is set or when the hour itself is negative.
    Time(signed d, signed h, signed min, signed sec, signed msec, bool neg)
        : msecond(msec), second(sec), minute(min), hour(h), day(d),
          is_neg(neg || (h < 0))
    {}

    int64_t convertToMySQLint() const;
    void reset();
};
736 
737 inline
reset()738 void    Time::reset()
739 {
740     msecond = 0xFFFFFE;
741     second  = 0xFF;
742     minute  = 0xFF;
743     hour    = 0xFFF;
744     is_neg  = 0b1;
745     day     = 0x7FF;
746 }
747 
748 inline
convertToMySQLint()749 int64_t Time::convertToMySQLint() const
750 {
751     if ((hour >= 0) && is_neg)
752     {
753         return (int64_t) ((hour * 10000) + (minute * 100) + second) * -1;
754     }
755     else if (hour >= 0)
756     {
757         return (int64_t) (hour * 10000) + (minute * 100) + second;
758     }
759     else
760     {
761         return (int64_t) (hour * 10000) - (minute * 100) - second;
762     }
763 }
764 
/** @brief a timestamp packed into bit fields: 20 bits of msecond and
 *  44 bits of seconds
 */
struct TimeStamp
{
    unsigned msecond          : 20;
    unsigned long long second : 44;

    // Default value is the NULL column value, 0xFFFFFFFFFFFFFFFE.
    TimeStamp( )
        : msecond(0xFFFFE), second(0xFFFFFFFFFFF)
    {}

    // Construct a TimeStamp from a 64 bit integer Calpont timestamp:
    // the low 20 bits are msecond, the remaining bits the seconds.
    TimeStamp(uint64_t val)
        : msecond(val & 0xFFFFF), second(val >> 20)
    {}

    // Construct from individual fields, no value checking.
    TimeStamp(unsigned msec, unsigned long long sec)
        : msecond(msec), second(sec)
    {}

    int64_t convertToMySQLint(const std::string& timeZone) const;
    void reset();
};
783 
784 inline
convertToMySQLint(const std::string & timeZone)785 int64_t TimeStamp::convertToMySQLint(const std::string& timeZone) const
786 {
787     const int TIMESTAMPTOSTRING1_LEN = 22; // YYYYMMDDHHMMSSmmmmmm\0
788     char buf[TIMESTAMPTOSTRING1_LEN];
789 
790     MySQLTime time;
791     gmtSecToMySQLTime(second, time, timeZone);
792 
793     sprintf(buf, "%04d%02d%02d%02d%02d%02d", time.year, time.month, time.day, time.hour, time.minute, time.second);
794 
795     return (int64_t) atoll(buf);
796 }
797 
798 inline
reset()799 void TimeStamp::reset()
800 {
801     msecond = 0xFFFFE;
802     second = 0xFFFFFFFFFFF;
803 }
804 
805 inline
string_to_ll(const std::string & data,bool & bSaturate)806 int64_t string_to_ll( const std::string& data, bool& bSaturate )
807 {
808     // This function doesn't take into consideration our special values
809     // for NULL and EMPTY when setting the saturation point. Should it?
810     char* ep = NULL;
811     const char* str = data.c_str();
812     errno = 0;
813     int64_t value = strtoll(str, &ep, 10);
814 
815     //  (no digits) || (more chars)  || (other errors & value = 0)
816     if ((ep == str) || (*ep != '\0') || (errno != 0 && value == 0))
817         throw logging::QueryDataExcept("value is not numerical.", logging::formatErr);
818 
819     if (errno == ERANGE && (value == std::numeric_limits<int64_t>::max() || value == std::numeric_limits<int64_t>::min()))
820         bSaturate = true;
821 
822     return value;
823 }
824 
825 inline
string_to_ull(const std::string & data,bool & bSaturate)826 uint64_t string_to_ull( const std::string& data, bool& bSaturate )
827 {
828     // This function doesn't take into consideration our special values
829     // for NULL and EMPTY when setting the saturation point. Should it?
830     char* ep = NULL;
831     const char* str = data.c_str();
832     errno = 0;
833 
834     // check for negative number. saturate to 0;
835     if (data.find('-') != data.npos)
836     {
837         bSaturate = true;
838         return 0;
839     }
840 
841     uint64_t value = strtoull(str, &ep, 10);
842 
843     //  (no digits) || (more chars)  || (other errors & value = 0)
844     if ((ep == str) || (*ep != '\0') || (errno != 0 && value == 0))
845         throw logging::QueryDataExcept("value is not numerical.", logging::formatErr);
846 
847     if (errno == ERANGE && (value == std::numeric_limits<uint64_t>::max()))
848         bSaturate = true;
849 
850     return value;
851 }
852 
853 /** @brief DataConvert is a component for converting string data to Calpont format
854   */
855 class DataConvert
856 {
857 public:
858 
    /**
     * @brief convert a column's data, represented as a string, to its native
     * format
     *
     * @param colType the column's data type
     * @param dataOrig the column's string representation of its data
     */
866     EXPORT static boost::any convertColumnData( const execplan::CalpontSystemCatalog::ColType& colType,
867             const std::string& dataOrig, bool& bSaturate, const std::string& timeZone,
868             bool nulFlag = false, bool noRoundup = false, bool isUpdate = false);
869 
    /**
      * @brief convert a date column value from native format to a string
      *
      * @param datevalue the date value in its native (integer) format
      */
876     EXPORT static std::string dateToString( int  datevalue );
877     static inline void dateToString( int datevalue, char* buf, unsigned int buflen );
878 
879     /**
880       * @brief convert a columns data from native format to a string
881       *
882       * @param type the columns database type
883       * @param data the columns string representation of it's data
884       */
885     EXPORT static std::string datetimeToString( long long  datetimevalue, long decimals = 0 );
886     static inline void datetimeToString( long long datetimevalue, char* buf, unsigned int buflen, long decimals = 0 );
887 
888     /**
889       * @brief convert a columns data from native format to a string
890       *
891       * @param type the columns database type
892       * @param data the columns string representation of it's data
893       */
894     EXPORT static std::string timestampToString( long long  timestampvalue, const std::string& timezone, long decimals = 0 );
895     static inline void timestampToString( long long timestampvalue, char* buf, unsigned int buflen, const std::string& timezone, long decimals = 0 );
896 
897     /**
898       * @brief convert a columns data from native format to a string
899       *
900       * @param type the columns database type
901       * @param data the columns string representation of it's data
902       */
903     EXPORT static std::string timeToString( long long  timevalue, long decimals = 0 );
904     static inline void timeToString( long long timevalue, char* buf, unsigned int buflen, long decimals = 0);
905 
906     /**
907       * @brief convert a columns data from native format to a string
908       *
909       * @param type the columns database type
910       * @param data the columns string representation of it's data
911       */
912     EXPORT static std::string dateToString1( int  datevalue );
913     static inline void dateToString1( int datevalue, char* buf, unsigned int buflen );
914 
915     /**
916       * @brief convert a columns data from native format to a string
917       *
918       * @param type the columns database type
919       * @param data the columns string representation of it's data
920       */
921     EXPORT static std::string datetimeToString1( long long  datetimevalue );
922     static inline void datetimeToString1( long long datetimevalue, char* buf, unsigned int buflen );
923 
924     /**
925       * @brief convert a columns data from native format to a string
926       *
927       * @param type the columns database type
928       * @param data the columns string representation of it's data
929       */
930     EXPORT static std::string timestampToString1( long long  timestampvalue, const std::string& timezone );
931     static inline void timestampToString1( long long timestampvalue, char* buf, unsigned int buflen, const std::string& timezone );
932 
933     /**
934       * @brief convert a columns data from native format to a string
935       *
936       * @param type the columns database type
937       * @param data the columns string representation of it's data
938       */
939     EXPORT static std::string timeToString1( long long  timevalue );
940     static inline void timeToString1( long long timevalue, char* buf, unsigned int buflen );
941 
    /**
     * @brief convert a date column's data, represented as a string, to its native
     * format. This function is for bulkload to use.
     *
     * @param dataOrg the column's string representation of its data
     * @param dateFormat the format the date value is in
     * @param status 0 - success, -1 - fail
     * @param dataOrgLen length specification of dataOrg
     */
952     EXPORT static int32_t convertColumnDate( const char* dataOrg,
953             CalpontDateTimeFormat dateFormat,
954             int& status, unsigned int dataOrgLen );
955 
956     /**
957      * @brief Is specified date valid; used by binary bulk load
958      */
959     EXPORT static bool      isColumnDateValid( int32_t date );
960 
    /**
     * @brief convert a datetime column's data, represented as a string,
     * to its native format. This function is for bulkload to use.
     *
     * @param dataOrg the column's string representation of its data
     * @param datetimeFormat the format the datetime value is in
     * @param status 0 - success, -1 - fail
     * @param dataOrgLen length specification of dataOrg
     */
971     EXPORT static int64_t convertColumnDatetime( const char* dataOrg,
972             CalpontDateTimeFormat datetimeFormat,
973             int& status, unsigned int dataOrgLen );
974 
    /**
     * @brief convert a timestamp column's data, represented as a string,
     * to its native format. This function is for bulkload to use.
     *
     * @param dataOrg the column's string representation of its data
     * @param datetimeFormat the format the timestamp value is in
     * @param status 0 - success, -1 - fail
     * @param dataOrgLen length specification of dataOrg
     * @param timeZone the timezone used for conversion to native format
     */
985     EXPORT static int64_t convertColumnTimestamp( const char* dataOrg,
986             CalpontDateTimeFormat datetimeFormat,
987             int& status, unsigned int dataOrgLen,
988             const std::string& timeZone );
989 
    /**
     * @brief convert a time column's data, represented as a string,
     * to its native format. This function is for bulkload to use.
     *
     * @param dataOrg the column's string representation of its data
     * @param datetimeFormat the format the time value is in
     * @param status 0 - success, -1 - fail
     * @param dataOrgLen length specification of dataOrg
     */
1000     EXPORT static int64_t convertColumnTime( const char* dataOrg,
1001             CalpontDateTimeFormat datetimeFormat,
1002             int& status, unsigned int dataOrgLen );
1003 
1004     /**
1005      * @brief Is specified datetime valid; used by binary bulk load
1006      */
1007     EXPORT static bool      isColumnDateTimeValid( int64_t dateTime );
1008     EXPORT static bool      isColumnTimeValid( int64_t time );
1009     EXPORT static bool      isColumnTimeStampValid( int64_t timeStamp );
1010 
1011     EXPORT static bool isNullData(execplan::ColumnResult* cr, int rownum, execplan::CalpontSystemCatalog::ColType colType);
1012     static inline std::string decimalToString(int64_t value, uint8_t scale, execplan::CalpontSystemCatalog::ColDataType colDataType);
1013     static inline void decimalToString(int64_t value, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType);
1014     static inline void trimWhitespace(int64_t& charData);
1015 
1016     // convert string to date
1017     EXPORT static int64_t stringToDate(const std::string& data);
1018     // convert string to datetime
1019     EXPORT static int64_t stringToDatetime(const std::string& data, bool* isDate = NULL);
1020     // convert string to timestamp
1021     EXPORT static int64_t stringToTimestamp(const std::string& data, const std::string& timeZone);
1022     // convert integer to date
1023     EXPORT static int64_t intToDate(int64_t data);
1024     // convert integer to datetime
1025     EXPORT static int64_t intToDatetime(int64_t data, bool* isDate = NULL);
    // convert integer to time
1027     EXPORT static int64_t intToTime(int64_t data, bool fromString = false);
1028     // convert string to date. alias to stringToDate
1029     EXPORT static int64_t dateToInt(const std::string& date);
    // convert string to datetime. alias to stringToDatetime
1031     EXPORT static int64_t datetimeToInt(const std::string& datetime);
1032     EXPORT static int64_t timestampToInt(const std::string& timestamp, const std::string& timeZone);
1033     EXPORT static int64_t timeToInt(const std::string& time);
1034     EXPORT static int64_t stringToTime (const std::string& data);
1035     // bug4388, union type conversion
1036     EXPORT static execplan::CalpontSystemCatalog::ColType convertUnionColType(std::vector<execplan::CalpontSystemCatalog::ColType>&);
1037 };
1038 
dateToString(int datevalue,char * buf,unsigned int buflen)1039 inline void DataConvert::dateToString( int datevalue, char* buf, unsigned int buflen)
1040 {
1041     snprintf( buf, buflen, "%04d-%02d-%02d",
1042               (unsigned)((datevalue >> 16) & 0xffff),
1043               (unsigned)((datevalue >> 12) & 0xf),
1044               (unsigned)((datevalue >> 6) & 0x3f)
1045             );
1046 }
1047 
datetimeToString(long long datetimevalue,char * buf,unsigned int buflen,long decimals)1048 inline void DataConvert::datetimeToString( long long datetimevalue, char* buf, unsigned int buflen, long decimals )
1049 {
1050     // 10 is default which means we don't need microseconds
1051     if (decimals > 6 || decimals < 0)
1052     {
1053         decimals = 0;
1054     }
1055 
1056     int msec = 0;
1057 
1058     if ((datetimevalue & 0xfffff) > 0)
1059     {
1060         msec = (unsigned)((datetimevalue) & 0xfffff);
1061     }
1062 
1063     snprintf( buf, buflen, "%04d-%02d-%02d %02d:%02d:%02d",
1064               (unsigned)((datetimevalue >> 48) & 0xffff),
1065               (unsigned)((datetimevalue >> 44) & 0xf),
1066               (unsigned)((datetimevalue >> 38) & 0x3f),
1067               (unsigned)((datetimevalue >> 32) & 0x3f),
1068               (unsigned)((datetimevalue >> 26) & 0x3f),
1069               (unsigned)((datetimevalue >> 20) & 0x3f)
1070             );
1071 
1072     if (msec || decimals)
1073     {
1074         snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, msec);
1075     }
1076 }
1077 
timestampToString(long long timestampvalue,char * buf,unsigned int buflen,const std::string & timezone,long decimals)1078 inline void DataConvert::timestampToString( long long timestampvalue, char* buf, unsigned int buflen, const std::string& timezone, long decimals )
1079 {
1080     // 10 is default which means we don't need microseconds
1081     if (decimals > 6 || decimals < 0)
1082     {
1083         decimals = 0;
1084     }
1085 
1086     TimeStamp timestamp(timestampvalue);
1087     int64_t seconds = timestamp.second;
1088 
1089     MySQLTime time;
1090     gmtSecToMySQLTime(seconds, time, timezone);
1091 
1092     snprintf( buf, buflen, "%04d-%02d-%02d %02d:%02d:%02d",
1093               time.year, time.month, time.day,
1094               time.hour, time.minute, time.second
1095             );
1096 
1097     if (timestamp.msecond || decimals)
1098     {
1099         snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, timestamp.msecond);
1100     }
1101 }
1102 
timeToString(long long timevalue,char * buf,unsigned int buflen,long decimals)1103 inline void DataConvert::timeToString( long long timevalue, char* buf, unsigned int buflen, long decimals )
1104 {
1105     // 10 is default which means we don't need microseconds
1106     if (decimals > 6 || decimals < 0)
1107     {
1108         decimals = 0;
1109     }
1110 
1111     // Handle negative correctly
1112     int hour = 0, msec = 0;
1113 
1114     if ((timevalue >> 40) & 0x800)
1115     {
1116         hour = 0xfffff000;
1117     }
1118 
1119     hour |= ((timevalue >> 40) & 0xfff);
1120 
1121     if ((timevalue & 0xffffff) > 0)
1122     {
1123         msec = (unsigned)((timevalue) & 0xffffff);
1124     }
1125 
1126     if ((hour >= 0) && (timevalue >> 63))
1127     {
1128         buf[0] = '-';
1129         buf++;
1130         buflen--;
1131     }
1132 
1133     snprintf( buf, buflen, "%02d:%02d:%02d",
1134               hour,
1135               (unsigned)((timevalue >> 32) & 0xff),
1136               (unsigned)((timevalue >> 24) & 0xff)
1137             );
1138 
1139     if (msec || decimals)
1140     {
1141         // Pad start with zeros
1142         snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, msec);
1143     }
1144 }
1145 
dateToString1(int datevalue,char * buf,unsigned int buflen)1146 inline void DataConvert::dateToString1( int datevalue, char* buf, unsigned int buflen)
1147 {
1148     snprintf( buf, buflen, "%04d%02d%02d",
1149               (unsigned)((datevalue >> 16) & 0xffff),
1150               (unsigned)((datevalue >> 12) & 0xf),
1151               (unsigned)((datevalue >> 6) & 0x3f)
1152             );
1153 }
1154 
datetimeToString1(long long datetimevalue,char * buf,unsigned int buflen)1155 inline void DataConvert::datetimeToString1( long long datetimevalue, char* buf, unsigned int buflen )
1156 {
1157     snprintf( buf, buflen, "%04d%02d%02d%02d%02d%02d",
1158               (unsigned)((datetimevalue >> 48) & 0xffff),
1159               (unsigned)((datetimevalue >> 44) & 0xf),
1160               (unsigned)((datetimevalue >> 38) & 0x3f),
1161               (unsigned)((datetimevalue >> 32) & 0x3f),
1162               (unsigned)((datetimevalue >> 26) & 0x3f),
1163               (unsigned)((datetimevalue >> 20) & 0x3f)
1164             );
1165 }
1166 
timestampToString1(long long timestampvalue,char * buf,unsigned int buflen,const std::string & timezone)1167 inline void DataConvert::timestampToString1( long long timestampvalue, char* buf, unsigned int buflen, const std::string& timezone )
1168 {
1169     TimeStamp timestamp(timestampvalue);
1170     int64_t seconds = timestamp.second;
1171 
1172     MySQLTime time;
1173     gmtSecToMySQLTime(seconds, time, timezone);
1174 
1175     snprintf( buf, buflen, "%04d%02d%02d%02d%02d%02d",
1176               time.year, time.month, time.day,
1177               time.hour, time.minute, time.second
1178             );
1179 }
1180 
timeToString1(long long timevalue,char * buf,unsigned int buflen)1181 inline void DataConvert::timeToString1( long long timevalue, char* buf, unsigned int buflen )
1182 {
1183     // Handle negative correctly
1184     int hour = 0;
1185 
1186     if ((timevalue >> 40) & 0x800)
1187     {
1188         hour = 0xfffff000;
1189     }
1190 
1191     hour |= ((timevalue >> 40) & 0xfff);
1192 
1193     if ((hour >= 0) && (timevalue >> 63))
1194     {
1195         buf[0] = '-';
1196         buf++;
1197         buflen--;
1198     }
1199     // this snprintf call causes a compiler warning b/c buffer size is less
1200     // then maximum string size.
1201 #if defined(__GNUC__) && __GNUC__ >= 7
1202 #pragma GCC diagnostic push
1203 #pragma GCC diagnostic ignored "-Wformat-truncation="
1204     snprintf( buf, buflen, "%02d%02d%02d",
1205               hour,
1206               (unsigned)((timevalue >> 32) & 0xff),
1207               (unsigned)((timevalue >> 14) & 0xff)
1208             );
1209 #pragma GCC diagnostic pop
1210 #else
1211     snprintf( buf, buflen, "%02d%02d%02d",
1212               hour,
1213               (unsigned)((timevalue >> 32) & 0xff),
1214               (unsigned)((timevalue >> 14) & 0xff)
1215             );
1216 #endif
1217 }
1218 
decimalToString(int64_t value,uint8_t scale,execplan::CalpontSystemCatalog::ColDataType colDataType)1219 inline std::string DataConvert::decimalToString(int64_t value, uint8_t scale, execplan::CalpontSystemCatalog::ColDataType colDataType)
1220 {
1221     char buf[80];
1222     DataConvert::decimalToString(value, scale, buf, 80, colDataType);
1223     return std::string(buf);
1224 }
1225 
decimalToString(int64_t int_val,uint8_t scale,char * buf,unsigned int buflen,execplan::CalpontSystemCatalog::ColDataType colDataType)1226 inline void DataConvert::decimalToString(int64_t int_val, uint8_t scale, char* buf, unsigned int buflen,
1227         execplan::CalpontSystemCatalog::ColDataType colDataType)
1228 {
1229     // Need to convert a string with a binary unsigned number in it to a 64-bit signed int
1230 
1231     // MySQL seems to round off values unless we use the string store method. Groan.
1232     // Taken from ha_mcs_impl.cpp
1233 
1234     //biggest Calpont supports is DECIMAL(18,x), or 18 total digits+dp+sign for column
1235     // Need 19 digits maxium to hold a sum result of 18 digits decimal column.
1236     if (isUnsigned(colDataType))
1237     {
1238 #ifndef __LP64__
1239         snprintf(buf, buflen, "%llu", static_cast<uint64_t>(int_val));
1240 #else
1241         snprintf(buf, buflen, "%lu", static_cast<uint64_t>(int_val));
1242 #endif
1243     }
1244     else
1245     {
1246 #ifndef __LP64__
1247         snprintf(buf, buflen, "%lld", int_val);
1248 #else
1249         snprintf(buf, buflen, "%ld", int_val);
1250 #endif
1251     }
1252 
1253     if (scale == 0)
1254         return;
1255 
1256     //we want to move the last dt_scale chars right by one spot to insert the dp
1257     //we want to move the trailing null as well, so it's really dt_scale+1 chars
1258     size_t l1 = strlen(buf);
1259     char* ptr = &buf[0];
1260 
1261     if (int_val < 0)
1262     {
1263         ptr++;
1264         idbassert(l1 >= 2);
1265         l1--;
1266     }
1267 
1268     //need to make sure we have enough leading zeros for this to work...
1269     //at this point scale is always > 0
1270     size_t l2 = 1;
1271 
1272     if ((unsigned)scale > l1)
1273     {
1274         const char* zeros = "00000000000000000000"; //20 0's
1275         size_t diff = 0;
1276 
1277         if (int_val != 0)
1278             diff = scale - l1; //this will always be > 0
1279         else
1280             diff = scale;
1281 
1282         memmove((ptr + diff), ptr, l1 + 1); //also move null
1283         memcpy(ptr, zeros, diff);
1284 
1285         if (int_val != 0)
1286             l1 = 0;
1287         else
1288             l1 = 1;
1289     }
1290     else if ((unsigned)scale == l1)
1291     {
1292         l1 = 0;
1293         l2 = 2;
1294     }
1295     else
1296     {
1297         l1 -= scale;
1298     }
1299 
1300     memmove((ptr + l1 + l2), (ptr + l1), scale + 1); //also move null
1301 
1302     if (l2 == 2)
1303         *(ptr + l1++) = '0';
1304 
1305     *(ptr + l1) = '.';
1306 }
1307 
trimWhitespace(int64_t & charData)1308 inline void DataConvert::trimWhitespace(int64_t& charData)
1309 {
1310     // Trims whitespace characters off non-dict character data
1311     char* ch_data = (char*) &charData;
1312 
1313     for (int8_t i = 7; i > 0; i--)
1314     {
1315         if (ch_data[i] == ' ' || ch_data[i] == '\0')
1316             ch_data[i] = '\0';
1317         else
1318             break;
1319     }
1320 }
1321 
1322 } // namespace dataconvert
1323 
1324 #undef EXPORT
1325 
1326 #endif //DATACONVERT_H
1327 
1328