1 /* Copyright (C) 2014 InfiniDB, Inc.
2
3 This program is free software; you can redistribute it and/or
4 modify it under the terms of the GNU General Public License
5 as published by the Free Software Foundation; version 2 of
6 the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16 MA 02110-1301, USA. */
17
18 /****************************************************************************
19 * $Id: dataconvert.h 3693 2013-04-05 16:11:30Z chao $
20 *
21 *
22 ****************************************************************************/
23 /** @file */
24
25 #ifndef DATACONVERT_H
26 #define DATACONVERT_H
27
28 #include <unistd.h>
29 #include <string>
30 #include <boost/any.hpp>
31 #include <vector>
32 #ifdef _MSC_VER
33 #include <winsock2.h>
34 #include <ws2tcpip.h>
35 #include <stdio.h>
36 #else
37 #include <netinet/in.h>
38 #endif
39
40 #ifdef __linux__
41 #define POSIX_REGEX
42 #endif
43
44 #ifdef POSIX_REGEX
45 #include <regex.h>
46 #else
47 #include <boost/regex.hpp>
48 #endif
49
50 #include "calpontsystemcatalog.h"
51 #include "columnresult.h"
52 #include "exceptclasses.h"
53
// Fallback definitions of htonll() (64-bit host-to-network byte order
// conversion). Remove this block if htonll is defined in the library.
#ifdef __linux__
#include <endian.h>
#if __BYTE_ORDER == __BIG_ENDIAN  // 4321
// Big-endian host: the value is returned unchanged.
inline uint64_t htonll(uint64_t n)
{
    return n;
}
#elif __BYTE_ORDER == __LITTLE_ENDIAN  // 1234
// Little-endian host: convert each 32-bit half with htonl() and exchange
// the two halves.
inline uint64_t htonll(uint64_t n)
{
    const uint64_t convertedLow = htonl(n & 0xFFFFFFFFLLU);
    const uint64_t convertedHigh = htonl((n & 0xFFFFFFFF00000000LLU) >> 32);
    return (convertedLow << 32) | convertedHigh;
}
#else  // __BYTE_ORDER == __PDP_ENDIAN 3412
// Only declared: no definition is supplied for this byte order (it is not
// known whether the layout would be 34127856 or 78563412).
inline uint64_t htonll(uint64_t n);
#endif
#else  // !__linux__
#if _MSC_VER < 1600
// Assume we're on little-endian
inline uint64_t htonll(uint64_t n)
{
    const uint64_t convertedLow = htonl(n & 0xFFFFFFFFULL);
    const uint64_t convertedHigh = htonl((n & 0xFFFFFFFF00000000ULL) >> 32);
    return (convertedLow << 32) | convertedHigh;
}
#endif  // _MSC_VER
#endif  // __linux__
80
81 // this method evalutes the uint64 that stores a char[] to expected value
uint64ToStr(uint64_t n)82 inline uint64_t uint64ToStr(uint64_t n)
83 {
84 return htonll(n);
85 }
86
87
88 #if defined(_MSC_VER) && defined(xxxDATACONVERT_DLLEXPORT)
89 #define EXPORT __declspec(dllexport)
90 #else
91 #define EXPORT
92 #endif
93
// Table of powers of ten: IDB_pow[i] == 10^i for i in [0, 18].
// 10^18 is the last entry listed; valid indexes are 0..18 only.
const int64_t IDB_pow[19] =
{
    1,
    10,
    100,
    1000,
    10000,
    100000,
    1000000,
    10000000,
    100000000,
    1000000000,
    10000000000LL,
    100000000000LL,
    1000000000000LL,
    10000000000000LL,
    100000000000000LL,
    1000000000000000LL,
    10000000000000000LL,
    100000000000000000LL,
    1000000000000000000LL
};
116
117
// Calendar and clock unit sizes.
const int32_t SECS_PER_MIN = 60;
const int32_t MINS_PER_HOUR = 60;
const int32_t HOURS_PER_DAY = 24;
const int32_t DAYS_PER_WEEK = 7;
const int32_t DAYS_PER_NYEAR = 365;  // days in a non-leap year
const int32_t DAYS_PER_LYEAR = 366;  // days in a leap year
const int32_t SECS_PER_HOUR = SECS_PER_MIN * MINS_PER_HOUR;  // 3600
const int32_t SECS_PER_DAY = SECS_PER_HOUR * HOURS_PER_DAY;  // 86400
const int32_t EPOCH_YEAR = 1970;
const int32_t MONS_PER_YEAR = 12;
// validateTimestampRange() accepts years in
// [MIN_TIMESTAMP_YEAR, MAX_TIMESTAMP_YEAR].
const int32_t MAX_TIMESTAMP_YEAR = 2038;
const int32_t MIN_TIMESTAMP_YEAR = 1969;
// Inclusive bounds on the seconds part of a timestamp: 0 .. 2^31 - 1 (0x7FFFFFFF).
const int32_t MAX_TIMESTAMP_VALUE = (1ULL << 31) - 1;
const int32_t MIN_TIMESTAMP_VALUE = 0;
132
133
134 namespace dataconvert
135 {
136
// Tags the kind of date/time value being handled. It is stored in
// MySQLTime::time_type and passed as the format argument of the
// DataConvert::convertColumn* functions.
enum CalpontDateTimeFormat
{
    CALPONTDATE_ENUM = 1,      // date format is: "YYYY-MM-DD"
    CALPONTDATETIME_ENUM = 2,  // date format is: "YYYY-MM-DD HH:MI:SS"
    CALPONTTIME_ENUM = 3       // NOTE(review): presumably a time-only value - confirm
};
143
144 /** @brief a structure that represents a timestamp in broken down
145 * representation
146 */
147 struct MySQLTime
148 {
149 unsigned int year, month, day, hour, minute, second;
150 unsigned long second_part;
151 CalpontDateTimeFormat time_type;
resetMySQLTime152 void reset()
153 {
154 year = month = day = 0;
155 hour = minute = second = second_part = 0;
156 time_type = CALPONTDATETIME_ENUM;
157 }
158 };
159
/**
 * @brief Converts a timezone represented as a string in the format
 *        "+HH:MM" or "-HH:MM" to a signed offset in seconds.
 *
 * Most of this code is taken from tztime.cc:str_to_offset.
 *
 * @param str    start of the timezone text (only the first @p length
 *               characters are read; NUL termination is not required)
 * @param length number of characters of @p str to parse
 * @param offset receives the offset in seconds; written only on success
 * @return false on success, true when the text is malformed or the offset
 *         is outside -12:59 .. +13:00 (note the inverted sense)
 */
inline
bool timeZoneToOffset(const char *str, std::string::size_type length, long *offset)
{
    const char *end = str + length;
    bool negative;
    unsigned long number_tmp;
    long offset_tmp;

    if (length < 4)
        return true;

    if (*str == '+')
        negative = false;
    else if (*str == '-')
        negative = true;
    else
        return true;

    str++;

    number_tmp = 0;

    // isdigit() is only defined for values representable as unsigned char,
    // so cast before the call (plain char may be negative).
    while (str < end && isdigit(static_cast<unsigned char>(*str)))
    {
        number_tmp = number_tmp * 10 + (*str - '0');

        // More than 13 hours is always out of range. Rejecting here also
        // stops an arbitrarily long digit run from wrapping number_tmp back
        // into the accepted range.
        if (number_tmp > 13)
            return true;

        str++;
    }

    // A ':' followed by at least one more character is required.
    if (str + 1 >= end || *str != ':')
        return true;

    str++;

    offset_tmp = number_tmp * 60L;
    number_tmp = 0;

    while (str < end && isdigit(static_cast<unsigned char>(*str)))
    {
        number_tmp = number_tmp * 10 + (*str - '0');

        // Minutes above 59 are invalid; same wrap-around protection as above.
        if (number_tmp > 59)
            return true;

        str++;
    }

    // Anything left over after the minutes makes the text malformed.
    if (str != end)
        return true;

    offset_tmp = (offset_tmp + number_tmp) * 60L;

    if (negative)
        offset_tmp = -offset_tmp;

    /*
      Check if offset is in range prescribed by standard
      (from -12:59 to 13:00).
    */
    if (number_tmp > 59 || offset_tmp < -13 * 3600L + 1 ||
            offset_tmp > 13 * 3600L)
        return true;

    *offset = offset_tmp;

    return false;
}
226
// Days per year, indexed by leap flag: [0] = normal year, [1] = leap year.
const int32_t year_lengths[2] =
{
    DAYS_PER_NYEAR, DAYS_PER_LYEAR
};

// Days in each month (index 0 = January). The first row is for a normal
// year, the second for a leap year (only February differs).
const unsigned int mon_lengths[2][MONS_PER_YEAR] =
{
    { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
    { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
};

// Number of days that precede the first day of each month (index 0 =
// January). The first row is for a normal year, the second for a leap year.
const unsigned int mon_starts[2][MONS_PER_YEAR] =
{
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 },
    { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 }
};
243
// Returns year/4 - year/100 + year/400 (integer division), i.e. the count
// of Gregorian leap years up to and including `year` for positive years.
inline int32_t leapsThruEndOf(int32_t year)
{
    const int32_t fourYearCount = year / 4;
    const int32_t centuryCount = year / 100;
    const int32_t fourCenturyCount = year / 400;

    return fourYearCount - centuryCount + fourCenturyCount;
}
248
// True when `year` is divisible by 400, or divisible by 4 but not by 100.
inline bool isLeapYear ( int year)
{
    const bool divisibleBy4 = ( year % 4 == 0 );
    const bool divisibleBy100 = ( year % 100 == 0 );
    const bool divisibleBy400 = ( year % 400 == 0 );

    return divisibleBy400 || ( divisibleBy4 && !divisibleBy100 );
}
259
260 static uint32_t daysInMonth[13] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0};
261
getDaysInMonth(uint32_t month,int year)262 inline uint32_t getDaysInMonth(uint32_t month, int year)
263 {
264 if (month < 1 || month > 12)
265 return 0;
266
267 uint32_t days = daysInMonth[month - 1];
268
269 if ((month == 2) && isLeapYear(year))
270 days++;
271
272 return days;
273 }
274
275 inline
isDateValid(int day,int month,int year)276 bool isDateValid ( int day, int month, int year)
277 {
278 bool valid = true;
279
280 if ( day == 0 && month == 0 && year == 0 )
281 {
282 return true;
283 }
284
285 int daycheck = getDaysInMonth( month, year );
286
287 if ( ( year < 1000 ) || ( year > 9999 ) )
288 valid = false;
289 else if ( month < 1 || month > 12 )
290 valid = false;
291 else if ( day < 1 || day > daycheck )
292 valid = false;
293
294 return ( valid );
295 }
296
// Checks the time-of-day part of a datetime: hour 0..24 (24 is accepted),
// minute 0..59, second 0..59, microSecond 0..999999.
inline
bool isDateTimeValid ( int hour, int minute, int second, int microSecond)
{
    if ( hour < 0 || hour > 24 )
        return false;

    if ( minute < 0 || minute >= 60 )
        return false;

    if ( second < 0 || second >= 60 )
        return false;

    return ( microSecond >= 0 && microSecond <= 999999 );
}
318
// Checks a time value: hour -838..838, minute 0..59, second 0..59,
// microSecond 0..999999.
inline
bool isTimeValid ( int hour, int minute, int second, int microSecond)
{
    if ( hour < -838 || hour > 838 )
        return false;

    if ( minute < 0 || minute >= 60 )
        return false;

    if ( second < 0 || second >= 60 )
        return false;

    return ( microSecond >= 0 && microSecond <= 999999 );
}
340
341 inline
isTimestampValid(uint64_t second,uint64_t microsecond)342 bool isTimestampValid ( uint64_t second, uint64_t microsecond )
343 {
344 bool valid = false;
345
346 // MariaDB server currently sets the upper limit on timestamp to
347 // 0x7FFFFFFF. So enforce the same restriction here.
348 // TODO: We however store the seconds portion of the timestamp in
349 // 44 bits, so change this limit when the server supports higher values.
350 if ( second <= MAX_TIMESTAMP_VALUE )
351 {
352 if ( microsecond <= 999999 )
353 {
354 valid = true;
355 }
356 }
357
358 return valid;
359 }
360
/**
 * @brief converts a timestamp (seconds in UTC since Epoch)
 * to broken-down representation. Most of this code is taken
 * from sec_to_TIME and Time_zone_system::gmt_sec_to_TIME
 * functions in tztime.cc in the server
 *
 * A value of 0 seconds, or a timeZone string that timeZoneToOffset()
 * rejects, leaves @p time reset (all fields zero).
 *
 * @param seconds the value to be converted
 * @param time the broken-down representation of the timestamp
 * @param timeZone a string with the server timezone of the machine
 * which initiated the query; either "SYSTEM" or a "+HH:MM"/"-HH:MM" offset
 */
inline void gmtSecToMySQLTime(int64_t seconds, MySQLTime& time,
                              const std::string& timeZone)
{
    // Zero seconds maps to the all-zero value, not to 1970-01-01.
    if (seconds == 0)
    {
        time.reset();
        return;
    }

    if (timeZone == "SYSTEM")
    {
        // Let the C library apply the local timezone of this process.
        struct tm tmp_tm;
        time_t tmp_t = (time_t)seconds;
        localtime_r(&tmp_t, &tmp_tm);
        time.second_part = 0;
        time.year = (int) ((tmp_tm.tm_year + 1900) % 10000);
        time.month = (int) tmp_tm.tm_mon + 1;
        time.day = (int) tmp_tm.tm_mday;
        time.hour = (int) tmp_tm.tm_hour;
        time.minute = (int) tmp_tm.tm_min;
        time.second = (int) tmp_tm.tm_sec;
        time.time_type = CALPONTDATETIME_ENUM;

        // Leap-second values reported by localtime_r are clamped to 59.
        if (time.second == 60 || time.second == 61)
            time.second = 59;
    }
    else
    {
        // Explicit "+HH:MM"/"-HH:MM" offset; timeZoneToOffset() returns true
        // on failure.
        long offset;

        if (timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
        {
            time.reset();
            return;
        }

        int64_t days;
        int32_t rem;
        int32_t y;
        int32_t yleap;
        const unsigned int *ip;

        // Split into whole days since the epoch and seconds within the day.
        days = (int64_t) (seconds / SECS_PER_DAY);
        rem = (int32_t) (seconds % SECS_PER_DAY);

        // Apply the timezone offset, then bring rem back into
        // [0, SECS_PER_DAY) by carrying whole days.
        rem += offset;

        while (rem < 0)
        {
            rem += SECS_PER_DAY;
            days--;
        }

        while (rem >= SECS_PER_DAY)
        {
            rem -= SECS_PER_DAY;
            days++;
        }

        time.hour = (unsigned int) (rem / SECS_PER_HOUR);
        rem = rem % SECS_PER_HOUR;
        time.minute = (unsigned int) (rem / SECS_PER_MIN);
        time.second = (unsigned int) (rem % SECS_PER_MIN);

        // Find the year: guess using 365-day years, then correct `days` by
        // the leap days between the old and new year. The loop ends once
        // `days` is a valid day-of-year for y; yleap is assigned as a side
        // effect of the loop condition and is set whenever the loop exits.
        y = EPOCH_YEAR;

        while (days < 0 || days >= (int64_t) (year_lengths[yleap = isLeapYear(y)]))
        {
            int32_t newy;

            newy = y + days / DAYS_PER_NYEAR;

            if (days < 0)
                newy--;

            days -= (newy - y) * DAYS_PER_NYEAR +
                    leapsThruEndOf(newy - 1) -
                    leapsThruEndOf(y - 1);
            y = newy;
        }

        time.year = y;

        // Walk the month-length row for this year; time.month is used as a
        // 0-based index during the loop and made 1-based afterwards.
        ip = mon_lengths[yleap];

        for (time.month = 0; days >= (int64_t) ip[time.month]; time.month++)
            days -= (int64_t) ip[time.month];

        time.month++;
        time.day = (unsigned int) (days + 1);

        time.second_part = 0;
        time.time_type = CALPONTDATETIME_ENUM;
    }
}
456
457 /**
458 * @brief function that provides a rough estimate if a broken-down
459 * representation of timestamp is in range
460 *
461 * @param t the broken-down representation of timestamp
462 */
validateTimestampRange(const MySQLTime & t)463 inline bool validateTimestampRange(const MySQLTime& t)
464 {
465 if ((t.year > MAX_TIMESTAMP_YEAR || t.year < MIN_TIMESTAMP_YEAR) ||
466 (t.year == MAX_TIMESTAMP_YEAR && (t.month > 1 || t.day > 19)))
467 return false;
468
469 return true;
470 }
471
472 inline
secSinceEpoch(int year,int month,int day,int hour,int min,int sec)473 int64_t secSinceEpoch(int year, int month, int day, int hour, int min, int sec)
474 {
475 int64_t days = (year - EPOCH_YEAR) * DAYS_PER_NYEAR +
476 leapsThruEndOf(year - 1) -
477 leapsThruEndOf(EPOCH_YEAR - 1);
478 days += mon_starts[isLeapYear(year)][month - 1];
479 days += day - 1;
480
481 return ((days * HOURS_PER_DAY + hour) * MINS_PER_HOUR + min) *
482 SECS_PER_MIN + sec;
483 }
484
485 // This is duplicate of funchelpers.h:calc_mysql_daynr,
486 // with one additional function parameter
calc_mysql_daynr(uint32_t year,uint32_t month,uint32_t day,bool & isValid)487 inline uint32_t calc_mysql_daynr( uint32_t year, uint32_t month, uint32_t day, bool& isValid )
488 {
489 int temp;
490 int y = year;
491 long delsum;
492
493 if ( !isDateValid( day, month, year ) )
494 {
495 isValid = false;
496 return 0;
497 }
498
499 delsum = (long) (365 * y + 31 * ((int) month - 1) + (int) day);
500
501 if (month <= 2)
502 y--;
503 else
504 delsum -= (long) ((int) month * 4 + 23) / 10;
505
506 temp = (int) ((y / 100 + 1) * 3) / 4;
507
508 return delsum + (int) y / 4 - temp;
509 }
510
511 /**
512 * @brief converts a timestamp from broken-down representation
513 * to seconds since UTC epoch
514 *
515 * @param time the broken-down representation of the timestamp
516 @param timeZone a string with the server timezone of the machine
517 which initiated the query
518 */
mySQLTimeToGmtSec(const MySQLTime & time,const std::string & timeZone,bool & isValid)519 inline int64_t mySQLTimeToGmtSec(const MySQLTime& time,
520 const std::string& timeZone, bool& isValid)
521 {
522 int64_t seconds;
523
524 if (!validateTimestampRange(time))
525 {
526 isValid = false;
527 return 0;
528 }
529
530 if (timeZone == "SYSTEM")
531 {
532 // This is mirror of code in func_unix_timestamp.cpp
533 uint32_t loop;
534 time_t tmp_t = 0;
535 int shift = 0;
536 struct tm* l_time, tm_tmp;
537 int64_t diff;
538 localtime_r(&tmp_t, &tm_tmp);
539 // Get the system timezone offset at 0 seconds since epoch
540 int64_t my_time_zone = tm_tmp.tm_gmtoff;
541 int day = time.day;
542
543 if ((time.year == MAX_TIMESTAMP_YEAR) && (time.month == 1) && (day > 4))
544 {
545 day -= 2;
546 shift = 2;
547 }
548
549 tmp_t = (time_t)(((calc_mysql_daynr(time.year, time.month, day, isValid) -
550 719528) * 86400L + (int64_t)time.hour * 3600L +
551 (int64_t)(time.minute * 60 + time.second)) - (time_t)my_time_zone);
552 if (!isValid)
553 return 0;
554
555 localtime_r(&tmp_t, &tm_tmp);
556 l_time = &tm_tmp;
557
558 for (loop = 0; loop < 2 && (time.hour != (uint32_t) l_time->tm_hour ||
559 time.minute != (uint32_t) l_time->tm_min ||
560 time.second != (uint32_t)l_time->tm_sec); loop++)
561 {
562 int days = day - l_time->tm_mday;
563
564 if (days < -1)
565 days = 1; /* Month has wrapped */
566 else if (days > 1)
567 days = -1;
568
569 diff = (3600L * (int64_t) (days * 24 + ((int) time.hour - (int) l_time->tm_hour)) +
570 (int64_t) (60 * ((int) time.minute - (int) l_time->tm_min)) +
571 (int64_t) ((int) time.second - (int) l_time->tm_sec));
572 tmp_t += (time_t) diff;
573 localtime_r(&tmp_t, &tm_tmp);
574 l_time = &tm_tmp;
575 }
576
577 if (loop == 2 && time.hour != (uint32_t)l_time->tm_hour)
578 {
579 int days = day - l_time->tm_mday;
580
581 if (days < -1)
582 days = 1; /* Month has wrapped */
583 else if (days > 1)
584 days = -1;
585
586 diff = (3600L * (int64_t) (days * 24 + ((int) time.hour - (int) l_time->tm_hour)) +
587 (int64_t) (60 * ((int) time.minute - (int) l_time->tm_min)) +
588 (int64_t) ((int) time.second - (int) l_time->tm_sec));
589
590 if (diff == 3600)
591 tmp_t += 3600 - time.minute * 60 - time.second; /* Move to next hour */
592 else if (diff == -3600)
593 tmp_t -= time.minute * 60 + time.second; /* Move to previous hour */
594 }
595
596
597 /* shift back, if we were dealing with boundary dates */
598 tmp_t += shift * 86400L;
599
600 seconds = (int64_t)tmp_t;
601 }
602 else
603 {
604 long offset;
605 if (timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
606 {
607 isValid = false;
608 return 0;
609 }
610 seconds = secSinceEpoch(time.year, time.month, time.day,
611 time.hour, time.minute, time.second) - offset;
612 }
613
614 /* make sure we have legit timestamps (i.e. we didn't over/underflow anywhere above) */
615 if (seconds >= MIN_TIMESTAMP_VALUE && seconds <= MAX_TIMESTAMP_VALUE)
616 return seconds;
617
618 isValid = false;
619 return 0;
620
621 }
622
623
/** @brief a structure to hold a date
 *
 * Bit-field layout, in declaration order: 6 spare bits, 6-bit day,
 * 4-bit month, 16-bit year.
 */
struct Date
{
    unsigned spare : 6;
    unsigned day : 6;
    unsigned month : 4;
    unsigned year : 16;

    // Default value is the NULL column value = 0xFFFFFFFE
    Date( ) :
        spare(0x3E),
        day(0x3F),
        month(0xF),
        year(0xFFFF)
    {}

    // Construct a Date from a 64 bit integer Calpont date.
    Date(uint64_t val) :
        spare(0x3E),
        day((val >> 6) & 0x3F),
        month((val >> 12) & 0xF),
        year(val >> 16)
    {}

    // Construct using passed in parameters, no value checking
    Date(unsigned y, unsigned m, unsigned d) :
        spare(0x3E),
        day(d),
        month(m),
        year(y)
    {}

    // Returns the date as the decimal number YYYYMMDD.
    int32_t convertToMySQLint() const;
};

inline
int32_t Date::convertToMySQLint() const
{
    const int32_t yearDigits = (int32_t) (year * 10000);
    const int monthDigits = month * 100;

    return yearDigits + monthDigits + day;
}
649
/** @brief a structure to hold a datetime
 *
 * Bit-field layout, in declaration order: 20-bit msecond, 6-bit second,
 * 6-bit minute, 6-bit hour, 6-bit day, 4-bit month, 16-bit year.
 */
struct DateTime
{
    unsigned msecond : 20;
    unsigned second : 6;
    unsigned minute : 6;
    unsigned hour : 6;
    unsigned day : 6;
    unsigned month : 4;
    unsigned year : 16;

    // Default value is the NULL column value = 0xFFFFFFFFFFFFFFFE
    DateTime( ) :
        msecond(0xFFFFE),
        second(0x3F),
        minute(0x3F),
        hour(0x3F),
        day(0x3F),
        month(0xF),
        year(0xFFFF)
    {}

    // Construct a DateTime from a 64 bit integer Calpont datetime.
    DateTime(uint64_t val) :
        msecond(val & 0xFFFFF),
        second((val >> 20) & 0x3F),
        minute((val >> 26) & 0x3F),
        hour((val >> 32) & 0x3F),
        day((val >> 38) & 0x3F),
        month((val >> 44) & 0xF),
        year(val >> 48)
    {}

    // Construct using passed in parameters, no value checking
    DateTime(unsigned y, unsigned m, unsigned d, unsigned h, unsigned min, unsigned sec, unsigned msec) :
        msecond(msec),
        second(sec),
        minute(min),
        hour(h),
        day(d),
        month(m),
        year(y)
    {}

    // Returns the datetime as the decimal number YYYYMMDDHHMMSS (msecond is
    // not included).
    int64_t convertToMySQLint() const;
    // Restores the NULL column value set by the default constructor.
    void reset();
};

inline
int64_t DateTime::convertToMySQLint() const
{
    int64_t digits = (int64_t) (year * 10000000000LL);
    digits += (month * 100000000);
    digits += (day * 1000000);
    digits += (hour * 10000);
    digits += (minute * 100);
    digits += second;

    return digits;
}

inline
void DateTime::reset()
{
    year = 0xFFFF;
    month = 0xF;
    day = 0x3F;
    hour = 0x3F;
    minute = 0x3F;
    second = 0x3F;
    msecond = 0xFFFFE;
}
694
/** @brief a structure to hold a time
 *  range: -838:59:59 ~ 838:59:59
 *
 * Bit-field layout, in declaration order: 24-bit msecond, 8-bit second,
 * 8-bit minute, 12-bit hour, 11-bit day, 1-bit is_neg (all signed).
 */
struct Time
{
    signed msecond : 24;
    signed second : 8;
    signed minute : 8;
    signed hour : 12;
    signed day : 11;
    signed is_neg : 1;

    // Default value is the NULL column value = 0xFFFFFFFFFFFFFFFE
    Time() :
        msecond(0xFFFFFE),
        second(0xFF),
        minute(0xFF),
        hour(0xFFF),
        day(0x7FF),
        is_neg(1)
    {}

    // Construct a Time from a 64 bit integer InfiniDB time.
    Time(int64_t val) :
        msecond(val & 0xffffff),
        second((val >> 24) & 0xff),
        minute((val >> 32) & 0xff),
        hour((val >> 40) & 0xfff),
        day((val >> 52) & 0x7ff),
        is_neg(val >> 63)
    {}

    // Construct from individual fields, no value checking. A negative hour
    // forces is_neg on regardless of the neg argument.
    Time(signed d, signed h, signed min, signed sec, signed msec, bool neg) :
        msecond(msec),
        second(sec),
        minute(min),
        hour(h),
        day(d),
        is_neg(neg)
    {
        if (h < 0)
            is_neg = 1;
    }

    // Returns the time as the signed decimal number HHMMSS (msecond and day
    // are not included).
    int64_t convertToMySQLint() const;
    // Restores the NULL column value set by the default constructor.
    void reset();
};

inline
void Time::reset()
{
    is_neg = 1;
    day = 0x7FF;
    hour = 0xFFF;
    minute = 0xFF;
    second = 0xFF;
    msecond = 0xFFFFFE;
}

inline
int64_t Time::convertToMySQLint() const
{
    // A negative hour already carries the sign: minutes and seconds are
    // subtracted so the magnitude still reads HHMMSS.
    if (hour < 0)
        return (int64_t) (hour * 10000) - (minute * 100) - second;

    const int64_t magnitude = (int64_t) ((hour * 10000) + (minute * 100) + second);

    // Non-negative hour with the sign flag set: negate the whole value.
    if (is_neg)
        return magnitude * -1;

    return magnitude;
}
764
/** @brief a structure to hold a timestamp
 *
 * Bit-field layout, in declaration order: 20-bit msecond, 44-bit second.
 */
struct TimeStamp
{
    unsigned msecond : 20;
    unsigned long long second : 44;

    // Default value is the NULL column value = 0xFFFFFFFFFFFFFFFE
    TimeStamp( ) :
        msecond(0xFFFFE),
        second(0xFFFFFFFFFFF)
    {}

    // Construct a TimeStamp from a 64 bit integer Calpont timestamp.
    TimeStamp(uint64_t val) :
        msecond(val & 0xFFFFF),
        second(val >> 20)
    {}

    // Construct from the two parts, no value checking
    TimeStamp(unsigned msec, unsigned long long sec) :
        msecond(msec),
        second(sec)
    {}

    // Returns the timestamp, converted with the given timezone, as a
    // decimal number built from the calendar fields.
    int64_t convertToMySQLint(const std::string& timeZone) const;
    // Restores the NULL column value set by the default constructor.
    void reset();
};
783
784 inline
convertToMySQLint(const std::string & timeZone)785 int64_t TimeStamp::convertToMySQLint(const std::string& timeZone) const
786 {
787 const int TIMESTAMPTOSTRING1_LEN = 22; // YYYYMMDDHHMMSSmmmmmm\0
788 char buf[TIMESTAMPTOSTRING1_LEN];
789
790 MySQLTime time;
791 gmtSecToMySQLTime(second, time, timeZone);
792
793 sprintf(buf, "%04d%02d%02d%02d%02d%02d", time.year, time.month, time.day, time.hour, time.minute, time.second);
794
795 return (int64_t) atoll(buf);
796 }
797
798 inline
reset()799 void TimeStamp::reset()
800 {
801 msecond = 0xFFFFE;
802 second = 0xFFFFFFFFFFF;
803 }
804
805 inline
string_to_ll(const std::string & data,bool & bSaturate)806 int64_t string_to_ll( const std::string& data, bool& bSaturate )
807 {
808 // This function doesn't take into consideration our special values
809 // for NULL and EMPTY when setting the saturation point. Should it?
810 char* ep = NULL;
811 const char* str = data.c_str();
812 errno = 0;
813 int64_t value = strtoll(str, &ep, 10);
814
815 // (no digits) || (more chars) || (other errors & value = 0)
816 if ((ep == str) || (*ep != '\0') || (errno != 0 && value == 0))
817 throw logging::QueryDataExcept("value is not numerical.", logging::formatErr);
818
819 if (errno == ERANGE && (value == std::numeric_limits<int64_t>::max() || value == std::numeric_limits<int64_t>::min()))
820 bSaturate = true;
821
822 return value;
823 }
824
825 inline
string_to_ull(const std::string & data,bool & bSaturate)826 uint64_t string_to_ull( const std::string& data, bool& bSaturate )
827 {
828 // This function doesn't take into consideration our special values
829 // for NULL and EMPTY when setting the saturation point. Should it?
830 char* ep = NULL;
831 const char* str = data.c_str();
832 errno = 0;
833
834 // check for negative number. saturate to 0;
835 if (data.find('-') != data.npos)
836 {
837 bSaturate = true;
838 return 0;
839 }
840
841 uint64_t value = strtoull(str, &ep, 10);
842
843 // (no digits) || (more chars) || (other errors & value = 0)
844 if ((ep == str) || (*ep != '\0') || (errno != 0 && value == 0))
845 throw logging::QueryDataExcept("value is not numerical.", logging::formatErr);
846
847 if (errno == ERANGE && (value == std::numeric_limits<uint64_t>::max()))
848 bSaturate = true;
849
850 return value;
851 }
852
/** @brief DataConvert is a component for converting string data to Calpont format
 *
 * All members are static; the class only groups the conversion functions.
 */
class DataConvert
{
public:

    /**
     * @brief convert a column's data, represented as a string, to its native
     *        format
     *
     * @param colType   the column's type
     * @param dataOrig  the column's string representation of its data
     * @param bSaturate NOTE(review): presumably set when the value had to be
     *                  saturated to fit the column type - confirm in the
     *                  implementation
     * @param timeZone  timezone string passed through to the conversion
     *                  (NOTE(review): presumably only used for timestamp
     *                  columns - confirm)
     * @param nulFlag   not documented here; see the implementation
     * @param noRoundup not documented here; see the implementation
     * @param isUpdate  not documented here; see the implementation
     */
    EXPORT static boost::any convertColumnData( const execplan::CalpontSystemCatalog::ColType& colType,
            const std::string& dataOrig, bool& bSaturate, const std::string& timeZone,
            bool nulFlag = false, bool noRoundup = false, bool isUpdate = false);

    /**
     * @brief convert a date from native (packed integer) format to a string
     *
     * The buffer overload writes "YYYY-MM-DD" into buf, limited to buflen
     * bytes.
     *
     * @param datevalue the packed date value
     * @param buf       destination buffer (buffer overload only)
     * @param buflen    size of buf in bytes (buffer overload only)
     */
    EXPORT static std::string dateToString( int datevalue );
    static inline void dateToString( int datevalue, char* buf, unsigned int buflen );

    /**
     * @brief convert a datetime from native (packed integer) format to a
     *        string
     *
     * The buffer overload writes "YYYY-MM-DD HH:MM:SS" into buf, followed by
     * '.' and the fractional part when that part is non-zero or decimals is
     * non-zero. A decimals value outside 0..6 is treated as 0.
     *
     * @param datetimevalue the packed datetime value
     * @param buf           destination buffer (buffer overload only)
     * @param buflen        size of buf in bytes (buffer overload only)
     * @param decimals      minimum number of fractional digits printed
     */
    EXPORT static std::string datetimeToString( long long datetimevalue, long decimals = 0 );
    static inline void datetimeToString( long long datetimevalue, char* buf, unsigned int buflen, long decimals = 0 );

    /**
     * @brief convert a timestamp from native (packed integer) format to a
     *        string
     *
     * The buffer overload converts the seconds part to calendar fields using
     * the given timezone and writes "YYYY-MM-DD HH:MM:SS" into buf, followed
     * by '.' and the fractional part when that part is non-zero or decimals
     * is non-zero. A decimals value outside 0..6 is treated as 0.
     *
     * @param timestampvalue the packed timestamp value
     * @param buf            destination buffer (buffer overload only)
     * @param buflen         size of buf in bytes (buffer overload only)
     * @param timezone       "SYSTEM" or a "+HH:MM"/"-HH:MM" offset
     * @param decimals       minimum number of fractional digits printed
     */
    EXPORT static std::string timestampToString( long long timestampvalue, const std::string& timezone, long decimals = 0 );
    static inline void timestampToString( long long timestampvalue, char* buf, unsigned int buflen, const std::string& timezone, long decimals = 0 );

    /**
     * @brief convert a time from native (packed integer) format to a string
     *
     * @param timevalue the packed time value
     * @param buf       destination buffer (buffer overload only)
     * @param buflen    size of buf in bytes (buffer overload only)
     * @param decimals  NOTE(review): presumably the number of fractional
     *                  digits, as for datetimeToString - confirm
     */
    EXPORT static std::string timeToString( long long timevalue, long decimals = 0 );
    static inline void timeToString( long long timevalue, char* buf, unsigned int buflen, long decimals = 0);

    /**
     * @brief convert a date from native (packed integer) format to a string
     *        (alternate form; NOTE(review): presumably a separator-free
     *        layout - confirm in the implementation)
     *
     * @param datevalue the packed date value
     * @param buf       destination buffer (buffer overload only)
     * @param buflen    size of buf in bytes (buffer overload only)
     */
    EXPORT static std::string dateToString1( int datevalue );
    static inline void dateToString1( int datevalue, char* buf, unsigned int buflen );

    /**
     * @brief convert a datetime from native (packed integer) format to a
     *        string (alternate form; NOTE(review): presumably a
     *        separator-free layout - confirm in the implementation)
     *
     * @param datetimevalue the packed datetime value
     * @param buf           destination buffer (buffer overload only)
     * @param buflen        size of buf in bytes (buffer overload only)
     */
    EXPORT static std::string datetimeToString1( long long datetimevalue );
    static inline void datetimeToString1( long long datetimevalue, char* buf, unsigned int buflen );

    /**
     * @brief convert a timestamp from native (packed integer) format to a
     *        string (alternate form; NOTE(review): presumably a
     *        separator-free layout - confirm in the implementation)
     *
     * @param timestampvalue the packed timestamp value
     * @param buf            destination buffer (buffer overload only)
     * @param buflen         size of buf in bytes (buffer overload only)
     * @param timezone       timezone string used for the conversion
     */
    EXPORT static std::string timestampToString1( long long timestampvalue, const std::string& timezone );
    static inline void timestampToString1( long long timestampvalue, char* buf, unsigned int buflen, const std::string& timezone );

    /**
     * @brief convert a time from native (packed integer) format to a string
     *        (alternate form; NOTE(review): presumably a separator-free
     *        layout - confirm in the implementation)
     *
     * @param timevalue the packed time value
     * @param buf       destination buffer (buffer overload only)
     * @param buflen    size of buf in bytes (buffer overload only)
     */
    EXPORT static std::string timeToString1( long long timevalue );
    static inline void timeToString1( long long timevalue, char* buf, unsigned int buflen );

    /**
     * @brief convert a date column's data, represented as a string, to its
     *        native format. This function is for bulkload to use.
     *
     * @param dataOrg    the column's string representation of its data
     * @param dateFormat the format the date value is in
     * @param status     0 - success, -1 - fail
     * @param dataOrgLen length specification of dataOrg
     */
    EXPORT static int32_t convertColumnDate( const char* dataOrg,
            CalpontDateTimeFormat dateFormat,
            int& status, unsigned int dataOrgLen );

    /**
     * @brief Is specified date valid; used by binary bulk load
     */
    EXPORT static bool isColumnDateValid( int32_t date );

    /**
     * @brief convert a datetime column's data, represented as a string,
     *        to its native format. This function is for bulkload to use.
     *
     * @param dataOrg        the column's string representation of its data
     * @param datetimeFormat the format the datetime value is in
     * @param status         0 - success, -1 - fail
     * @param dataOrgLen     length specification of dataOrg
     */
    EXPORT static int64_t convertColumnDatetime( const char* dataOrg,
            CalpontDateTimeFormat datetimeFormat,
            int& status, unsigned int dataOrgLen );

    /**
     * @brief convert a timestamp column's data, represented as a string,
     *        to its native format. This function is for bulkload to use.
     *
     * @param dataOrg        the column's string representation of its data
     * @param datetimeFormat the format the timestamp value is in
     * @param status         0 - success, -1 - fail
     * @param dataOrgLen     length specification of dataOrg
     * @param timeZone       the timezone used for conversion to native format
     */
    EXPORT static int64_t convertColumnTimestamp( const char* dataOrg,
            CalpontDateTimeFormat datetimeFormat,
            int& status, unsigned int dataOrgLen,
            const std::string& timeZone );

    /**
     * @brief convert a time column's data, represented as a string,
     *        to its native format. This function is for bulkload to use.
     *
     * @param dataOrg        the column's string representation of its data
     * @param datetimeFormat the format the time value is in
     * @param status         0 - success, -1 - fail
     * @param dataOrgLen     length specification of dataOrg
     */
    EXPORT static int64_t convertColumnTime( const char* dataOrg,
            CalpontDateTimeFormat datetimeFormat,
            int& status, unsigned int dataOrgLen );

    /**
     * @brief Is specified datetime / time / timestamp valid; used by binary
     *        bulk load
     */
    EXPORT static bool isColumnDateTimeValid( int64_t dateTime );
    EXPORT static bool isColumnTimeValid( int64_t time );
    EXPORT static bool isColumnTimeStampValid( int64_t timeStamp );

    // NOTE(review): presumably reports whether row rownum of cr holds the
    // NULL value for colType - confirm in the implementation.
    EXPORT static bool isNullData(execplan::ColumnResult* cr, int rownum, execplan::CalpontSystemCatalog::ColType colType);
    // Format a scaled integer decimal value as text; the second overload
    // writes into buf (limited to buflen bytes).
    static inline std::string decimalToString(int64_t value, uint8_t scale, execplan::CalpontSystemCatalog::ColDataType colDataType);
    static inline void decimalToString(int64_t value, uint8_t scale, char* buf, unsigned int buflen, execplan::CalpontSystemCatalog::ColDataType colDataType);
    // Modifies charData in place; NOTE(review): presumably strips whitespace
    // from character data packed into the integer - confirm.
    static inline void trimWhitespace(int64_t& charData);

    // convert string to date
    EXPORT static int64_t stringToDate(const std::string& data);
    // convert string to datetime
    EXPORT static int64_t stringToDatetime(const std::string& data, bool* isDate = NULL);
    // convert string to timestamp
    EXPORT static int64_t stringToTimestamp(const std::string& data, const std::string& timeZone);
    // convert integer to date
    EXPORT static int64_t intToDate(int64_t data);
    // convert integer to datetime
    EXPORT static int64_t intToDatetime(int64_t data, bool* isDate = NULL);
    // convert integer to time
    EXPORT static int64_t intToTime(int64_t data, bool fromString = false);
    // convert string to date. alias to stringToDate
    EXPORT static int64_t dateToInt(const std::string& date);
    // convert string to datetime (presumably an alias to stringToDatetime - confirm)
    EXPORT static int64_t datetimeToInt(const std::string& datetime);
    // convert string to timestamp (presumably an alias to stringToTimestamp - confirm)
    EXPORT static int64_t timestampToInt(const std::string& timestamp, const std::string& timeZone);
    // convert string to time (presumably an alias to stringToTime - confirm)
    EXPORT static int64_t timeToInt(const std::string& time);
    // convert string to time
    EXPORT static int64_t stringToTime (const std::string& data);
    // bug4388, union type conversion
    EXPORT static execplan::CalpontSystemCatalog::ColType convertUnionColType(std::vector<execplan::CalpontSystemCatalog::ColType>&);
};
1038
dateToString(int datevalue,char * buf,unsigned int buflen)1039 inline void DataConvert::dateToString( int datevalue, char* buf, unsigned int buflen)
1040 {
1041 snprintf( buf, buflen, "%04d-%02d-%02d",
1042 (unsigned)((datevalue >> 16) & 0xffff),
1043 (unsigned)((datevalue >> 12) & 0xf),
1044 (unsigned)((datevalue >> 6) & 0x3f)
1045 );
1046 }
1047
datetimeToString(long long datetimevalue,char * buf,unsigned int buflen,long decimals)1048 inline void DataConvert::datetimeToString( long long datetimevalue, char* buf, unsigned int buflen, long decimals )
1049 {
1050 // 10 is default which means we don't need microseconds
1051 if (decimals > 6 || decimals < 0)
1052 {
1053 decimals = 0;
1054 }
1055
1056 int msec = 0;
1057
1058 if ((datetimevalue & 0xfffff) > 0)
1059 {
1060 msec = (unsigned)((datetimevalue) & 0xfffff);
1061 }
1062
1063 snprintf( buf, buflen, "%04d-%02d-%02d %02d:%02d:%02d",
1064 (unsigned)((datetimevalue >> 48) & 0xffff),
1065 (unsigned)((datetimevalue >> 44) & 0xf),
1066 (unsigned)((datetimevalue >> 38) & 0x3f),
1067 (unsigned)((datetimevalue >> 32) & 0x3f),
1068 (unsigned)((datetimevalue >> 26) & 0x3f),
1069 (unsigned)((datetimevalue >> 20) & 0x3f)
1070 );
1071
1072 if (msec || decimals)
1073 {
1074 snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, msec);
1075 }
1076 }
1077
timestampToString(long long timestampvalue,char * buf,unsigned int buflen,const std::string & timezone,long decimals)1078 inline void DataConvert::timestampToString( long long timestampvalue, char* buf, unsigned int buflen, const std::string& timezone, long decimals )
1079 {
1080 // 10 is default which means we don't need microseconds
1081 if (decimals > 6 || decimals < 0)
1082 {
1083 decimals = 0;
1084 }
1085
1086 TimeStamp timestamp(timestampvalue);
1087 int64_t seconds = timestamp.second;
1088
1089 MySQLTime time;
1090 gmtSecToMySQLTime(seconds, time, timezone);
1091
1092 snprintf( buf, buflen, "%04d-%02d-%02d %02d:%02d:%02d",
1093 time.year, time.month, time.day,
1094 time.hour, time.minute, time.second
1095 );
1096
1097 if (timestamp.msecond || decimals)
1098 {
1099 snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, timestamp.msecond);
1100 }
1101 }
1102
timeToString(long long timevalue,char * buf,unsigned int buflen,long decimals)1103 inline void DataConvert::timeToString( long long timevalue, char* buf, unsigned int buflen, long decimals )
1104 {
1105 // 10 is default which means we don't need microseconds
1106 if (decimals > 6 || decimals < 0)
1107 {
1108 decimals = 0;
1109 }
1110
1111 // Handle negative correctly
1112 int hour = 0, msec = 0;
1113
1114 if ((timevalue >> 40) & 0x800)
1115 {
1116 hour = 0xfffff000;
1117 }
1118
1119 hour |= ((timevalue >> 40) & 0xfff);
1120
1121 if ((timevalue & 0xffffff) > 0)
1122 {
1123 msec = (unsigned)((timevalue) & 0xffffff);
1124 }
1125
1126 if ((hour >= 0) && (timevalue >> 63))
1127 {
1128 buf[0] = '-';
1129 buf++;
1130 buflen--;
1131 }
1132
1133 snprintf( buf, buflen, "%02d:%02d:%02d",
1134 hour,
1135 (unsigned)((timevalue >> 32) & 0xff),
1136 (unsigned)((timevalue >> 24) & 0xff)
1137 );
1138
1139 if (msec || decimals)
1140 {
1141 // Pad start with zeros
1142 snprintf(buf + strlen(buf), buflen - strlen(buf), ".%0*d", (int)decimals, msec);
1143 }
1144 }
1145
dateToString1(int datevalue,char * buf,unsigned int buflen)1146 inline void DataConvert::dateToString1( int datevalue, char* buf, unsigned int buflen)
1147 {
1148 snprintf( buf, buflen, "%04d%02d%02d",
1149 (unsigned)((datevalue >> 16) & 0xffff),
1150 (unsigned)((datevalue >> 12) & 0xf),
1151 (unsigned)((datevalue >> 6) & 0x3f)
1152 );
1153 }
1154
datetimeToString1(long long datetimevalue,char * buf,unsigned int buflen)1155 inline void DataConvert::datetimeToString1( long long datetimevalue, char* buf, unsigned int buflen )
1156 {
1157 snprintf( buf, buflen, "%04d%02d%02d%02d%02d%02d",
1158 (unsigned)((datetimevalue >> 48) & 0xffff),
1159 (unsigned)((datetimevalue >> 44) & 0xf),
1160 (unsigned)((datetimevalue >> 38) & 0x3f),
1161 (unsigned)((datetimevalue >> 32) & 0x3f),
1162 (unsigned)((datetimevalue >> 26) & 0x3f),
1163 (unsigned)((datetimevalue >> 20) & 0x3f)
1164 );
1165 }
1166
timestampToString1(long long timestampvalue,char * buf,unsigned int buflen,const std::string & timezone)1167 inline void DataConvert::timestampToString1( long long timestampvalue, char* buf, unsigned int buflen, const std::string& timezone )
1168 {
1169 TimeStamp timestamp(timestampvalue);
1170 int64_t seconds = timestamp.second;
1171
1172 MySQLTime time;
1173 gmtSecToMySQLTime(seconds, time, timezone);
1174
1175 snprintf( buf, buflen, "%04d%02d%02d%02d%02d%02d",
1176 time.year, time.month, time.day,
1177 time.hour, time.minute, time.second
1178 );
1179 }
1180
timeToString1(long long timevalue,char * buf,unsigned int buflen)1181 inline void DataConvert::timeToString1( long long timevalue, char* buf, unsigned int buflen )
1182 {
1183 // Handle negative correctly
1184 int hour = 0;
1185
1186 if ((timevalue >> 40) & 0x800)
1187 {
1188 hour = 0xfffff000;
1189 }
1190
1191 hour |= ((timevalue >> 40) & 0xfff);
1192
1193 if ((hour >= 0) && (timevalue >> 63))
1194 {
1195 buf[0] = '-';
1196 buf++;
1197 buflen--;
1198 }
1199 // this snprintf call causes a compiler warning b/c buffer size is less
1200 // then maximum string size.
1201 #if defined(__GNUC__) && __GNUC__ >= 7
1202 #pragma GCC diagnostic push
1203 #pragma GCC diagnostic ignored "-Wformat-truncation="
1204 snprintf( buf, buflen, "%02d%02d%02d",
1205 hour,
1206 (unsigned)((timevalue >> 32) & 0xff),
1207 (unsigned)((timevalue >> 14) & 0xff)
1208 );
1209 #pragma GCC diagnostic pop
1210 #else
1211 snprintf( buf, buflen, "%02d%02d%02d",
1212 hour,
1213 (unsigned)((timevalue >> 32) & 0xff),
1214 (unsigned)((timevalue >> 14) & 0xff)
1215 );
1216 #endif
1217 }
1218
decimalToString(int64_t value,uint8_t scale,execplan::CalpontSystemCatalog::ColDataType colDataType)1219 inline std::string DataConvert::decimalToString(int64_t value, uint8_t scale, execplan::CalpontSystemCatalog::ColDataType colDataType)
1220 {
1221 char buf[80];
1222 DataConvert::decimalToString(value, scale, buf, 80, colDataType);
1223 return std::string(buf);
1224 }
1225
decimalToString(int64_t int_val,uint8_t scale,char * buf,unsigned int buflen,execplan::CalpontSystemCatalog::ColDataType colDataType)1226 inline void DataConvert::decimalToString(int64_t int_val, uint8_t scale, char* buf, unsigned int buflen,
1227 execplan::CalpontSystemCatalog::ColDataType colDataType)
1228 {
1229 // Need to convert a string with a binary unsigned number in it to a 64-bit signed int
1230
1231 // MySQL seems to round off values unless we use the string store method. Groan.
1232 // Taken from ha_mcs_impl.cpp
1233
1234 //biggest Calpont supports is DECIMAL(18,x), or 18 total digits+dp+sign for column
1235 // Need 19 digits maxium to hold a sum result of 18 digits decimal column.
1236 if (isUnsigned(colDataType))
1237 {
1238 #ifndef __LP64__
1239 snprintf(buf, buflen, "%llu", static_cast<uint64_t>(int_val));
1240 #else
1241 snprintf(buf, buflen, "%lu", static_cast<uint64_t>(int_val));
1242 #endif
1243 }
1244 else
1245 {
1246 #ifndef __LP64__
1247 snprintf(buf, buflen, "%lld", int_val);
1248 #else
1249 snprintf(buf, buflen, "%ld", int_val);
1250 #endif
1251 }
1252
1253 if (scale == 0)
1254 return;
1255
1256 //we want to move the last dt_scale chars right by one spot to insert the dp
1257 //we want to move the trailing null as well, so it's really dt_scale+1 chars
1258 size_t l1 = strlen(buf);
1259 char* ptr = &buf[0];
1260
1261 if (int_val < 0)
1262 {
1263 ptr++;
1264 idbassert(l1 >= 2);
1265 l1--;
1266 }
1267
1268 //need to make sure we have enough leading zeros for this to work...
1269 //at this point scale is always > 0
1270 size_t l2 = 1;
1271
1272 if ((unsigned)scale > l1)
1273 {
1274 const char* zeros = "00000000000000000000"; //20 0's
1275 size_t diff = 0;
1276
1277 if (int_val != 0)
1278 diff = scale - l1; //this will always be > 0
1279 else
1280 diff = scale;
1281
1282 memmove((ptr + diff), ptr, l1 + 1); //also move null
1283 memcpy(ptr, zeros, diff);
1284
1285 if (int_val != 0)
1286 l1 = 0;
1287 else
1288 l1 = 1;
1289 }
1290 else if ((unsigned)scale == l1)
1291 {
1292 l1 = 0;
1293 l2 = 2;
1294 }
1295 else
1296 {
1297 l1 -= scale;
1298 }
1299
1300 memmove((ptr + l1 + l2), (ptr + l1), scale + 1); //also move null
1301
1302 if (l2 == 2)
1303 *(ptr + l1++) = '0';
1304
1305 *(ptr + l1) = '.';
1306 }
1307
trimWhitespace(int64_t & charData)1308 inline void DataConvert::trimWhitespace(int64_t& charData)
1309 {
1310 // Trims whitespace characters off non-dict character data
1311 char* ch_data = (char*) &charData;
1312
1313 for (int8_t i = 7; i > 0; i--)
1314 {
1315 if (ch_data[i] == ' ' || ch_data[i] == '\0')
1316 ch_data[i] = '\0';
1317 else
1318 break;
1319 }
1320 }
1321
1322 } // namespace dataconvert
1323
1324 #undef EXPORT
1325
1326 #endif //DATACONVERT_H
1327
1328