1 /* -----------------------------------------------------------------------
2 * formatting.c
3 *
4 * src/backend/utils/adt/formatting.c
5 *
6 *
7 * Portions Copyright (c) 1999-2018, PostgreSQL Global Development Group
8 *
9 *
10 * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11 *
12 * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13 * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14 *
15 *
 *	 Cache & Memory:
 *	The routines use their own internal cache of parsed format pictures.
 *
 *	The cache uses a static buffer and is persistent across transactions.  If
 *	a format picture is bigger than the cache buffer, the parser is called
 *	every time.
22 *
 *	 NOTE for Number version:
 *	Everything in this version is implemented as keywords (=> suffixes are
 *	not used), because a format picture describes *one* item (a number)
 *	only.  This differs from the timestamp version, where each keyword can
 *	have a suffix.
28 *
 *	 NOTE for Timestamp routines:
 *	This module does *not* use the POSIX 'struct tm' type, but rather the
 *	PgSQL type, in which tm_mon is based on one (*not* zero) and the year
 *	is *not* based on 1900 but is the full year number.
 *	The module supports AD / BC / AM / PM.
34 *
35 * Supported types for to_char():
36 *
37 * Timestamp, Numeric, int4, int8, float4, float8
38 *
39 * Supported types for reverse conversion:
40 *
41 * Timestamp - to_timestamp()
42 * Date - to_date()
43 * Numeric - to_number()
44 *
45 *
46 * Karel Zak
47 *
48 * TODO
49 * - better number building (formatting) / parsing, now it isn't
50 * ideal code
51 * - use Assert()
52 * - add support for abstime
53 * - add support for roman number to standard number conversion
54 * - add support for number spelling
55 * - add support for string to string formatting (we must be better
56 * than Oracle :-),
57 * to_char('Hello', 'X X X X X') -> 'H e l l o'
58 *
59 * -----------------------------------------------------------------------
60 */
61
/*
 * When DEBUG_TO_FROM_CHAR is defined, DEBUG_elog_output names the value
 * (DEBUG3) that the debug macros of this file pass to elog() as first
 * argument.
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_elog_output	DEBUG3
#endif
65
66 #include "postgres.h"
67
68 #include <ctype.h>
69 #include <unistd.h>
70 #include <math.h>
71 #include <float.h>
72 #include <limits.h>
73
74 /*
75 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
76 * declare them in <wchar.h>.
77 */
78 #ifdef HAVE_WCHAR_H
79 #include <wchar.h>
80 #endif
81 #ifdef HAVE_WCTYPE_H
82 #include <wctype.h>
83 #endif
84
85 #ifdef USE_ICU
86 #include <unicode/ustring.h>
87 #endif
88
89 #include "catalog/pg_collation.h"
90 #include "mb/pg_wchar.h"
91 #include "parser/scansup.h"
92 #include "utils/builtins.h"
93 #include "utils/date.h"
94 #include "utils/datetime.h"
95 #include "utils/formatting.h"
96 #include "utils/int8.h"
97 #include "utils/numeric.h"
98 #include "utils/pg_locale.h"
99
/* ----------
 * Routines type
 *
 * Distinguishes the two families of format pictures handled in this file.
 * NOTE(review): presumably passed as the 'ver' argument of parse_format();
 * confirm against the callers.
 * ----------
 */
#define DCH_TYPE				1	/* DATE-TIME version */
#define NUM_TYPE				2	/* NUMBER version */
106
/* ----------
 * KeyWord index
 *
 * The keyword index tables are addressed by (character - ' ') and have
 * KeyWord_INDEX_SIZE slots.  KeyWord_INDEX_FILTER yields 1 for a character
 * strictly between ' ' and '~', and 0 for every other character.
 * ----------
 */
#define KeyWord_INDEX_SIZE			('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	(((_c) > ' ' && (_c) < '~') ? 1 : 0)
113
/* ----------
 * Maximal length of one node
 *
 * NOTE(review): the remark on NUM_MAX_ITEM_SIZ mentions 15 chars for RN
 * while the value is 8; presumably roman numerals are sized separately by
 * the code that uses this constant -- confirm before changing either.
 * ----------
 */
#define DCH_MAX_ITEM_SIZ		12	/* max localized day name */
#define NUM_MAX_ITEM_SIZ		8	/* roman number (RN has 15 chars) */
120
121
/* ----------
 * Format parser structs
 * ----------
 */

/*
 * KeySuffix: one prefix/postfix modifier that can be attached to a keyword.
 */
typedef struct
{
	char	   *name;			/* suffix string */
	int			len;			/* suffix length */
	int			id;				/* used in node->suffix */
	int			type;			/* prefix / postfix */
} KeySuffix;
133
/* ----------
 * FromCharDateMode
 * ----------
 *
 * Names the date convention a keyword belongs to.  The conventions are
 * distinct and mutually exclusive; FROM_CHAR_DATE_NONE marks a keyword that
 * does not affect the date mode at all.
 */
typedef enum
{
	/* Value does not affect date mode. */
	FROM_CHAR_DATE_NONE = 0,
	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_GREGORIAN = 1,
	/* ISO 8601 week date */
	FROM_CHAR_DATE_ISOWEEK = 2
} FromCharDateMode;
147
/*
 * KeyWord: one entry of a keyword table (DCH_keywords / NUM_keywords).
 */
typedef struct
{
	const char *name;			/* keyword text; NULL in the table terminator */
	int			len;			/* length of name */
	int			id;				/* DCH_xxx / NUM_xxx enum value */
	bool		is_digit;		/* not relevant for NUMBER keywords */
	FromCharDateMode date_mode; /* date convention the keyword belongs to */
} KeyWord;
156
/*
 * FormatNode: one element of a parsed format picture.  Which of 'key' and
 * 'character' is meaningful depends on 'type'.
 */
typedef struct
{
	int			type;			/* NODE_TYPE_XXX, see below */
	const KeyWord *key;			/* if type is ACTION */
	char		character[MAX_MULTIBYTE_CHAR_LEN + 1];	/* if type is CHAR */
	int			suffix;			/* keyword prefix/suffix code, if any */
} FormatNode;
164
/* values for FormatNode.type */
#define NODE_TYPE_END		1
#define NODE_TYPE_ACTION	2
#define NODE_TYPE_CHAR		3

/* values for KeySuffix.type */
#define SUFFTYPE_PREFIX		1
#define SUFFTYPE_POSTFIX	2

/* 12 or 24 hour clock (see the 'clock' field of TmFromChar) */
#define CLOCK_24_HOUR		0
#define CLOCK_12_HOUR		1
174
175
/* ----------
 * Full months
 *
 * English month names, January first; the array is NULL-terminated.
 * ----------
 */
static const char *const months_full[] = {
	"January", "February", "March", "April", "May", "June", "July",
	"August", "September", "October", "November", "December", NULL
};

/* Abbreviated English day names, Sunday first; NULL-terminated. */
static const char *const days_short[] = {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
};
188
/* ----------
 * AD / BC
 * ----------
 *	There is no 0 AD; years go from 1 BC to 1 AD.  ADJUST_YEAR therefore
 *	returns a positive year unchanged and turns a year <= 0 into the
 *	positive number -(year - 1), so 0 becomes 1, -1 becomes 2, and so on.
 *	For interval years, the year is returned as it is.
 */
#define ADJUST_YEAR(year, is_interval) \
	((!(is_interval) && (year) <= 0) ? -((year) - 1) : (year))
197
/* era markers, in the long form with periods and the form without */
#define A_D_STR		"A.D."
#define a_d_STR		"a.d."
#define AD_STR		"AD"
#define ad_STR		"ad"

#define B_C_STR		"B.C."
#define b_c_STR		"b.c."
#define BC_STR		"BC"
#define bc_STR		"bc"

/*
 * AD / BC strings for seq_search.
 *
 * These are given in two variants, a long form with periods and a standard
 * form without.
 *
 * The array is laid out such that matches for AD have an even index, and
 * matches for BC have an odd index.  So the boolean value for BC is given by
 * taking the array index of the match, modulo 2.
 *
 * Both arrays are NULL-terminated.
 */
static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
220
/* ----------
 * AM / PM
 *
 * Meridiem markers, in the long form with periods and the form without.
 * ----------
 */
#define A_M_STR		"A.M."
#define a_m_STR		"a.m."
#define AM_STR		"AM"
#define am_STR		"am"

#define P_M_STR		"P.M."
#define p_m_STR		"p.m."
#define PM_STR		"PM"
#define pm_STR		"pm"

/*
 * AM / PM strings for seq_search.
 *
 * These are given in two variants, a long form with periods and a standard
 * form without.
 *
 * The array is laid out such that matches for AM have an even index, and
 * matches for PM have an odd index.  So the boolean value for PM is given by
 * taking the array index of the match, modulo 2.
 *
 * Both arrays are NULL-terminated.
 */
static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
247
/* ----------
 * Months in roman-numeral
 * (The arrays must be in reverse order, December first, for seq_search
 *	(in FROM_CHAR), because 'VIII' must have higher precedence than 'V'.)
 * Both arrays are NULL-terminated.
 * ----------
 */
static const char *const rm_months_upper[] =
{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};

static const char *const rm_months_lower[] =
{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
259
/* ----------
 * Roman numbers
 *
 * rm1 holds the numerals for 1..9, rm10 those for 10..90 and rm100 those
 * for 100..900; entry [n - 1] is the numeral for the n-th multiple.  Each
 * array is NULL-terminated.
 * ----------
 */
static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
267
/* ----------
 * Ordinal postfixes
 *
 * Upper-case and lower-case variants in the same order (st, nd, rd, th);
 * both arrays are NULL-terminated.
 * ----------
 */
static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
static const char *const numth[] = {"st", "nd", "rd", "th", NULL};

/* ----------
 * Flags & Options:
 *
 * Case selector for the ordinal postfix.
 * NOTE(review): presumably the 'type' argument of get_th() / str_numth();
 * confirm against those functions.
 * ----------
 */
#define TH_UPPER		1
#define TH_LOWER		2
281
/* ----------
 * Number description struct
 *
 * Summary of a number format picture, filled in while the picture is
 * parsed.
 * ----------
 */
typedef struct
{
	int			pre;			/* (count) numbers before decimal */
	int			post;			/* (count) numbers after decimal */
	int			lsign;			/* want locales sign */
	int			flag;			/* number parameters */
	int			pre_lsign_num;	/* tmp value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of first zero */
	int			zero_end;		/* position of last zero */
	int			need_locale;	/* needs it locale */
} NUMDesc;
298
/* ----------
 * Flags for NUMBER version
 *
 * Bit values stored in NUMDesc.flag.  Numbering starts at bit 1; bit 0 is
 * not assigned.
 * ----------
 */
#define NUM_F_DECIMAL		(1 << 1)
#define NUM_F_LDECIMAL		(1 << 2)
#define NUM_F_ZERO			(1 << 3)
#define NUM_F_BLANK			(1 << 4)
#define NUM_F_FILLMODE		(1 << 5)
#define NUM_F_LSIGN			(1 << 6)
#define NUM_F_BRACKET		(1 << 7)
#define NUM_F_MINUS			(1 << 8)
#define NUM_F_PLUS			(1 << 9)
#define NUM_F_ROMAN			(1 << 10)
#define NUM_F_MULTI			(1 << 11)
#define NUM_F_PLUS_POST		(1 << 12)
#define NUM_F_MINUS_POST	(1 << 13)
#define NUM_F_EEEE			(1 << 14)

/* values for NUMDesc.lsign */
#define NUM_LSIGN_PRE	(-1)
#define NUM_LSIGN_POST	1
#define NUM_LSIGN_NONE	0
321
/* ----------
 * Tests
 *
 * Each macro takes a pointer to a NUMDesc and yields the masked flag bit:
 * zero when the flag is clear, the (non-zero) NUM_F_xxx value when it is
 * set.  The result is not normalized to 1.
 * ----------
 */
#define IS_DECIMAL(_f)	((_f)->flag & NUM_F_DECIMAL)
#define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
#define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
#define IS_BLANK(_f)	((_f)->flag & NUM_F_BLANK)
#define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
#define IS_BRACKET(_f)	((_f)->flag & NUM_F_BRACKET)
#define IS_MINUS(_f)	((_f)->flag & NUM_F_MINUS)
#define IS_LSIGN(_f)	((_f)->flag & NUM_F_LSIGN)
#define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
#define IS_ROMAN(_f)	((_f)->flag & NUM_F_ROMAN)
#define IS_MULTI(_f)	((_f)->flag & NUM_F_MULTI)
#define IS_EEEE(_f)		((_f)->flag & NUM_F_EEEE)
338
/* ----------
 * Format picture cache
 *
 * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
 * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
 *
 * For simplicity, the cache entries are fixed-size, so they allow for the
 * worst case of a FormatNode for each byte in the picture string.
 *
 * The maximum number of entries in the caches is DCH_CACHE_ENTRIES and
 * NUM_CACHE_ENTRIES, respectively.
 * ----------
 */
#define NUM_CACHE_SIZE		64
#define NUM_CACHE_ENTRIES	20
#define DCH_CACHE_SIZE		128
#define DCH_CACHE_ENTRIES	20
356
/*
 * One cached date/time format picture.  The arrays have one extra slot
 * beyond DCH_CACHE_SIZE.
 */
typedef struct
{
	FormatNode	format[DCH_CACHE_SIZE + 1]; /* parsed picture */
	char		str[DCH_CACHE_SIZE + 1];	/* picture string */
	bool		valid;
	int			age;			/* NOTE(review): presumably set from
								 * DCHCounter for replacement -- confirm */
} DCHCacheEntry;

/*
 * One cached number format picture; besides the parsed nodes it also keeps
 * the NUMDesc that belongs to the picture.
 */
typedef struct
{
	FormatNode	format[NUM_CACHE_SIZE + 1]; /* parsed picture */
	char		str[NUM_CACHE_SIZE + 1];	/* picture string */
	bool		valid;
	int			age;			/* NOTE(review): presumably set from
								 * NUMCounter for replacement -- confirm */
	NUMDesc		Num;			/* number description of the picture */
} NUMCacheEntry;
373
/*
 * global cache for date/time format pictures
 *
 * File-local static storage with a fixed number of slots.
 */
static DCHCacheEntry DCHCache[DCH_CACHE_ENTRIES];
static int	n_DCHCache = 0;		/* current number of entries */
static int	DCHCounter = 0;		/* aging-event counter */

/*
 * global cache for number format pictures
 *
 * File-local static storage with a fixed number of slots.
 */
static NUMCacheEntry NUMCache[NUM_CACHE_ENTRIES];
static int	n_NUMCache = 0;		/* current number of entries */
static int	NUMCounter = 0;		/* aging-event counter */
383
384 /* ----------
385 * For char->date/time conversion
386 * ----------
387 */
388 typedef struct
389 {
390 FromCharDateMode mode;
391 int hh,
392 pm,
393 mi,
394 ss,
395 ssss,
396 d, /* stored as 1-7, Sunday = 1, 0 means missing */
397 dd,
398 ddd,
399 mm,
400 ms,
401 year,
402 bc,
403 ww,
404 w,
405 cc,
406 j,
407 us,
408 yysz, /* is it YY or YYYY ? */
409 clock, /* 12 or 24 hour clock? */
410 tzsign, /* +1, -1 or 0 if timezone info is absent */
411 tzh,
412 tzm;
413 } TmFromChar;
414
415 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
416
/* ----------
 * Debug
 *
 * DEBUG_TMFC(_X) logs every field of the TmFromChar that _X points to,
 * including the timezone fields; DEBUG_TM(_X) logs the fields of the
 * struct pg_tm that _X points to.  Both go through elog() with
 * DEBUG_elog_output as first argument.  Without DEBUG_TO_FROM_CHAR both
 * macros expand to nothing, so their argument is not evaluated.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d\ntzsign: %d\ntzh: %d\ntzm: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock, (_X)->tzsign, (_X)->tzh, (_X)->tzm)
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
437
/* ----------
 * Datetime to char conversion
 *
 * TmToChar bundles the broken-down time with the fractional seconds and the
 * timezone string.
 * ----------
 */
typedef struct TmToChar
{
	struct pg_tm tm;			/* classic 'tm' struct */
	fsec_t		fsec;			/* fractional seconds */
	const char *tzn;			/* timezone */
} TmToChar;

/* accessors; each takes a pointer to a TmToChar */
#define tmtcTm(_X)	(&(_X)->tm)		/* address of the embedded pg_tm */
#define tmtcTzn(_X) ((_X)->tzn)
#define tmtcFsec(_X)	((_X)->fsec)
452
/*
 * ZERO_tm resets the tm struct that _X points to: all fields go to zero,
 * except tm_mday and tm_mon which go to 1, and tm_zone which becomes NULL.
 */
#define ZERO_tm(_X) \
do { \
	(_X)->tm_sec = 0; \
	(_X)->tm_year = 0; \
	(_X)->tm_min = 0; \
	(_X)->tm_wday = 0; \
	(_X)->tm_hour = 0; \
	(_X)->tm_yday = 0; \
	(_X)->tm_isdst = 0; \
	(_X)->tm_mday = 1; \
	(_X)->tm_mon = 1; \
	(_X)->tm_zone = NULL; \
} while(0)

/*
 * ZERO_tmtc resets a whole TmToChar: the embedded tm struct via ZERO_tm,
 * the fractional seconds to 0 and the timezone string to NULL.
 */
#define ZERO_tmtc(_X) \
do { \
	ZERO_tm(tmtcTm(_X)); \
	tmtcFsec(_X) = 0; \
	tmtcTzn(_X) = NULL; \
} while(0)
467
/*
 *	to_char(time) appears to to_char() as an interval, so this check
 *	is really for interval and time data types.
 *
 *	The macro reads a variable named is_interval, which must be in scope
 *	where it is expanded, and raises an ERROR (invalid datetime format)
 *	when that variable is true.
 */
#define INVALID_FOR_INTERVAL  \
do { \
	if (is_interval) \
		ereport(ERROR, \
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
				 errmsg("invalid format specification for an interval value"), \
				 errhint("Intervals are not tied to specific calendar dates."))); \
} while(0)
480
481 /*****************************************************************************
482 * KeyWord definitions
483 *****************************************************************************/
484
/* ----------
 * Suffixes:
 *
 * Bit flags recorded for the prefix/postfix modifiers of a keyword.
 * ----------
 */
#define DCH_S_FM	0x01
#define DCH_S_TH	0x02
#define DCH_S_th	0x04
#define DCH_S_SP	0x08
#define DCH_S_TM	0x10

/* ----------
 * Suffix tests
 *
 * Each test takes a suffix flag word and yields 1 or 0, except S_TH_TYPE,
 * which yields TH_UPPER when the upper-case TH flag is set and TH_LOWER
 * otherwise.
 * ----------
 */
#define S_THth(_s)	(((_s) & DCH_S_TH) || ((_s) & DCH_S_th))
#define S_TH(_s)	(((_s) & DCH_S_TH) != 0)
#define S_th(_s)	(((_s) & DCH_S_th) != 0)
#define S_TH_TYPE(_s)	((((_s) & DCH_S_TH) != 0) ? TH_UPPER : TH_LOWER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)	(((_s) & DCH_S_FM) != 0)
#define S_SP(_s)	(((_s) & DCH_S_SP) != 0)
#define S_TM(_s)	(((_s) & DCH_S_TM) != 0)
508
/* ----------
 * Suffixes definition for DATE-TIME TO/FROM CHAR
 *
 * Each entry gives the suffix text, its length, the DCH_S_xxx flag and
 * whether it is written before (prefix) or after (postfix) the keyword.
 * The table ends with an entry whose name is NULL.
 * ----------
 */
#define TM_SUFFIX_LEN	2

static const KeySuffix DCH_suff[] = {
	{"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
	{"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
	{"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
	{"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
	{"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
	{"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
	{"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
	/* last */
	{NULL, 0, 0, 0}
};
526
527
/* ----------
 * Format-pictures (KeyWord).
 *
 * The KeyWord tables are sorted alphabetically, *BUT* keywords that start
 * alike are sorted from the complicated one to the easy one, so that a
 * keyword which is a prefix of another keyword comes after it:
 *
 *	(example: "DDD","DD","Day","D" )
 *
 * This specific order is needed by the sequential search, because a keyword
 * in a format string has no explicit end.  Which keyword is in "HH12blabla":
 * "HH" or "HH12"?  "HH12" must be tried first, because "HH" is in the string
 * too, but it is not the right match.
 *
 * (!)
 *	 - The position of a keyword in its table is the same as the position of
 *	   its entry in the DCH_poz / NUM_poz enum.
 * (!)
 *
 * For fast search an 'int index[]' is used.  The index is the ASCII table
 * from position 32 (' ') to 126 (~); each slot holds the DCH_ / NUM_ enum
 * value of the first keyword that starts with that character, or -1 if the
 * character does not start any keyword.  Search example for string "MM":
 *	1) look at index['M' - 32],
 *	2) take the keyword position (enum DCH_MI) from the index,
 *	3) run a sequential search in keywords[] from this position.
 *
 * ----------
 */
555
/*
 * Ids (and table positions) of the DATE-TIME keywords.  The order matters:
 * DCH_index stores these values as start positions into DCH_keywords.
 */
typedef enum
{
	DCH_A_D,
	DCH_A_M,
	DCH_AD,
	DCH_AM,
	DCH_B_C,
	DCH_BC,
	DCH_CC,
	DCH_DAY,
	DCH_DDD,
	DCH_DD,
	DCH_DY,
	DCH_Day,
	DCH_Dy,
	DCH_D,
	DCH_FX,						/* global suffix */
	DCH_HH24,
	DCH_HH12,
	DCH_HH,
	DCH_IDDD,
	DCH_ID,
	DCH_IW,
	DCH_IYYY,
	DCH_IYY,
	DCH_IY,
	DCH_I,
	DCH_J,
	DCH_MI,
	DCH_MM,
	DCH_MONTH,
	DCH_MON,
	DCH_MS,
	DCH_Month,
	DCH_Mon,
	DCH_OF,
	DCH_P_M,
	DCH_PM,
	DCH_Q,
	DCH_RM,
	DCH_SSSS,
	DCH_SS,
	DCH_TZH,
	DCH_TZM,
	DCH_TZ,
	DCH_US,
	DCH_WW,
	DCH_W,
	DCH_Y_YYY,
	DCH_YYYY,
	DCH_YYY,
	DCH_YY,
	DCH_Y,
	DCH_a_d,
	DCH_a_m,
	DCH_ad,
	DCH_am,
	DCH_b_c,
	DCH_bc,
	DCH_cc,
	DCH_day,
	DCH_ddd,
	DCH_dd,
	DCH_dy,
	DCH_d,
	DCH_fx,
	DCH_hh24,
	DCH_hh12,
	DCH_hh,
	DCH_iddd,
	DCH_id,
	DCH_iw,
	DCH_iyyy,
	DCH_iyy,
	DCH_iy,
	DCH_i,
	DCH_j,
	DCH_mi,
	DCH_mm,
	DCH_month,
	DCH_mon,
	DCH_ms,
	DCH_p_m,
	DCH_pm,
	DCH_q,
	DCH_rm,
	DCH_ssss,
	DCH_ss,
	DCH_tz,
	DCH_us,
	DCH_ww,
	DCH_w,
	DCH_y_yyy,
	DCH_yyyy,
	DCH_yyy,
	DCH_yy,
	DCH_y,

	/* last */
	_DCH_last_
}			DCH_poz;
657
/*
 * Ids (and table positions) of the NUMBER keywords.  The order matters:
 * NUM_index stores these values as start positions into NUM_keywords.
 */
typedef enum
{
	NUM_COMMA,
	NUM_DEC,
	NUM_0,
	NUM_9,
	NUM_B,
	NUM_C,
	NUM_D,
	NUM_E,
	NUM_FM,
	NUM_G,
	NUM_L,
	NUM_MI,
	NUM_PL,
	NUM_PR,
	NUM_RN,
	NUM_SG,
	NUM_SP,
	NUM_S,
	NUM_TH,
	NUM_V,
	NUM_b,
	NUM_c,
	NUM_d,
	NUM_e,
	NUM_fm,
	NUM_g,
	NUM_l,
	NUM_mi,
	NUM_pl,
	NUM_pr,
	NUM_rn,
	NUM_sg,
	NUM_sp,
	NUM_s,
	NUM_th,
	NUM_v,

	/* last */
	_NUM_last_
}			NUM_poz;
700
/* ----------
 * KeyWords for DATE-TIME version
 *
 * Entries that start with the same letter are adjacent, and an entry that is
 * a prefix of another one comes after it.  Many lower-case spellings reuse
 * the id of the upper-case keyword (e.g. "dd" has id DCH_DD), while others
 * have an id of their own (e.g. "day" has id DCH_day).  The table ends with
 * an entry whose name is NULL.
 * ----------
 */
static const KeyWord DCH_keywords[] = {
/*	name, len, id, is_digit, date_mode */
	{"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE},	/* A */
	{"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
	{"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
	{"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
	{"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE},	/* B */
	{"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
	{"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},	/* C */
	{"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE},	/* D */
	{"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
	{"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
	{"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
	{"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
	{"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
	{"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
	{"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},	/* F */
	{"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},	/* H */
	{"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
	{"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
	{"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},	/* I */
	{"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
	{"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
	{"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
	{"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
	{"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
	{"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},	/* M */
	{"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
	{"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
	{"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
	{"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
	{"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
	{"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
	{"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE},	/* O */
	{"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE},	/* P */
	{"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
	{"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
	{"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
	{"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},	/* S */
	{"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
	{"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE},	/* T */
	{"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
	{"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
	{"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE},	/* U */
	{"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},	/* W */
	{"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
	{"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},	/* Y */
	{"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
	{"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
	{"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE},	/* a */
	{"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
	{"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
	{"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
	{"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE},	/* b */
	{"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
	{"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},	/* c */
	{"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE},	/* d */
	{"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
	{"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
	{"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
	{"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
	{"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},	/* f */
	{"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},	/* h */
	{"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
	{"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
	{"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},	/* i */
	{"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
	{"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
	{"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
	{"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
	{"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
	{"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},	/* m */
	{"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
	{"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
	{"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
	{"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
	{"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE},	/* p */
	{"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
	{"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
	{"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
	{"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},	/* s */
	{"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
	{"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},	/* t */
	{"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE},	/* u */
	{"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},	/* w */
	{"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
	{"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},	/* y */
	{"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
	{"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},

	/* last */
	{NULL, 0, 0, 0, 0}
};
806
/* ----------
 * KeyWords for NUMBER version
 *
 * The is_digit and date_mode fields are not relevant here; the initializers
 * leave them out.  Most lower-case spellings reuse the id of the upper-case
 * keyword; "rn" and "th" have ids of their own.  The table ends with an
 * entry whose name is NULL.
 * ----------
 */
static const KeyWord NUM_keywords[] = {
/*	name, len, id			is in Index */
	{",", 1, NUM_COMMA},		/* , */
	{".", 1, NUM_DEC},			/* . */
	{"0", 1, NUM_0},			/* 0 */
	{"9", 1, NUM_9},			/* 9 */
	{"B", 1, NUM_B},			/* B */
	{"C", 1, NUM_C},			/* C */
	{"D", 1, NUM_D},			/* D */
	{"EEEE", 4, NUM_E},			/* E */
	{"FM", 2, NUM_FM},			/* F */
	{"G", 1, NUM_G},			/* G */
	{"L", 1, NUM_L},			/* L */
	{"MI", 2, NUM_MI},			/* M */
	{"PL", 2, NUM_PL},			/* P */
	{"PR", 2, NUM_PR},
	{"RN", 2, NUM_RN},			/* R */
	{"SG", 2, NUM_SG},			/* S */
	{"SP", 2, NUM_SP},
	{"S", 1, NUM_S},
	{"TH", 2, NUM_TH},			/* T */
	{"V", 1, NUM_V},			/* V */
	{"b", 1, NUM_B},			/* b */
	{"c", 1, NUM_C},			/* c */
	{"d", 1, NUM_D},			/* d */
	{"eeee", 4, NUM_E},			/* e */
	{"fm", 2, NUM_FM},			/* f */
	{"g", 1, NUM_G},			/* g */
	{"l", 1, NUM_L},			/* l */
	{"mi", 2, NUM_MI},			/* m */
	{"pl", 2, NUM_PL},			/* p */
	{"pr", 2, NUM_PR},
	{"rn", 2, NUM_rn},			/* r */
	{"sg", 2, NUM_SG},			/* s */
	{"sp", 2, NUM_SP},
	{"s", 1, NUM_S},
	{"th", 2, NUM_th},			/* t */
	{"v", 1, NUM_V},			/* v */

	/* last */
	{NULL, 0, 0}
};
855
856
/* ----------
 * KeyWords index for DATE-TIME version
 *
 * Slot (c - ' ') holds the position in DCH_keywords of the first keyword
 * that starts with character c, or -1 when no keyword starts with it.  The
 * first row below has 8 entries (characters 32..39); every following row
 * covers ten characters.
 * ----------
 */
static const int DCH_index[KeyWord_INDEX_SIZE] = {
/*
0	1	2	3	4	5	6	7	8	9
*/
	/*---- first 0..31 chars are skipped ----*/

	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
	DCH_FX, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
	DCH_P_M, DCH_Q, DCH_RM, DCH_SSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,
	-1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
	DCH_day, -1, DCH_fx, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
	-1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_ssss, DCH_tz, DCH_us, -1, DCH_ww,
	-1, DCH_y_yyy, -1, -1, -1, -1

	/*---- chars over 126 are skipped ----*/
};
880
/* ----------
 * KeyWords index for NUMBER version
 *
 * Slot (c - ' ') holds the position in NUM_keywords of the first keyword
 * that starts with character c, or -1 when no keyword starts with it.  The
 * first row below has 8 entries (characters 32..39); every following row
 * covers ten characters.
 * ----------
 */
static const int NUM_index[KeyWord_INDEX_SIZE] = {
/*
0	1	2	3	4	5	6	7	8	9
*/
	/*---- first 0..31 chars are skipped ----*/

	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
	-1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
	-1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
	NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
	NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
	NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
	-1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
	-1, -1, -1, -1, -1, -1

	/*---- chars over 126 are skipped ----*/
};
904
905 /* ----------
906 * Number processor struct
907 * ----------
908 */
909 typedef struct NUMProc
910 {
911 bool is_to_char;
912 NUMDesc *Num; /* number description */
913
914 int sign, /* '-' or '+' */
915 sign_wrote, /* was sign write */
916 num_count, /* number of write digits */
917 num_in, /* is inside number */
918 num_curr, /* current position in number */
919 out_pre_spaces, /* spaces before first digit */
920
921 read_dec, /* to_number - was read dec. point */
922 read_post, /* to_number - number of dec. digit */
923 read_pre; /* to_number - number non-dec. digit */
924
925 char *number, /* string with number */
926 *number_p, /* pointer to current number position */
927 *inout, /* in / out buffer */
928 *inout_p, /* pointer to current inout position */
929 *last_relevant, /* last relevant number after decimal point */
930
931 *L_negative_sign, /* Locale */
932 *L_positive_sign,
933 *decimal,
934 *L_thousands_sep,
935 *L_currency_symbol;
936 } NUMProc;
937
938
/* ----------
 * Functions
 *
 * Forward declarations of the file-local (static) routines.
 * ----------
 */

/* format picture parsing */
static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
				 const int *index);
static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
			 const KeySuffix *suf, const int *index, int ver, NUMDesc *Num);

/* date/time conversion in both directions */
static void DCH_to_char(FormatNode *node, bool is_interval,
			TmToChar *in, char *out, Oid collid);
static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out);

/* debugging aids, only built with DEBUG_TO_FROM_CHAR */
#ifdef DEBUG_TO_FROM_CHAR
static void dump_index(const KeyWord *k, const int *index);
static void dump_node(FormatNode *node, int max);
#endif

/* helpers for date/time conversion */
static const char *get_th(char *num, int type);
static char *str_numth(char *dest, char *num, int type);
static int	adjust_partial_year_to_2020(int year);
static int	strspace_len(const char *str);
static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode);
static void from_char_set_int(int *dest, const int value, const FormatNode *node);
static int	from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node);
static int	from_char_parse_int(int *dest, const char **src, FormatNode *node);
static int	seq_search(const char *name, const char *const *array, int *len);
static int from_char_seq_search(int *dest, const char **src,
					 const char *const *array,
					 FormatNode *node);
static void do_to_timestamp(text *date_txt, text *fmt,
				struct pg_tm *tm, fsec_t *fsec);

/* number conversion */
static char *fill_str(char *str, int c, int max);
static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
static char *int_to_roman(int number);
static void NUM_prepare_locale(NUMProc *Np);
static char *get_last_relevant_decnum(char *num);
static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
static void NUM_numpart_to_char(NUMProc *Np, int id);
static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
			  char *number, int input_len, int to_char_out_pre_spaces,
			  int sign, bool is_to_char, Oid collid);

/* format picture caches */
static DCHCacheEntry *DCH_cache_getnew(const char *str);
static DCHCacheEntry *DCH_cache_search(const char *str);
static DCHCacheEntry *DCH_cache_fetch(const char *str);
static NUMCacheEntry *NUM_cache_getnew(const char *str);
static NUMCacheEntry *NUM_cache_search(const char *str);
static NUMCacheEntry *NUM_cache_fetch(const char *str);
989
990
991 /* ----------
992 * Fast sequential search, use index for data selection which
993 * go to seq. cycle (it is very fast for unwanted strings)
994 * (can't be used binary search in format parsing)
995 * ----------
996 */
997 static const KeyWord *
index_seq_search(const char * str,const KeyWord * kw,const int * index)998 index_seq_search(const char *str, const KeyWord *kw, const int *index)
999 {
1000 int poz;
1001
1002 if (!KeyWord_INDEX_FILTER(*str))
1003 return NULL;
1004
1005 if ((poz = *(index + (*str - ' '))) > -1)
1006 {
1007 const KeyWord *k = kw + poz;
1008
1009 do
1010 {
1011 if (strncmp(str, k->name, k->len) == 0)
1012 return k;
1013 k++;
1014 if (!k->name)
1015 return NULL;
1016 } while (*str == *k->name);
1017 }
1018 return NULL;
1019 }
1020
1021 static const KeySuffix *
suff_search(const char * str,const KeySuffix * suf,int type)1022 suff_search(const char *str, const KeySuffix *suf, int type)
1023 {
1024 const KeySuffix *s;
1025
1026 for (s = suf; s->name != NULL; s++)
1027 {
1028 if (s->type != type)
1029 continue;
1030
1031 if (strncmp(str, s->name, s->len) == 0)
1032 return s;
1033 }
1034 return NULL;
1035 }
1036
1037 /* ----------
1038 * Prepare NUMDesc (number description struct) via FormatNode struct
1039 * ----------
1040 */
1041 static void
NUMDesc_prepare(NUMDesc * num,FormatNode * n)1042 NUMDesc_prepare(NUMDesc *num, FormatNode *n)
1043 {
1044 if (n->type != NODE_TYPE_ACTION)
1045 return;
1046
1047 if (IS_EEEE(num) && n->key->id != NUM_E)
1048 ereport(ERROR,
1049 (errcode(ERRCODE_SYNTAX_ERROR),
1050 errmsg("\"EEEE\" must be the last pattern used")));
1051
1052 switch (n->key->id)
1053 {
1054 case NUM_9:
1055 if (IS_BRACKET(num))
1056 ereport(ERROR,
1057 (errcode(ERRCODE_SYNTAX_ERROR),
1058 errmsg("\"9\" must be ahead of \"PR\"")));
1059 if (IS_MULTI(num))
1060 {
1061 ++num->multi;
1062 break;
1063 }
1064 if (IS_DECIMAL(num))
1065 ++num->post;
1066 else
1067 ++num->pre;
1068 break;
1069
1070 case NUM_0:
1071 if (IS_BRACKET(num))
1072 ereport(ERROR,
1073 (errcode(ERRCODE_SYNTAX_ERROR),
1074 errmsg("\"0\" must be ahead of \"PR\"")));
1075 if (!IS_ZERO(num) && !IS_DECIMAL(num))
1076 {
1077 num->flag |= NUM_F_ZERO;
1078 num->zero_start = num->pre + 1;
1079 }
1080 if (!IS_DECIMAL(num))
1081 ++num->pre;
1082 else
1083 ++num->post;
1084
1085 num->zero_end = num->pre + num->post;
1086 break;
1087
1088 case NUM_B:
1089 if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1090 num->flag |= NUM_F_BLANK;
1091 break;
1092
1093 case NUM_D:
1094 num->flag |= NUM_F_LDECIMAL;
1095 num->need_locale = true;
1096 /* FALLTHROUGH */
1097 case NUM_DEC:
1098 if (IS_DECIMAL(num))
1099 ereport(ERROR,
1100 (errcode(ERRCODE_SYNTAX_ERROR),
1101 errmsg("multiple decimal points")));
1102 if (IS_MULTI(num))
1103 ereport(ERROR,
1104 (errcode(ERRCODE_SYNTAX_ERROR),
1105 errmsg("cannot use \"V\" and decimal point together")));
1106 num->flag |= NUM_F_DECIMAL;
1107 break;
1108
1109 case NUM_FM:
1110 num->flag |= NUM_F_FILLMODE;
1111 break;
1112
1113 case NUM_S:
1114 if (IS_LSIGN(num))
1115 ereport(ERROR,
1116 (errcode(ERRCODE_SYNTAX_ERROR),
1117 errmsg("cannot use \"S\" twice")));
1118 if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1119 ereport(ERROR,
1120 (errcode(ERRCODE_SYNTAX_ERROR),
1121 errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1122 if (!IS_DECIMAL(num))
1123 {
1124 num->lsign = NUM_LSIGN_PRE;
1125 num->pre_lsign_num = num->pre;
1126 num->need_locale = true;
1127 num->flag |= NUM_F_LSIGN;
1128 }
1129 else if (num->lsign == NUM_LSIGN_NONE)
1130 {
1131 num->lsign = NUM_LSIGN_POST;
1132 num->need_locale = true;
1133 num->flag |= NUM_F_LSIGN;
1134 }
1135 break;
1136
1137 case NUM_MI:
1138 if (IS_LSIGN(num))
1139 ereport(ERROR,
1140 (errcode(ERRCODE_SYNTAX_ERROR),
1141 errmsg("cannot use \"S\" and \"MI\" together")));
1142 num->flag |= NUM_F_MINUS;
1143 if (IS_DECIMAL(num))
1144 num->flag |= NUM_F_MINUS_POST;
1145 break;
1146
1147 case NUM_PL:
1148 if (IS_LSIGN(num))
1149 ereport(ERROR,
1150 (errcode(ERRCODE_SYNTAX_ERROR),
1151 errmsg("cannot use \"S\" and \"PL\" together")));
1152 num->flag |= NUM_F_PLUS;
1153 if (IS_DECIMAL(num))
1154 num->flag |= NUM_F_PLUS_POST;
1155 break;
1156
1157 case NUM_SG:
1158 if (IS_LSIGN(num))
1159 ereport(ERROR,
1160 (errcode(ERRCODE_SYNTAX_ERROR),
1161 errmsg("cannot use \"S\" and \"SG\" together")));
1162 num->flag |= NUM_F_MINUS;
1163 num->flag |= NUM_F_PLUS;
1164 break;
1165
1166 case NUM_PR:
1167 if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1168 ereport(ERROR,
1169 (errcode(ERRCODE_SYNTAX_ERROR),
1170 errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1171 num->flag |= NUM_F_BRACKET;
1172 break;
1173
1174 case NUM_rn:
1175 case NUM_RN:
1176 num->flag |= NUM_F_ROMAN;
1177 break;
1178
1179 case NUM_L:
1180 case NUM_G:
1181 num->need_locale = true;
1182 break;
1183
1184 case NUM_V:
1185 if (IS_DECIMAL(num))
1186 ereport(ERROR,
1187 (errcode(ERRCODE_SYNTAX_ERROR),
1188 errmsg("cannot use \"V\" and decimal point together")));
1189 num->flag |= NUM_F_MULTI;
1190 break;
1191
1192 case NUM_E:
1193 if (IS_EEEE(num))
1194 ereport(ERROR,
1195 (errcode(ERRCODE_SYNTAX_ERROR),
1196 errmsg("cannot use \"EEEE\" twice")));
1197 if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1198 IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1199 IS_ROMAN(num) || IS_MULTI(num))
1200 ereport(ERROR,
1201 (errcode(ERRCODE_SYNTAX_ERROR),
1202 errmsg("\"EEEE\" is incompatible with other formats"),
1203 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1204 num->flag |= NUM_F_EEEE;
1205 break;
1206 }
1207 }
1208
1209 /* ----------
1210 * Format parser, search small keywords and keyword's suffixes, and make
1211 * format-node tree.
1212 *
1213 * for DATE-TIME & NUMBER version
1214 * ----------
1215 */
1216 static void
parse_format(FormatNode * node,const char * str,const KeyWord * kw,const KeySuffix * suf,const int * index,int ver,NUMDesc * Num)1217 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1218 const KeySuffix *suf, const int *index, int ver, NUMDesc *Num)
1219 {
1220 FormatNode *n;
1221
1222 #ifdef DEBUG_TO_FROM_CHAR
1223 elog(DEBUG_elog_output, "to_char/number(): run parser");
1224 #endif
1225
1226 n = node;
1227
1228 while (*str)
1229 {
1230 int suffix = 0;
1231 const KeySuffix *s;
1232
1233 /*
1234 * Prefix
1235 */
1236 if (ver == DCH_TYPE &&
1237 (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1238 {
1239 suffix |= s->id;
1240 if (s->len)
1241 str += s->len;
1242 }
1243
1244 /*
1245 * Keyword
1246 */
1247 if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1248 {
1249 n->type = NODE_TYPE_ACTION;
1250 n->suffix = suffix;
1251 if (n->key->len)
1252 str += n->key->len;
1253
1254 /*
1255 * NUM version: Prepare global NUMDesc struct
1256 */
1257 if (ver == NUM_TYPE)
1258 NUMDesc_prepare(Num, n);
1259
1260 /*
1261 * Postfix
1262 */
1263 if (ver == DCH_TYPE && *str &&
1264 (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1265 {
1266 n->suffix |= s->id;
1267 if (s->len)
1268 str += s->len;
1269 }
1270
1271 n++;
1272 }
1273 else if (*str)
1274 {
1275 int chlen;
1276
1277 /*
1278 * Process double-quoted literal string, if any
1279 */
1280 if (*str == '"')
1281 {
1282 str++;
1283 while (*str)
1284 {
1285 if (*str == '"')
1286 {
1287 str++;
1288 break;
1289 }
1290 /* backslash quotes the next character, if any */
1291 if (*str == '\\' && *(str + 1))
1292 str++;
1293 chlen = pg_mblen(str);
1294 n->type = NODE_TYPE_CHAR;
1295 memcpy(n->character, str, chlen);
1296 n->character[chlen] = '\0';
1297 n->key = NULL;
1298 n->suffix = 0;
1299 n++;
1300 str += chlen;
1301 }
1302 }
1303 else
1304 {
1305 /*
1306 * Outside double-quoted strings, backslash is only special if
1307 * it immediately precedes a double quote.
1308 */
1309 if (*str == '\\' && *(str + 1) == '"')
1310 str++;
1311 chlen = pg_mblen(str);
1312 n->type = NODE_TYPE_CHAR;
1313 memcpy(n->character, str, chlen);
1314 n->character[chlen] = '\0';
1315 n->key = NULL;
1316 n->suffix = 0;
1317 n++;
1318 str += chlen;
1319 }
1320 }
1321 }
1322
1323 n->type = NODE_TYPE_END;
1324 n->suffix = 0;
1325 }
1326
/* ----------
 * DEBUG: print the FormatNode array, one elog line per node, stopping at
 * the NODE_TYPE_END node or after index 'max', whichever comes first.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	int			pos = 0;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	while (pos <= max)
	{
		FormatNode *cur = node + pos;

		if (cur->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", pos);
			return;
		}

		if (cur->type == NODE_TYPE_ACTION)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 pos, cur->key->name,
				 DUMP_THth(cur->suffix), DUMP_FM(cur->suffix));
		else if (cur->type == NODE_TYPE_CHAR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 pos, cur->character);
		else
			elog(DEBUG_elog_output, "%d:\t unknown NODE!", pos);

		pos++;
	}
}
#endif							/* DEBUG */
1362
1363 /*****************************************************************************
1364 * Private utils
1365 *****************************************************************************/
1366
1367 /* ----------
1368 * Return ST/ND/RD/TH for simple (1..9) numbers
1369 * type --> 0 upper, 1 lower
1370 * ----------
1371 */
1372 static const char *
get_th(char * num,int type)1373 get_th(char *num, int type)
1374 {
1375 int len = strlen(num),
1376 last,
1377 seclast;
1378
1379 last = *(num + (len - 1));
1380 if (!isdigit((unsigned char) last))
1381 ereport(ERROR,
1382 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1383 errmsg("\"%s\" is not a number", num)));
1384
1385 /*
1386 * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1387 * 'ST/st', 'ND/nd', 'RD/rd', respectively
1388 */
1389 if ((len > 1) && ((seclast = num[len - 2]) == '1'))
1390 last = 0;
1391
1392 switch (last)
1393 {
1394 case '1':
1395 if (type == TH_UPPER)
1396 return numTH[0];
1397 return numth[0];
1398 case '2':
1399 if (type == TH_UPPER)
1400 return numTH[1];
1401 return numth[1];
1402 case '3':
1403 if (type == TH_UPPER)
1404 return numTH[2];
1405 return numth[2];
1406 default:
1407 if (type == TH_UPPER)
1408 return numTH[3];
1409 return numth[3];
1410 }
1411 }
1412
/* ----------
 * Convert string-number to ordinal string-number: put 'num' into 'dest'
 * (unless they are the same buffer) and append the suffix get_th() returns
 * for it.  'type' is handed to get_th() unchanged.  Returns 'dest'.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	/* nothing to copy when the caller converts in place */
	if (dest != num)
		strcpy(dest, num);

	suffix = get_th(num, type);
	strcat(dest, suffix);

	return dest;
}
1426
1427 /*****************************************************************************
1428 * upper/lower/initcap functions
1429 *****************************************************************************/
1430
1431 #ifdef USE_ICU
1432
1433 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1434 const UChar *src, int32_t srcLength,
1435 const char *locale,
1436 UErrorCode *pErrorCode);
1437
1438 static int32_t
icu_convert_case(ICU_Convert_Func func,pg_locale_t mylocale,UChar ** buff_dest,UChar * buff_source,int32_t len_source)1439 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1440 UChar **buff_dest, UChar *buff_source, int32_t len_source)
1441 {
1442 UErrorCode status;
1443 int32_t len_dest;
1444
1445 len_dest = len_source; /* try first with same length */
1446 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1447 status = U_ZERO_ERROR;
1448 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1449 mylocale->info.icu.locale, &status);
1450 if (status == U_BUFFER_OVERFLOW_ERROR)
1451 {
1452 /* try again with adjusted length */
1453 pfree(*buff_dest);
1454 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1455 status = U_ZERO_ERROR;
1456 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1457 mylocale->info.icu.locale, &status);
1458 }
1459 if (U_FAILURE(status))
1460 ereport(ERROR,
1461 (errmsg("case conversion failed: %s", u_errorName(status))));
1462 return len_dest;
1463 }
1464
1465 static int32_t
u_strToTitle_default_BI(UChar * dest,int32_t destCapacity,const UChar * src,int32_t srcLength,const char * locale,UErrorCode * pErrorCode)1466 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1467 const UChar *src, int32_t srcLength,
1468 const char *locale,
1469 UErrorCode *pErrorCode)
1470 {
1471 return u_strToTitle(dest, destCapacity, src, srcLength,
1472 NULL, locale, pErrorCode);
1473 }
1474
1475 #endif /* USE_ICU */
1476
1477 /*
1478 * If the system provides the needed functions for wide-character manipulation
1479 * (which are all standardized by C99), then we implement upper/lower/initcap
1480 * using wide-character functions, if necessary. Otherwise we use the
1481 * traditional <ctype.h> functions, which of course will not work as desired
1482 * in multibyte character sets. Note that in either case we are effectively
1483 * assuming that the database character encoding matches the encoding implied
1484 * by LC_CTYPE.
1485 *
1486 * If the system provides locale_t and associated functions (which are
1487 * standardized by Open Group's XBD), we can support collations that are
1488 * neither default nor C. The code is written to handle both combinations
1489 * of have-wide-characters and have-locale_t, though it's rather unlikely
1490 * a platform would have the latter without the former.
1491 */
1492
1493 /*
1494 * collation-aware, wide-character-aware lower function
1495 *
1496 * We pass the number of bytes so we can pass varlena and char*
1497 * to this function. The result is a palloc'd, null-terminated string.
1498 */
1499 char *
str_tolower(const char * buff,size_t nbytes,Oid collid)1500 str_tolower(const char *buff, size_t nbytes, Oid collid)
1501 {
1502 char *result;
1503
1504 if (!buff)
1505 return NULL;
1506
1507 /* C/POSIX collations use this path regardless of database encoding */
1508 if (lc_ctype_is_c(collid))
1509 {
1510 result = asc_tolower(buff, nbytes);
1511 }
1512 else
1513 {
1514 pg_locale_t mylocale = 0;
1515
1516 if (collid != DEFAULT_COLLATION_OID)
1517 {
1518 if (!OidIsValid(collid))
1519 {
1520 /*
1521 * This typically means that the parser could not resolve a
1522 * conflict of implicit collations, so report it that way.
1523 */
1524 ereport(ERROR,
1525 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1526 errmsg("could not determine which collation to use for lower() function"),
1527 errhint("Use the COLLATE clause to set the collation explicitly.")));
1528 }
1529 mylocale = pg_newlocale_from_collation(collid);
1530 }
1531
1532 #ifdef USE_ICU
1533 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1534 {
1535 int32_t len_uchar;
1536 int32_t len_conv;
1537 UChar *buff_uchar;
1538 UChar *buff_conv;
1539
1540 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1541 len_conv = icu_convert_case(u_strToLower, mylocale,
1542 &buff_conv, buff_uchar, len_uchar);
1543 icu_from_uchar(&result, buff_conv, len_conv);
1544 pfree(buff_uchar);
1545 pfree(buff_conv);
1546 }
1547 else
1548 #endif
1549 {
1550 if (pg_database_encoding_max_length() > 1)
1551 {
1552 wchar_t *workspace;
1553 size_t curr_char;
1554 size_t result_size;
1555
1556 /* Overflow paranoia */
1557 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1558 ereport(ERROR,
1559 (errcode(ERRCODE_OUT_OF_MEMORY),
1560 errmsg("out of memory")));
1561
1562 /* Output workspace cannot have more codes than input bytes */
1563 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1564
1565 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1566
1567 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1568 {
1569 #ifdef HAVE_LOCALE_T
1570 if (mylocale)
1571 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1572 else
1573 #endif
1574 workspace[curr_char] = towlower(workspace[curr_char]);
1575 }
1576
1577 /*
1578 * Make result large enough; case change might change number
1579 * of bytes
1580 */
1581 result_size = curr_char * pg_database_encoding_max_length() + 1;
1582 result = palloc(result_size);
1583
1584 wchar2char(result, workspace, result_size, mylocale);
1585 pfree(workspace);
1586 }
1587 else
1588 {
1589 char *p;
1590
1591 result = pnstrdup(buff, nbytes);
1592
1593 /*
1594 * Note: we assume that tolower_l() will not be so broken as
1595 * to need an isupper_l() guard test. When using the default
1596 * collation, we apply the traditional Postgres behavior that
1597 * forces ASCII-style treatment of I/i, but in non-default
1598 * collations you get exactly what the collation says.
1599 */
1600 for (p = result; *p; p++)
1601 {
1602 #ifdef HAVE_LOCALE_T
1603 if (mylocale)
1604 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1605 else
1606 #endif
1607 *p = pg_tolower((unsigned char) *p);
1608 }
1609 }
1610 }
1611 }
1612
1613 return result;
1614 }
1615
1616 /*
1617 * collation-aware, wide-character-aware upper function
1618 *
1619 * We pass the number of bytes so we can pass varlena and char*
1620 * to this function. The result is a palloc'd, null-terminated string.
1621 */
1622 char *
str_toupper(const char * buff,size_t nbytes,Oid collid)1623 str_toupper(const char *buff, size_t nbytes, Oid collid)
1624 {
1625 char *result;
1626
1627 if (!buff)
1628 return NULL;
1629
1630 /* C/POSIX collations use this path regardless of database encoding */
1631 if (lc_ctype_is_c(collid))
1632 {
1633 result = asc_toupper(buff, nbytes);
1634 }
1635 else
1636 {
1637 pg_locale_t mylocale = 0;
1638
1639 if (collid != DEFAULT_COLLATION_OID)
1640 {
1641 if (!OidIsValid(collid))
1642 {
1643 /*
1644 * This typically means that the parser could not resolve a
1645 * conflict of implicit collations, so report it that way.
1646 */
1647 ereport(ERROR,
1648 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1649 errmsg("could not determine which collation to use for upper() function"),
1650 errhint("Use the COLLATE clause to set the collation explicitly.")));
1651 }
1652 mylocale = pg_newlocale_from_collation(collid);
1653 }
1654
1655 #ifdef USE_ICU
1656 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1657 {
1658 int32_t len_uchar,
1659 len_conv;
1660 UChar *buff_uchar;
1661 UChar *buff_conv;
1662
1663 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1664 len_conv = icu_convert_case(u_strToUpper, mylocale,
1665 &buff_conv, buff_uchar, len_uchar);
1666 icu_from_uchar(&result, buff_conv, len_conv);
1667 pfree(buff_uchar);
1668 pfree(buff_conv);
1669 }
1670 else
1671 #endif
1672 {
1673 if (pg_database_encoding_max_length() > 1)
1674 {
1675 wchar_t *workspace;
1676 size_t curr_char;
1677 size_t result_size;
1678
1679 /* Overflow paranoia */
1680 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1681 ereport(ERROR,
1682 (errcode(ERRCODE_OUT_OF_MEMORY),
1683 errmsg("out of memory")));
1684
1685 /* Output workspace cannot have more codes than input bytes */
1686 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1687
1688 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1689
1690 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1691 {
1692 #ifdef HAVE_LOCALE_T
1693 if (mylocale)
1694 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1695 else
1696 #endif
1697 workspace[curr_char] = towupper(workspace[curr_char]);
1698 }
1699
1700 /*
1701 * Make result large enough; case change might change number
1702 * of bytes
1703 */
1704 result_size = curr_char * pg_database_encoding_max_length() + 1;
1705 result = palloc(result_size);
1706
1707 wchar2char(result, workspace, result_size, mylocale);
1708 pfree(workspace);
1709 }
1710 else
1711 {
1712 char *p;
1713
1714 result = pnstrdup(buff, nbytes);
1715
1716 /*
1717 * Note: we assume that toupper_l() will not be so broken as
1718 * to need an islower_l() guard test. When using the default
1719 * collation, we apply the traditional Postgres behavior that
1720 * forces ASCII-style treatment of I/i, but in non-default
1721 * collations you get exactly what the collation says.
1722 */
1723 for (p = result; *p; p++)
1724 {
1725 #ifdef HAVE_LOCALE_T
1726 if (mylocale)
1727 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1728 else
1729 #endif
1730 *p = pg_toupper((unsigned char) *p);
1731 }
1732 }
1733 }
1734 }
1735
1736 return result;
1737 }
1738
1739 /*
1740 * collation-aware, wide-character-aware initcap function
1741 *
1742 * We pass the number of bytes so we can pass varlena and char*
1743 * to this function. The result is a palloc'd, null-terminated string.
1744 */
1745 char *
str_initcap(const char * buff,size_t nbytes,Oid collid)1746 str_initcap(const char *buff, size_t nbytes, Oid collid)
1747 {
1748 char *result;
1749 int wasalnum = false;
1750
1751 if (!buff)
1752 return NULL;
1753
1754 /* C/POSIX collations use this path regardless of database encoding */
1755 if (lc_ctype_is_c(collid))
1756 {
1757 result = asc_initcap(buff, nbytes);
1758 }
1759 else
1760 {
1761 pg_locale_t mylocale = 0;
1762
1763 if (collid != DEFAULT_COLLATION_OID)
1764 {
1765 if (!OidIsValid(collid))
1766 {
1767 /*
1768 * This typically means that the parser could not resolve a
1769 * conflict of implicit collations, so report it that way.
1770 */
1771 ereport(ERROR,
1772 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1773 errmsg("could not determine which collation to use for initcap() function"),
1774 errhint("Use the COLLATE clause to set the collation explicitly.")));
1775 }
1776 mylocale = pg_newlocale_from_collation(collid);
1777 }
1778
1779 #ifdef USE_ICU
1780 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1781 {
1782 int32_t len_uchar,
1783 len_conv;
1784 UChar *buff_uchar;
1785 UChar *buff_conv;
1786
1787 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1788 len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1789 &buff_conv, buff_uchar, len_uchar);
1790 icu_from_uchar(&result, buff_conv, len_conv);
1791 pfree(buff_uchar);
1792 pfree(buff_conv);
1793 }
1794 else
1795 #endif
1796 {
1797 if (pg_database_encoding_max_length() > 1)
1798 {
1799 wchar_t *workspace;
1800 size_t curr_char;
1801 size_t result_size;
1802
1803 /* Overflow paranoia */
1804 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1805 ereport(ERROR,
1806 (errcode(ERRCODE_OUT_OF_MEMORY),
1807 errmsg("out of memory")));
1808
1809 /* Output workspace cannot have more codes than input bytes */
1810 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1811
1812 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1813
1814 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1815 {
1816 #ifdef HAVE_LOCALE_T
1817 if (mylocale)
1818 {
1819 if (wasalnum)
1820 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1821 else
1822 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1823 wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1824 }
1825 else
1826 #endif
1827 {
1828 if (wasalnum)
1829 workspace[curr_char] = towlower(workspace[curr_char]);
1830 else
1831 workspace[curr_char] = towupper(workspace[curr_char]);
1832 wasalnum = iswalnum(workspace[curr_char]);
1833 }
1834 }
1835
1836 /*
1837 * Make result large enough; case change might change number
1838 * of bytes
1839 */
1840 result_size = curr_char * pg_database_encoding_max_length() + 1;
1841 result = palloc(result_size);
1842
1843 wchar2char(result, workspace, result_size, mylocale);
1844 pfree(workspace);
1845 }
1846 else
1847 {
1848 char *p;
1849
1850 result = pnstrdup(buff, nbytes);
1851
1852 /*
1853 * Note: we assume that toupper_l()/tolower_l() will not be so
1854 * broken as to need guard tests. When using the default
1855 * collation, we apply the traditional Postgres behavior that
1856 * forces ASCII-style treatment of I/i, but in non-default
1857 * collations you get exactly what the collation says.
1858 */
1859 for (p = result; *p; p++)
1860 {
1861 #ifdef HAVE_LOCALE_T
1862 if (mylocale)
1863 {
1864 if (wasalnum)
1865 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1866 else
1867 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1868 wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
1869 }
1870 else
1871 #endif
1872 {
1873 if (wasalnum)
1874 *p = pg_tolower((unsigned char) *p);
1875 else
1876 *p = pg_toupper((unsigned char) *p);
1877 wasalnum = isalnum((unsigned char) *p);
1878 }
1879 }
1880 }
1881 }
1882 }
1883
1884 return result;
1885 }
1886
/*
 * ASCII-only lower function
 *
 * The byte count is passed explicitly so that both varlena payloads and
 * char* strings can be handed in.  The result is a palloc'd,
 * null-terminated copy with pg_ascii_tolower() applied to each byte; a NULL
 * 'buff' yields NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_tolower((unsigned char) result[i]);

	return result;
}
1909
/*
 * ASCII-only upper function
 *
 * The byte count is passed explicitly so that both varlena payloads and
 * char* strings can be handed in.  The result is a palloc'd,
 * null-terminated copy with pg_ascii_toupper() applied to each byte; a NULL
 * 'buff' yields NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_toupper((unsigned char) result[i]);

	return result;
}
1932
/*
 * ASCII-only initcap function
 *
 * A byte that follows an ASCII letter or digit is lower-cased, every other
 * byte is upper-cased.
 *
 * The byte count is passed explicitly so that both varlena payloads and
 * char* strings can be handed in.  The result is a palloc'd,
 * null-terminated string; a NULL 'buff' yields NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;
	int			wasalnum = false;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
	{
		char		c;

		c = wasalnum
			? pg_ascii_tolower((unsigned char) result[i])
			: pg_ascii_toupper((unsigned char) result[i]);
		result[i] = c;

		/* we don't trust isalnum() here */
		wasalnum = ((c >= 'A' && c <= 'Z') ||
					(c >= 'a' && c <= 'z') ||
					(c >= '0' && c <= '9'));
	}

	return result;
}
1967
1968 /* convenience routines for when the input is null-terminated */
1969
1970 static char *
str_tolower_z(const char * buff,Oid collid)1971 str_tolower_z(const char *buff, Oid collid)
1972 {
1973 return str_tolower(buff, strlen(buff), collid);
1974 }
1975
1976 static char *
str_toupper_z(const char * buff,Oid collid)1977 str_toupper_z(const char *buff, Oid collid)
1978 {
1979 return str_toupper(buff, strlen(buff), collid);
1980 }
1981
1982 static char *
str_initcap_z(const char * buff,Oid collid)1983 str_initcap_z(const char *buff, Oid collid)
1984 {
1985 return str_initcap(buff, strlen(buff), collid);
1986 }
1987
/* asc_tolower() for a null-terminated string; length comes from strlen() */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
1993
/* asc_toupper() for a null-terminated string; length comes from strlen() */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
1999
2000 /* asc_initcap_z is not currently needed */
2001
2002
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for the suffix, step over up to two characters of input
 * (each measured with pg_mblen), stopping early at the end of the string.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			int		_nskip; \
			for (_nskip = 0; _nskip < 2 && *(ptr); _nskip++) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2017
2018
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: Call for debug and for index checking; shows the ASCII char and
 * the keyword defined for each used index position, lists the unused
 * positions, and ends with the totals of both.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			pos;
	int			used = 0;
	int			unused = 0;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (pos = 0; pos < KeyWord_INDEX_SIZE; pos++)
	{
		/* -1 marks a position with no keyword */
		if (index[pos] == -1)
		{
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", pos, pos + 32, index[pos]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", pos + 32, k[index[pos]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2051
2052 /* ----------
2053 * Return true if next format picture is not digit value
2054 * ----------
2055 */
2056 static bool
is_next_separator(FormatNode * n)2057 is_next_separator(FormatNode *n)
2058 {
2059 if (n->type == NODE_TYPE_END)
2060 return false;
2061
2062 if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2063 return true;
2064
2065 /*
2066 * Next node
2067 */
2068 n++;
2069
2070 /* end of format string is treated like a non-digit separator */
2071 if (n->type == NODE_TYPE_END)
2072 return true;
2073
2074 if (n->type == NODE_TYPE_ACTION)
2075 {
2076 if (n->key->is_digit)
2077 return false;
2078
2079 return true;
2080 }
2081 else if (n->character[1] == '\0' &&
2082 isdigit((unsigned char) n->character[0]))
2083 return false;
2084
2085 return true; /* some non-digit input (separator) */
2086 }
2087
2088
/*
 * Expand a partially specified year toward 2020; this is effectively what
 * happens when we assume '70' is 1970 and '69' is 2069.
 *
 * Years below 1000 get the offset of the first range they fall under;
 * years from 1000 up are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	static const struct
	{
		int			below;		/* range applies to years < below */
		int			offset;		/* amount added to such years */
	}			ranges[] =
	{
		{70, 2000},				/* force 0-69 into the 2000's */
		{100, 1900},			/* force 70-99 into the 1900's */
		{520, 2000},			/* force 100-519 into the 2000's */
		{1000, 1000}			/* force 520-999 into the 1000's */
	};
	int			i;

	for (i = 0; i < (int) (sizeof(ranges) / sizeof(ranges[0])); i++)
	{
		if (year < ranges[i].below)
			return year + ranges[i].offset;
	}

	return year;
}
2111
2112
/*
 * Return the number of leading whitespace bytes (as judged by isspace())
 * at the start of 'str'.
 */
static int
strspace_len(const char *str)
{
	const char *p = str;

	while (*p != '\0' && isspace((unsigned char) *p))
		p++;

	return (int) (p - str);
}
2125
2126 /*
2127 * Set the date mode of a from-char conversion.
2128 *
2129 * Puke if the date mode has already been set, and the caller attempts to set
2130 * it to a conflicting mode.
2131 */
2132 static void
from_char_set_mode(TmFromChar * tmfc,const FromCharDateMode mode)2133 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode)
2134 {
2135 if (mode != FROM_CHAR_DATE_NONE)
2136 {
2137 if (tmfc->mode == FROM_CHAR_DATE_NONE)
2138 tmfc->mode = mode;
2139 else if (tmfc->mode != mode)
2140 ereport(ERROR,
2141 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2142 errmsg("invalid combination of date conventions"),
2143 errhint("Do not mix Gregorian and ISO week date "
2144 "conventions in a formatting template.")));
2145 }
2146 }
2147
2148 /*
2149 * Set the integer pointed to by 'dest' to the given value.
2150 *
2151 * Puke if the destination integer has previously been set to some other
2152 * non-zero value.
2153 */
2154 static void
from_char_set_int(int * dest,const int value,const FormatNode * node)2155 from_char_set_int(int *dest, const int value, const FormatNode *node)
2156 {
2157 if (*dest != 0 && *dest != value)
2158 ereport(ERROR,
2159 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2160 errmsg("conflicting values for \"%s\" field in formatting string",
2161 node->key->name),
2162 errdetail("This value contradicts a previous setting for "
2163 "the same field type.")));
2164 *dest = value;
2165 }
2166
2167 /*
2168 * Read a single integer from the source string, into the int pointed to by
2169 * 'dest'. If 'dest' is NULL, the result is discarded.
2170 *
2171 * In fixed-width mode (the node does not have the FM suffix), consume at most
2172 * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2173 *
2174 * We use strtol() to recover the integer value from the source string, in
2175 * accordance with the given FormatNode.
2176 *
2177 * If the conversion completes successfully, src will have been advanced to
2178 * point at the character immediately following the last character used in the
2179 * conversion.
2180 *
2181 * Return the number of characters consumed.
2182 *
2183 * Note that from_char_parse_int() provides a more convenient wrapper where
2184 * the length of the field is the same as the length of the format keyword (as
2185 * with DD and MI).
2186 */
2187 static int
from_char_parse_int_len(int * dest,const char ** src,const int len,FormatNode * node)2188 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node)
2189 {
2190 long result;
2191 char copy[DCH_MAX_ITEM_SIZ + 1];
2192 const char *init = *src;
2193 int used;
2194
2195 /*
2196 * Skip any whitespace before parsing the integer.
2197 */
2198 *src += strspace_len(*src);
2199
2200 Assert(len <= DCH_MAX_ITEM_SIZ);
2201 used = (int) strlcpy(copy, *src, len + 1);
2202
2203 if (S_FM(node->suffix) || is_next_separator(node))
2204 {
2205 /*
2206 * This node is in Fill Mode, or the next node is known to be a
2207 * non-digit value, so we just slurp as many characters as we can get.
2208 */
2209 char *endptr;
2210
2211 errno = 0;
2212 result = strtol(init, &endptr, 10);
2213 *src = endptr;
2214 }
2215 else
2216 {
2217 /*
2218 * We need to pull exactly the number of characters given in 'len' out
2219 * of the string, and convert those.
2220 */
2221 char *last;
2222
2223 if (used < len)
2224 ereport(ERROR,
2225 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2226 errmsg("source string too short for \"%s\" formatting field",
2227 node->key->name),
2228 errdetail("Field requires %d characters, but only %d "
2229 "remain.",
2230 len, used),
2231 errhint("If your source string is not fixed-width, try "
2232 "using the \"FM\" modifier.")));
2233
2234 errno = 0;
2235 result = strtol(copy, &last, 10);
2236 used = last - copy;
2237
2238 if (used > 0 && used < len)
2239 ereport(ERROR,
2240 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2241 errmsg("invalid value \"%s\" for \"%s\"",
2242 copy, node->key->name),
2243 errdetail("Field requires %d characters, but only %d "
2244 "could be parsed.", len, used),
2245 errhint("If your source string is not fixed-width, try "
2246 "using the \"FM\" modifier.")));
2247
2248 *src += used;
2249 }
2250
2251 if (*src == init)
2252 ereport(ERROR,
2253 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2254 errmsg("invalid value \"%s\" for \"%s\"",
2255 copy, node->key->name),
2256 errdetail("Value must be an integer.")));
2257
2258 if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2259 ereport(ERROR,
2260 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2261 errmsg("value for \"%s\" in source string is out of range",
2262 node->key->name),
2263 errdetail("Value must be in the range %d to %d.",
2264 INT_MIN, INT_MAX)));
2265
2266 if (dest != NULL)
2267 from_char_set_int(dest, (int) result, node);
2268 return *src - init;
2269 }
2270
2271 /*
2272 * Call from_char_parse_int_len(), using the length of the format keyword as
2273 * the expected length of the field.
2274 *
2275 * Don't call this function if the field differs in length from the format
2276 * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2277 * In such cases, call from_char_parse_int_len() instead to specify the
2278 * required length explicitly.
2279 */
2280 static int
from_char_parse_int(int * dest,const char ** src,FormatNode * node)2281 from_char_parse_int(int *dest, const char **src, FormatNode *node)
2282 {
2283 return from_char_parse_int_len(dest, src, node->key->len, node);
2284 }
2285
/*
 * Look through the NULL-terminated "array" for the first entry that is a
 * case-insensitive prefix of "name".
 *
 * Returns the index of that entry, or -1 if there is none.
 *
 * *len receives the number of characters of "name" that were matched (the
 * length of the matching entry), or 0 when nothing matched.
 *
 * Characters are folded with pg_tolower, so this is only suitable for
 * comparisons to ASCII strings.
 */
static int
seq_search(const char *name, const char *const *array, int *len)
{
	unsigned char first_folded;
	int			idx;

	*len = 0;

	/* an empty string cannot match any entry */
	if (name[0] == '\0')
		return -1;

	/* fold the first character once; it is compared against every entry */
	first_folded = pg_tolower((unsigned char) name[0]);

	for (idx = 0; array[idx] != NULL; idx++)
	{
		const char *entry = array[idx];
		int			pos;

		/* cheap rejection on the first character */
		if (pg_tolower((unsigned char) entry[0]) != first_folded)
			continue;

		/*
		 * Advance while both strings still have characters and they agree.
		 */
		pos = 1;
		while (entry[pos] != '\0' &&
			   name[pos] != '\0' &&
			   pg_tolower((unsigned char) entry[pos]) ==
			   pg_tolower((unsigned char) name[pos]))
			pos++;

		/* reaching the end of the entry means all of it was matched */
		if (entry[pos] == '\0')
		{
			*len = pos;
			return idx;
		}
	}

	return -1;
}
2342
2343 /*
2344 * Perform a sequential search in 'array' for an entry matching the first
2345 * character(s) of the 'src' string case-insensitively.
2346 *
2347 * If a match is found, copy the array index of the match into the integer
2348 * pointed to by 'dest', advance 'src' to the end of the part of the string
2349 * which matched, and return the number of characters consumed.
2350 *
2351 * If the string doesn't match, throw an error.
2352 *
2353 * 'node' is used only for error reports: node->key->name identifies the
2354 * field type we were searching for.
2355 */
2356 static int
from_char_seq_search(int * dest,const char ** src,const char * const * array,FormatNode * node)2357 from_char_seq_search(int *dest, const char **src, const char *const *array,
2358 FormatNode *node)
2359 {
2360 int len;
2361
2362 *dest = seq_search(*src, array, &len);
2363
2364 if (len <= 0)
2365 {
2366 /*
2367 * In the error report, truncate the string at the next whitespace (if
2368 * any) to avoid including irrelevant data.
2369 */
2370 char *copy = pstrdup(*src);
2371 char *c;
2372
2373 for (c = copy; *c; c++)
2374 {
2375 if (scanner_isspace(*c))
2376 {
2377 *c = '\0';
2378 break;
2379 }
2380 }
2381
2382 ereport(ERROR,
2383 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2384 errmsg("invalid value \"%s\" for \"%s\"",
2385 copy, node->key->name),
2386 errdetail("The given value did not match any of the allowed "
2387 "values for this field.")));
2388 }
2389 *src += len;
2390 return len;
2391 }
2392
2393 /* ----------
2394 * Process a TmToChar struct as denoted by a list of FormatNodes.
2395 * The formatted data is written to the string pointed to by 'out'.
2396 * ----------
2397 */
2398 static void
DCH_to_char(FormatNode * node,bool is_interval,TmToChar * in,char * out,Oid collid)2399 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2400 {
2401 FormatNode *n;
2402 char *s;
2403 struct pg_tm *tm = &in->tm;
2404 int i;
2405
2406 /* cache localized days and months */
2407 cache_locale_time();
2408
2409 s = out;
2410 for (n = node; n->type != NODE_TYPE_END; n++)
2411 {
2412 if (n->type != NODE_TYPE_ACTION)
2413 {
2414 strcpy(s, n->character);
2415 s += strlen(s);
2416 continue;
2417 }
2418
2419 switch (n->key->id)
2420 {
2421 case DCH_A_M:
2422 case DCH_P_M:
2423 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2424 ? P_M_STR : A_M_STR);
2425 s += strlen(s);
2426 break;
2427 case DCH_AM:
2428 case DCH_PM:
2429 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2430 ? PM_STR : AM_STR);
2431 s += strlen(s);
2432 break;
2433 case DCH_a_m:
2434 case DCH_p_m:
2435 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2436 ? p_m_STR : a_m_STR);
2437 s += strlen(s);
2438 break;
2439 case DCH_am:
2440 case DCH_pm:
2441 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2442 ? pm_STR : am_STR);
2443 s += strlen(s);
2444 break;
2445 case DCH_HH:
2446 case DCH_HH12:
2447
2448 /*
2449 * display time as shown on a 12-hour clock, even for
2450 * intervals
2451 */
2452 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2453 tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2454 tm->tm_hour % (HOURS_PER_DAY / 2));
2455 if (S_THth(n->suffix))
2456 str_numth(s, s, S_TH_TYPE(n->suffix));
2457 s += strlen(s);
2458 break;
2459 case DCH_HH24:
2460 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2461 tm->tm_hour);
2462 if (S_THth(n->suffix))
2463 str_numth(s, s, S_TH_TYPE(n->suffix));
2464 s += strlen(s);
2465 break;
2466 case DCH_MI:
2467 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2468 tm->tm_min);
2469 if (S_THth(n->suffix))
2470 str_numth(s, s, S_TH_TYPE(n->suffix));
2471 s += strlen(s);
2472 break;
2473 case DCH_SS:
2474 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2475 tm->tm_sec);
2476 if (S_THth(n->suffix))
2477 str_numth(s, s, S_TH_TYPE(n->suffix));
2478 s += strlen(s);
2479 break;
2480 case DCH_MS: /* millisecond */
2481 sprintf(s, "%03d", (int) (in->fsec / INT64CONST(1000)));
2482 if (S_THth(n->suffix))
2483 str_numth(s, s, S_TH_TYPE(n->suffix));
2484 s += strlen(s);
2485 break;
2486 case DCH_US: /* microsecond */
2487 sprintf(s, "%06d", (int) in->fsec);
2488 if (S_THth(n->suffix))
2489 str_numth(s, s, S_TH_TYPE(n->suffix));
2490 s += strlen(s);
2491 break;
2492 case DCH_SSSS:
2493 sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2494 tm->tm_min * SECS_PER_MINUTE +
2495 tm->tm_sec);
2496 if (S_THth(n->suffix))
2497 str_numth(s, s, S_TH_TYPE(n->suffix));
2498 s += strlen(s);
2499 break;
2500 case DCH_tz:
2501 INVALID_FOR_INTERVAL;
2502 if (tmtcTzn(in))
2503 {
2504 /* We assume here that timezone names aren't localized */
2505 char *p = asc_tolower_z(tmtcTzn(in));
2506
2507 strcpy(s, p);
2508 pfree(p);
2509 s += strlen(s);
2510 }
2511 break;
2512 case DCH_TZ:
2513 INVALID_FOR_INTERVAL;
2514 if (tmtcTzn(in))
2515 {
2516 strcpy(s, tmtcTzn(in));
2517 s += strlen(s);
2518 }
2519 break;
2520 case DCH_TZH:
2521 INVALID_FOR_INTERVAL;
2522 sprintf(s, "%c%02d",
2523 (tm->tm_gmtoff >= 0) ? '+' : '-',
2524 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2525 s += strlen(s);
2526 break;
2527 case DCH_TZM:
2528 INVALID_FOR_INTERVAL;
2529 sprintf(s, "%02d",
2530 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2531 s += strlen(s);
2532 break;
2533 case DCH_OF:
2534 INVALID_FOR_INTERVAL;
2535 sprintf(s, "%c%0*d",
2536 (tm->tm_gmtoff >= 0) ? '+' : '-',
2537 S_FM(n->suffix) ? 0 : 2,
2538 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2539 s += strlen(s);
2540 if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2541 {
2542 sprintf(s, ":%02d",
2543 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2544 s += strlen(s);
2545 }
2546 break;
2547 case DCH_A_D:
2548 case DCH_B_C:
2549 INVALID_FOR_INTERVAL;
2550 strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2551 s += strlen(s);
2552 break;
2553 case DCH_AD:
2554 case DCH_BC:
2555 INVALID_FOR_INTERVAL;
2556 strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2557 s += strlen(s);
2558 break;
2559 case DCH_a_d:
2560 case DCH_b_c:
2561 INVALID_FOR_INTERVAL;
2562 strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2563 s += strlen(s);
2564 break;
2565 case DCH_ad:
2566 case DCH_bc:
2567 INVALID_FOR_INTERVAL;
2568 strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2569 s += strlen(s);
2570 break;
2571 case DCH_MONTH:
2572 INVALID_FOR_INTERVAL;
2573 if (!tm->tm_mon)
2574 break;
2575 if (S_TM(n->suffix))
2576 {
2577 char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2578
2579 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2580 strcpy(s, str);
2581 else
2582 ereport(ERROR,
2583 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2584 errmsg("localized string format value too long")));
2585 }
2586 else
2587 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2588 asc_toupper_z(months_full[tm->tm_mon - 1]));
2589 s += strlen(s);
2590 break;
2591 case DCH_Month:
2592 INVALID_FOR_INTERVAL;
2593 if (!tm->tm_mon)
2594 break;
2595 if (S_TM(n->suffix))
2596 {
2597 char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2598
2599 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2600 strcpy(s, str);
2601 else
2602 ereport(ERROR,
2603 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2604 errmsg("localized string format value too long")));
2605 }
2606 else
2607 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2608 months_full[tm->tm_mon - 1]);
2609 s += strlen(s);
2610 break;
2611 case DCH_month:
2612 INVALID_FOR_INTERVAL;
2613 if (!tm->tm_mon)
2614 break;
2615 if (S_TM(n->suffix))
2616 {
2617 char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2618
2619 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2620 strcpy(s, str);
2621 else
2622 ereport(ERROR,
2623 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2624 errmsg("localized string format value too long")));
2625 }
2626 else
2627 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2628 asc_tolower_z(months_full[tm->tm_mon - 1]));
2629 s += strlen(s);
2630 break;
2631 case DCH_MON:
2632 INVALID_FOR_INTERVAL;
2633 if (!tm->tm_mon)
2634 break;
2635 if (S_TM(n->suffix))
2636 {
2637 char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2638
2639 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2640 strcpy(s, str);
2641 else
2642 ereport(ERROR,
2643 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2644 errmsg("localized string format value too long")));
2645 }
2646 else
2647 strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2648 s += strlen(s);
2649 break;
2650 case DCH_Mon:
2651 INVALID_FOR_INTERVAL;
2652 if (!tm->tm_mon)
2653 break;
2654 if (S_TM(n->suffix))
2655 {
2656 char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2657
2658 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2659 strcpy(s, str);
2660 else
2661 ereport(ERROR,
2662 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2663 errmsg("localized string format value too long")));
2664 }
2665 else
2666 strcpy(s, months[tm->tm_mon - 1]);
2667 s += strlen(s);
2668 break;
2669 case DCH_mon:
2670 INVALID_FOR_INTERVAL;
2671 if (!tm->tm_mon)
2672 break;
2673 if (S_TM(n->suffix))
2674 {
2675 char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2676
2677 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2678 strcpy(s, str);
2679 else
2680 ereport(ERROR,
2681 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2682 errmsg("localized string format value too long")));
2683 }
2684 else
2685 strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2686 s += strlen(s);
2687 break;
2688 case DCH_MM:
2689 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2690 tm->tm_mon);
2691 if (S_THth(n->suffix))
2692 str_numth(s, s, S_TH_TYPE(n->suffix));
2693 s += strlen(s);
2694 break;
2695 case DCH_DAY:
2696 INVALID_FOR_INTERVAL;
2697 if (S_TM(n->suffix))
2698 {
2699 char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2700
2701 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2702 strcpy(s, str);
2703 else
2704 ereport(ERROR,
2705 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2706 errmsg("localized string format value too long")));
2707 }
2708 else
2709 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2710 asc_toupper_z(days[tm->tm_wday]));
2711 s += strlen(s);
2712 break;
2713 case DCH_Day:
2714 INVALID_FOR_INTERVAL;
2715 if (S_TM(n->suffix))
2716 {
2717 char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2718
2719 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2720 strcpy(s, str);
2721 else
2722 ereport(ERROR,
2723 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2724 errmsg("localized string format value too long")));
2725 }
2726 else
2727 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2728 days[tm->tm_wday]);
2729 s += strlen(s);
2730 break;
2731 case DCH_day:
2732 INVALID_FOR_INTERVAL;
2733 if (S_TM(n->suffix))
2734 {
2735 char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
2736
2737 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2738 strcpy(s, str);
2739 else
2740 ereport(ERROR,
2741 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2742 errmsg("localized string format value too long")));
2743 }
2744 else
2745 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2746 asc_tolower_z(days[tm->tm_wday]));
2747 s += strlen(s);
2748 break;
2749 case DCH_DY:
2750 INVALID_FOR_INTERVAL;
2751 if (S_TM(n->suffix))
2752 {
2753 char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
2754
2755 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2756 strcpy(s, str);
2757 else
2758 ereport(ERROR,
2759 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2760 errmsg("localized string format value too long")));
2761 }
2762 else
2763 strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
2764 s += strlen(s);
2765 break;
2766 case DCH_Dy:
2767 INVALID_FOR_INTERVAL;
2768 if (S_TM(n->suffix))
2769 {
2770 char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
2771
2772 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2773 strcpy(s, str);
2774 else
2775 ereport(ERROR,
2776 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2777 errmsg("localized string format value too long")));
2778 }
2779 else
2780 strcpy(s, days_short[tm->tm_wday]);
2781 s += strlen(s);
2782 break;
2783 case DCH_dy:
2784 INVALID_FOR_INTERVAL;
2785 if (S_TM(n->suffix))
2786 {
2787 char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
2788
2789 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2790 strcpy(s, str);
2791 else
2792 ereport(ERROR,
2793 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2794 errmsg("localized string format value too long")));
2795 }
2796 else
2797 strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
2798 s += strlen(s);
2799 break;
2800 case DCH_DDD:
2801 case DCH_IDDD:
2802 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
2803 (n->key->id == DCH_DDD) ?
2804 tm->tm_yday :
2805 date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
2806 if (S_THth(n->suffix))
2807 str_numth(s, s, S_TH_TYPE(n->suffix));
2808 s += strlen(s);
2809 break;
2810 case DCH_DD:
2811 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
2812 if (S_THth(n->suffix))
2813 str_numth(s, s, S_TH_TYPE(n->suffix));
2814 s += strlen(s);
2815 break;
2816 case DCH_D:
2817 INVALID_FOR_INTERVAL;
2818 sprintf(s, "%d", tm->tm_wday + 1);
2819 if (S_THth(n->suffix))
2820 str_numth(s, s, S_TH_TYPE(n->suffix));
2821 s += strlen(s);
2822 break;
2823 case DCH_ID:
2824 INVALID_FOR_INTERVAL;
2825 sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
2826 if (S_THth(n->suffix))
2827 str_numth(s, s, S_TH_TYPE(n->suffix));
2828 s += strlen(s);
2829 break;
2830 case DCH_WW:
2831 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2832 (tm->tm_yday - 1) / 7 + 1);
2833 if (S_THth(n->suffix))
2834 str_numth(s, s, S_TH_TYPE(n->suffix));
2835 s += strlen(s);
2836 break;
2837 case DCH_IW:
2838 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2839 date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
2840 if (S_THth(n->suffix))
2841 str_numth(s, s, S_TH_TYPE(n->suffix));
2842 s += strlen(s);
2843 break;
2844 case DCH_Q:
2845 if (!tm->tm_mon)
2846 break;
2847 sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
2848 if (S_THth(n->suffix))
2849 str_numth(s, s, S_TH_TYPE(n->suffix));
2850 s += strlen(s);
2851 break;
2852 case DCH_CC:
2853 if (is_interval) /* straight calculation */
2854 i = tm->tm_year / 100;
2855 else
2856 {
2857 if (tm->tm_year > 0)
2858 /* Century 20 == 1901 - 2000 */
2859 i = (tm->tm_year - 1) / 100 + 1;
2860 else
2861 /* Century 6BC == 600BC - 501BC */
2862 i = tm->tm_year / 100 - 1;
2863 }
2864 if (i <= 99 && i >= -99)
2865 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
2866 else
2867 sprintf(s, "%d", i);
2868 if (S_THth(n->suffix))
2869 str_numth(s, s, S_TH_TYPE(n->suffix));
2870 s += strlen(s);
2871 break;
2872 case DCH_Y_YYY:
2873 i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
2874 sprintf(s, "%d,%03d", i,
2875 ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
2876 if (S_THth(n->suffix))
2877 str_numth(s, s, S_TH_TYPE(n->suffix));
2878 s += strlen(s);
2879 break;
2880 case DCH_YYYY:
2881 case DCH_IYYY:
2882 sprintf(s, "%0*d",
2883 S_FM(n->suffix) ? 0 :
2884 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
2885 (n->key->id == DCH_YYYY ?
2886 ADJUST_YEAR(tm->tm_year, is_interval) :
2887 ADJUST_YEAR(date2isoyear(tm->tm_year,
2888 tm->tm_mon,
2889 tm->tm_mday),
2890 is_interval)));
2891 if (S_THth(n->suffix))
2892 str_numth(s, s, S_TH_TYPE(n->suffix));
2893 s += strlen(s);
2894 break;
2895 case DCH_YYY:
2896 case DCH_IYY:
2897 sprintf(s, "%0*d",
2898 S_FM(n->suffix) ? 0 :
2899 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
2900 (n->key->id == DCH_YYY ?
2901 ADJUST_YEAR(tm->tm_year, is_interval) :
2902 ADJUST_YEAR(date2isoyear(tm->tm_year,
2903 tm->tm_mon,
2904 tm->tm_mday),
2905 is_interval)) % 1000);
2906 if (S_THth(n->suffix))
2907 str_numth(s, s, S_TH_TYPE(n->suffix));
2908 s += strlen(s);
2909 break;
2910 case DCH_YY:
2911 case DCH_IY:
2912 sprintf(s, "%0*d",
2913 S_FM(n->suffix) ? 0 :
2914 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
2915 (n->key->id == DCH_YY ?
2916 ADJUST_YEAR(tm->tm_year, is_interval) :
2917 ADJUST_YEAR(date2isoyear(tm->tm_year,
2918 tm->tm_mon,
2919 tm->tm_mday),
2920 is_interval)) % 100);
2921 if (S_THth(n->suffix))
2922 str_numth(s, s, S_TH_TYPE(n->suffix));
2923 s += strlen(s);
2924 break;
2925 case DCH_Y:
2926 case DCH_I:
2927 sprintf(s, "%1d",
2928 (n->key->id == DCH_Y ?
2929 ADJUST_YEAR(tm->tm_year, is_interval) :
2930 ADJUST_YEAR(date2isoyear(tm->tm_year,
2931 tm->tm_mon,
2932 tm->tm_mday),
2933 is_interval)) % 10);
2934 if (S_THth(n->suffix))
2935 str_numth(s, s, S_TH_TYPE(n->suffix));
2936 s += strlen(s);
2937 break;
2938 case DCH_RM:
2939 /* FALLTHROUGH */
2940 case DCH_rm:
2941
2942 /*
2943 * For intervals, values like '12 month' will be reduced to 0
2944 * month and some years. These should be processed.
2945 */
2946 if (!tm->tm_mon && !tm->tm_year)
2947 break;
2948 else
2949 {
2950 int mon = 0;
2951 const char *const *months;
2952
2953 if (n->key->id == DCH_RM)
2954 months = rm_months_upper;
2955 else
2956 months = rm_months_lower;
2957
2958 /*
2959 * Compute the position in the roman-numeral array. Note
2960 * that the contents of the array are reversed, December
2961 * being first and January last.
2962 */
2963 if (tm->tm_mon == 0)
2964 {
2965 /*
2966 * This case is special, and tracks the case of full
2967 * interval years.
2968 */
2969 mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
2970 }
2971 else if (tm->tm_mon < 0)
2972 {
2973 /*
2974 * Negative case. In this case, the calculation is
2975 * reversed, where -1 means December, -2 November,
2976 * etc.
2977 */
2978 mon = -1 * (tm->tm_mon + 1);
2979 }
2980 else
2981 {
2982 /*
2983 * Common case, with a strictly positive value. The
2984 * position in the array matches with the value of
2985 * tm_mon.
2986 */
2987 mon = MONTHS_PER_YEAR - tm->tm_mon;
2988 }
2989
2990 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
2991 months[mon]);
2992 s += strlen(s);
2993 }
2994 break;
2995 case DCH_W:
2996 sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
2997 if (S_THth(n->suffix))
2998 str_numth(s, s, S_TH_TYPE(n->suffix));
2999 s += strlen(s);
3000 break;
3001 case DCH_J:
3002 sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3003 if (S_THth(n->suffix))
3004 str_numth(s, s, S_TH_TYPE(n->suffix));
3005 s += strlen(s);
3006 break;
3007 }
3008 }
3009
3010 *s = '\0';
3011 }
3012
3013 /* ----------
3014 * Process a string as denoted by a list of FormatNodes.
3015 * The TmFromChar struct pointed to by 'out' is populated with the results.
3016 *
3017 * Note: we currently don't have any to_interval() function, so there
3018 * is no need here for INVALID_FOR_INTERVAL checks.
3019 * ----------
3020 */
3021 static void
DCH_from_char(FormatNode * node,const char * in,TmFromChar * out)3022 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out)
3023 {
3024 FormatNode *n;
3025 const char *s;
3026 int len,
3027 value;
3028 bool fx_mode = false;
3029
3030 for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3031 {
3032 if (n->type != NODE_TYPE_ACTION)
3033 {
3034 /*
3035 * Separator, so consume one character from input string. Notice
3036 * we don't insist that the consumed character match the format's
3037 * character.
3038 */
3039 s += pg_mblen(s);
3040 continue;
3041 }
3042
3043 /* Ignore spaces before fields when not in FX (fixed width) mode */
3044 if (!fx_mode && n->key->id != DCH_FX)
3045 {
3046 while (*s != '\0' && isspace((unsigned char) *s))
3047 s++;
3048 }
3049
3050 from_char_set_mode(out, n->key->date_mode);
3051
3052 switch (n->key->id)
3053 {
3054 case DCH_FX:
3055 fx_mode = true;
3056 break;
3057 case DCH_A_M:
3058 case DCH_P_M:
3059 case DCH_a_m:
3060 case DCH_p_m:
3061 from_char_seq_search(&value, &s, ampm_strings_long,
3062 n);
3063 from_char_set_int(&out->pm, value % 2, n);
3064 out->clock = CLOCK_12_HOUR;
3065 break;
3066 case DCH_AM:
3067 case DCH_PM:
3068 case DCH_am:
3069 case DCH_pm:
3070 from_char_seq_search(&value, &s, ampm_strings,
3071 n);
3072 from_char_set_int(&out->pm, value % 2, n);
3073 out->clock = CLOCK_12_HOUR;
3074 break;
3075 case DCH_HH:
3076 case DCH_HH12:
3077 from_char_parse_int_len(&out->hh, &s, 2, n);
3078 out->clock = CLOCK_12_HOUR;
3079 SKIP_THth(s, n->suffix);
3080 break;
3081 case DCH_HH24:
3082 from_char_parse_int_len(&out->hh, &s, 2, n);
3083 SKIP_THth(s, n->suffix);
3084 break;
3085 case DCH_MI:
3086 from_char_parse_int(&out->mi, &s, n);
3087 SKIP_THth(s, n->suffix);
3088 break;
3089 case DCH_SS:
3090 from_char_parse_int(&out->ss, &s, n);
3091 SKIP_THth(s, n->suffix);
3092 break;
3093 case DCH_MS: /* millisecond */
3094 len = from_char_parse_int_len(&out->ms, &s, 3, n);
3095
3096 /*
3097 * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3098 */
3099 out->ms *= len == 1 ? 100 :
3100 len == 2 ? 10 : 1;
3101
3102 SKIP_THth(s, n->suffix);
3103 break;
3104 case DCH_US: /* microsecond */
3105 len = from_char_parse_int_len(&out->us, &s, 6, n);
3106
3107 out->us *= len == 1 ? 100000 :
3108 len == 2 ? 10000 :
3109 len == 3 ? 1000 :
3110 len == 4 ? 100 :
3111 len == 5 ? 10 : 1;
3112
3113 SKIP_THth(s, n->suffix);
3114 break;
3115 case DCH_SSSS:
3116 from_char_parse_int(&out->ssss, &s, n);
3117 SKIP_THth(s, n->suffix);
3118 break;
3119 case DCH_tz:
3120 case DCH_TZ:
3121 case DCH_OF:
3122 ereport(ERROR,
3123 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3124 errmsg("formatting field \"%s\" is only supported in to_char",
3125 n->key->name)));
3126 break;
3127 case DCH_TZH:
3128 out->tzsign = *s == '-' ? -1 : +1;
3129
3130 if (*s == '+' || *s == '-' || *s == ' ')
3131 s++;
3132
3133 from_char_parse_int_len(&out->tzh, &s, 2, n);
3134 break;
3135 case DCH_TZM:
3136 /* assign positive timezone sign if TZH was not seen before */
3137 if (!out->tzsign)
3138 out->tzsign = +1;
3139 from_char_parse_int_len(&out->tzm, &s, 2, n);
3140 break;
3141 case DCH_A_D:
3142 case DCH_B_C:
3143 case DCH_a_d:
3144 case DCH_b_c:
3145 from_char_seq_search(&value, &s, adbc_strings_long,
3146 n);
3147 from_char_set_int(&out->bc, value % 2, n);
3148 break;
3149 case DCH_AD:
3150 case DCH_BC:
3151 case DCH_ad:
3152 case DCH_bc:
3153 from_char_seq_search(&value, &s, adbc_strings,
3154 n);
3155 from_char_set_int(&out->bc, value % 2, n);
3156 break;
3157 case DCH_MONTH:
3158 case DCH_Month:
3159 case DCH_month:
3160 from_char_seq_search(&value, &s, months_full,
3161 n);
3162 from_char_set_int(&out->mm, value + 1, n);
3163 break;
3164 case DCH_MON:
3165 case DCH_Mon:
3166 case DCH_mon:
3167 from_char_seq_search(&value, &s, months,
3168 n);
3169 from_char_set_int(&out->mm, value + 1, n);
3170 break;
3171 case DCH_MM:
3172 from_char_parse_int(&out->mm, &s, n);
3173 SKIP_THth(s, n->suffix);
3174 break;
3175 case DCH_DAY:
3176 case DCH_Day:
3177 case DCH_day:
3178 from_char_seq_search(&value, &s, days,
3179 n);
3180 from_char_set_int(&out->d, value, n);
3181 out->d++;
3182 break;
3183 case DCH_DY:
3184 case DCH_Dy:
3185 case DCH_dy:
3186 from_char_seq_search(&value, &s, days_short,
3187 n);
3188 from_char_set_int(&out->d, value, n);
3189 out->d++;
3190 break;
3191 case DCH_DDD:
3192 from_char_parse_int(&out->ddd, &s, n);
3193 SKIP_THth(s, n->suffix);
3194 break;
3195 case DCH_IDDD:
3196 from_char_parse_int_len(&out->ddd, &s, 3, n);
3197 SKIP_THth(s, n->suffix);
3198 break;
3199 case DCH_DD:
3200 from_char_parse_int(&out->dd, &s, n);
3201 SKIP_THth(s, n->suffix);
3202 break;
3203 case DCH_D:
3204 from_char_parse_int(&out->d, &s, n);
3205 SKIP_THth(s, n->suffix);
3206 break;
3207 case DCH_ID:
3208 from_char_parse_int_len(&out->d, &s, 1, n);
3209 /* Shift numbering to match Gregorian where Sunday = 1 */
3210 if (++out->d > 7)
3211 out->d = 1;
3212 SKIP_THth(s, n->suffix);
3213 break;
3214 case DCH_WW:
3215 case DCH_IW:
3216 from_char_parse_int(&out->ww, &s, n);
3217 SKIP_THth(s, n->suffix);
3218 break;
3219 case DCH_Q:
3220
3221 /*
3222 * We ignore 'Q' when converting to date because it is unclear
3223 * which date in the quarter to use, and some people specify
3224 * both quarter and month, so if it was honored it might
3225 * conflict with the supplied month. That is also why we don't
3226 * throw an error.
3227 *
3228 * We still parse the source string for an integer, but it
3229 * isn't stored anywhere in 'out'.
3230 */
3231 from_char_parse_int((int *) NULL, &s, n);
3232 SKIP_THth(s, n->suffix);
3233 break;
3234 case DCH_CC:
3235 from_char_parse_int(&out->cc, &s, n);
3236 SKIP_THth(s, n->suffix);
3237 break;
3238 case DCH_Y_YYY:
3239 {
3240 int matched,
3241 years,
3242 millennia,
3243 nch;
3244
3245 matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3246 if (matched < 2)
3247 ereport(ERROR,
3248 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3249 errmsg("invalid input string for \"Y,YYY\"")));
3250 years += (millennia * 1000);
3251 from_char_set_int(&out->year, years, n);
3252 out->yysz = 4;
3253 s += nch;
3254 SKIP_THth(s, n->suffix);
3255 }
3256 break;
3257 case DCH_YYYY:
3258 case DCH_IYYY:
3259 from_char_parse_int(&out->year, &s, n);
3260 out->yysz = 4;
3261 SKIP_THth(s, n->suffix);
3262 break;
3263 case DCH_YYY:
3264 case DCH_IYY:
3265 if (from_char_parse_int(&out->year, &s, n) < 4)
3266 out->year = adjust_partial_year_to_2020(out->year);
3267 out->yysz = 3;
3268 SKIP_THth(s, n->suffix);
3269 break;
3270 case DCH_YY:
3271 case DCH_IY:
3272 if (from_char_parse_int(&out->year, &s, n) < 4)
3273 out->year = adjust_partial_year_to_2020(out->year);
3274 out->yysz = 2;
3275 SKIP_THth(s, n->suffix);
3276 break;
3277 case DCH_Y:
3278 case DCH_I:
3279 if (from_char_parse_int(&out->year, &s, n) < 4)
3280 out->year = adjust_partial_year_to_2020(out->year);
3281 out->yysz = 1;
3282 SKIP_THth(s, n->suffix);
3283 break;
3284 case DCH_RM:
3285 case DCH_rm:
3286 from_char_seq_search(&value, &s, rm_months_lower,
3287 n);
3288 from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n);
3289 break;
3290 case DCH_W:
3291 from_char_parse_int(&out->w, &s, n);
3292 SKIP_THth(s, n->suffix);
3293 break;
3294 case DCH_J:
3295 from_char_parse_int(&out->j, &s, n);
3296 SKIP_THth(s, n->suffix);
3297 break;
3298 }
3299 }
3300 }
3301
3302 /* select a DCHCacheEntry to hold the given format picture */
3303 static DCHCacheEntry *
DCH_cache_getnew(const char * str)3304 DCH_cache_getnew(const char *str)
3305 {
3306 DCHCacheEntry *ent;
3307
3308 /* counter overflow check - paranoia? */
3309 if (DCHCounter >= (INT_MAX - DCH_CACHE_ENTRIES))
3310 {
3311 DCHCounter = 0;
3312
3313 for (ent = DCHCache; ent < (DCHCache + DCH_CACHE_ENTRIES); ent++)
3314 ent->age = (++DCHCounter);
3315 }
3316
3317 /*
3318 * If cache is full, remove oldest entry (or recycle first not-valid one)
3319 */
3320 if (n_DCHCache >= DCH_CACHE_ENTRIES)
3321 {
3322 DCHCacheEntry *old = DCHCache + 0;
3323
3324 #ifdef DEBUG_TO_FROM_CHAR
3325 elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3326 #endif
3327 if (old->valid)
3328 {
3329 for (ent = DCHCache + 1; ent < (DCHCache + DCH_CACHE_ENTRIES); ent++)
3330 {
3331 if (!ent->valid)
3332 {
3333 old = ent;
3334 break;
3335 }
3336 if (ent->age < old->age)
3337 old = ent;
3338 }
3339 }
3340 #ifdef DEBUG_TO_FROM_CHAR
3341 elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3342 #endif
3343 old->valid = false;
3344 StrNCpy(old->str, str, DCH_CACHE_SIZE + 1);
3345 old->age = (++DCHCounter);
3346 /* caller is expected to fill format, then set valid */
3347 return old;
3348 }
3349 else
3350 {
3351 #ifdef DEBUG_TO_FROM_CHAR
3352 elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3353 #endif
3354 ent = DCHCache + n_DCHCache;
3355 ent->valid = false;
3356 StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
3357 ent->age = (++DCHCounter);
3358 /* caller is expected to fill format, then set valid */
3359 ++n_DCHCache;
3360 return ent;
3361 }
3362 }
3363
3364 /* look for an existing DCHCacheEntry matching the given format picture */
3365 static DCHCacheEntry *
DCH_cache_search(const char * str)3366 DCH_cache_search(const char *str)
3367 {
3368 int i;
3369 DCHCacheEntry *ent;
3370
3371 /* counter overflow check - paranoia? */
3372 if (DCHCounter >= (INT_MAX - DCH_CACHE_ENTRIES))
3373 {
3374 DCHCounter = 0;
3375
3376 for (ent = DCHCache; ent < (DCHCache + DCH_CACHE_ENTRIES); ent++)
3377 ent->age = (++DCHCounter);
3378 }
3379
3380 for (i = 0, ent = DCHCache; i < n_DCHCache; i++, ent++)
3381 {
3382 if (ent->valid && strcmp(ent->str, str) == 0)
3383 {
3384 ent->age = (++DCHCounter);
3385 return ent;
3386 }
3387 }
3388
3389 return NULL;
3390 }
3391
3392 /* Find or create a DCHCacheEntry for the given format picture */
3393 static DCHCacheEntry *
DCH_cache_fetch(const char * str)3394 DCH_cache_fetch(const char *str)
3395 {
3396 DCHCacheEntry *ent;
3397
3398 if ((ent = DCH_cache_search(str)) == NULL)
3399 {
3400 /*
3401 * Not in the cache, must run parser and save a new format-picture to
3402 * the cache. Do not mark the cache entry valid until parsing
3403 * succeeds.
3404 */
3405 ent = DCH_cache_getnew(str);
3406
3407 parse_format(ent->format, str, DCH_keywords,
3408 DCH_suff, DCH_index, DCH_TYPE, NULL);
3409
3410 ent->valid = true;
3411 }
3412 return ent;
3413 }
3414
3415 /*
3416 * Format a date/time or interval into a string according to fmt.
3417 * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
3418 * for formatting.
3419 */
3420 static text *
datetime_to_char_body(TmToChar * tmtc,text * fmt,bool is_interval,Oid collid)3421 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
3422 {
3423 FormatNode *format;
3424 char *fmt_str,
3425 *result;
3426 bool incache;
3427 int fmt_len;
3428 text *res;
3429
3430 /*
3431 * Convert fmt to C string
3432 */
3433 fmt_str = text_to_cstring(fmt);
3434 fmt_len = strlen(fmt_str);
3435
3436 /*
3437 * Allocate workspace for result as C string
3438 */
3439 result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
3440 *result = '\0';
3441
3442 if (fmt_len > DCH_CACHE_SIZE)
3443 {
3444 /*
3445 * Allocate new memory if format picture is bigger than static cache
3446 * and do not use cache (call parser always)
3447 */
3448 incache = false;
3449
3450 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3451
3452 parse_format(format, fmt_str, DCH_keywords,
3453 DCH_suff, DCH_index, DCH_TYPE, NULL);
3454 }
3455 else
3456 {
3457 /*
3458 * Use cache buffers
3459 */
3460 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3461
3462 incache = true;
3463 format = ent->format;
3464 }
3465
3466 /* The real work is here */
3467 DCH_to_char(format, is_interval, tmtc, result, collid);
3468
3469 if (!incache)
3470 pfree(format);
3471
3472 pfree(fmt_str);
3473
3474 /* convert C-string result to TEXT format */
3475 res = cstring_to_text(result);
3476
3477 pfree(result);
3478 return res;
3479 }
3480
3481 /****************************************************************************
3482 * Public routines
3483 ***************************************************************************/
3484
3485 /* -------------------
3486 * TIMESTAMP to_char()
3487 * -------------------
3488 */
3489 Datum
timestamp_to_char(PG_FUNCTION_ARGS)3490 timestamp_to_char(PG_FUNCTION_ARGS)
3491 {
3492 Timestamp dt = PG_GETARG_TIMESTAMP(0);
3493 text *fmt = PG_GETARG_TEXT_PP(1),
3494 *res;
3495 TmToChar tmtc;
3496 struct pg_tm *tm;
3497 int thisdate;
3498
3499 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3500 PG_RETURN_NULL();
3501
3502 ZERO_tmtc(&tmtc);
3503 tm = tmtcTm(&tmtc);
3504
3505 if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
3506 ereport(ERROR,
3507 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3508 errmsg("timestamp out of range")));
3509
3510 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3511 tm->tm_wday = (thisdate + 1) % 7;
3512 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3513
3514 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3515 PG_RETURN_NULL();
3516
3517 PG_RETURN_TEXT_P(res);
3518 }
3519
3520 Datum
timestamptz_to_char(PG_FUNCTION_ARGS)3521 timestamptz_to_char(PG_FUNCTION_ARGS)
3522 {
3523 TimestampTz dt = PG_GETARG_TIMESTAMP(0);
3524 text *fmt = PG_GETARG_TEXT_PP(1),
3525 *res;
3526 TmToChar tmtc;
3527 int tz;
3528 struct pg_tm *tm;
3529 int thisdate;
3530
3531 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3532 PG_RETURN_NULL();
3533
3534 ZERO_tmtc(&tmtc);
3535 tm = tmtcTm(&tmtc);
3536
3537 if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
3538 ereport(ERROR,
3539 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3540 errmsg("timestamp out of range")));
3541
3542 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3543 tm->tm_wday = (thisdate + 1) % 7;
3544 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3545
3546 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3547 PG_RETURN_NULL();
3548
3549 PG_RETURN_TEXT_P(res);
3550 }
3551
3552
3553 /* -------------------
3554 * INTERVAL to_char()
3555 * -------------------
3556 */
3557 Datum
interval_to_char(PG_FUNCTION_ARGS)3558 interval_to_char(PG_FUNCTION_ARGS)
3559 {
3560 Interval *it = PG_GETARG_INTERVAL_P(0);
3561 text *fmt = PG_GETARG_TEXT_PP(1),
3562 *res;
3563 TmToChar tmtc;
3564 struct pg_tm *tm;
3565
3566 if (VARSIZE_ANY_EXHDR(fmt) <= 0)
3567 PG_RETURN_NULL();
3568
3569 ZERO_tmtc(&tmtc);
3570 tm = tmtcTm(&tmtc);
3571
3572 if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
3573 PG_RETURN_NULL();
3574
3575 /* wday is meaningless, yday approximates the total span in days */
3576 tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
3577
3578 if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
3579 PG_RETURN_NULL();
3580
3581 PG_RETURN_TEXT_P(res);
3582 }
3583
3584 /* ---------------------
3585 * TO_TIMESTAMP()
3586 *
3587 * Make Timestamp from date_str which is formatted at argument 'fmt'
3588 * ( to_timestamp is reverse to_char() )
3589 * ---------------------
3590 */
3591 Datum
to_timestamp(PG_FUNCTION_ARGS)3592 to_timestamp(PG_FUNCTION_ARGS)
3593 {
3594 text *date_txt = PG_GETARG_TEXT_PP(0);
3595 text *fmt = PG_GETARG_TEXT_PP(1);
3596 Timestamp result;
3597 int tz;
3598 struct pg_tm tm;
3599 fsec_t fsec;
3600
3601 do_to_timestamp(date_txt, fmt, &tm, &fsec);
3602
3603 /* Use the specified time zone, if any. */
3604 if (tm.tm_zone)
3605 {
3606 int dterr = DecodeTimezone((char *) tm.tm_zone, &tz);
3607
3608 if (dterr)
3609 DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
3610 }
3611 else
3612 tz = DetermineTimeZoneOffset(&tm, session_timezone);
3613
3614 if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
3615 ereport(ERROR,
3616 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3617 errmsg("timestamp out of range")));
3618
3619 PG_RETURN_TIMESTAMP(result);
3620 }
3621
3622 /* ----------
3623 * TO_DATE
3624 * Make Date from date_str which is formated at argument 'fmt'
3625 * ----------
3626 */
3627 Datum
to_date(PG_FUNCTION_ARGS)3628 to_date(PG_FUNCTION_ARGS)
3629 {
3630 text *date_txt = PG_GETARG_TEXT_PP(0);
3631 text *fmt = PG_GETARG_TEXT_PP(1);
3632 DateADT result;
3633 struct pg_tm tm;
3634 fsec_t fsec;
3635
3636 do_to_timestamp(date_txt, fmt, &tm, &fsec);
3637
3638 /* Prevent overflow in Julian-day routines */
3639 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
3640 ereport(ERROR,
3641 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3642 errmsg("date out of range: \"%s\"",
3643 text_to_cstring(date_txt))));
3644
3645 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
3646
3647 /* Now check for just-out-of-range dates */
3648 if (!IS_VALID_DATE(result))
3649 ereport(ERROR,
3650 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3651 errmsg("date out of range: \"%s\"",
3652 text_to_cstring(date_txt))));
3653
3654 PG_RETURN_DATEADT(result);
3655 }
3656
3657 /*
3658 * do_to_timestamp: shared code for to_timestamp and to_date
3659 *
3660 * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm
3661 * and fractional seconds.
3662 *
3663 * We parse 'fmt' into a list of FormatNodes, which is then passed to
3664 * DCH_from_char to populate a TmFromChar with the parsed contents of
3665 * 'date_txt'.
3666 *
3667 * The TmFromChar is then analysed and converted into the final results in
3668 * struct 'tm' and 'fsec'.
3669 */
3670 static void
do_to_timestamp(text * date_txt,text * fmt,struct pg_tm * tm,fsec_t * fsec)3671 do_to_timestamp(text *date_txt, text *fmt,
3672 struct pg_tm *tm, fsec_t *fsec)
3673 {
3674 FormatNode *format;
3675 TmFromChar tmfc;
3676 int fmt_len;
3677 char *date_str;
3678 int fmask;
3679
3680 date_str = text_to_cstring(date_txt);
3681
3682 ZERO_tmfc(&tmfc);
3683 ZERO_tm(tm);
3684 *fsec = 0;
3685 fmask = 0; /* bit mask for ValidateDate() */
3686
3687 fmt_len = VARSIZE_ANY_EXHDR(fmt);
3688
3689 if (fmt_len)
3690 {
3691 char *fmt_str;
3692 bool incache;
3693
3694 fmt_str = text_to_cstring(fmt);
3695
3696 if (fmt_len > DCH_CACHE_SIZE)
3697 {
3698 /*
3699 * Allocate new memory if format picture is bigger than static
3700 * cache and do not use cache (call parser always)
3701 */
3702 incache = false;
3703
3704 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3705
3706 parse_format(format, fmt_str, DCH_keywords,
3707 DCH_suff, DCH_index, DCH_TYPE, NULL);
3708 }
3709 else
3710 {
3711 /*
3712 * Use cache buffers
3713 */
3714 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3715
3716 incache = true;
3717 format = ent->format;
3718 }
3719
3720 #ifdef DEBUG_TO_FROM_CHAR
3721 /* dump_node(format, fmt_len); */
3722 /* dump_index(DCH_keywords, DCH_index); */
3723 #endif
3724
3725 DCH_from_char(format, date_str, &tmfc);
3726
3727 pfree(fmt_str);
3728 if (!incache)
3729 pfree(format);
3730 }
3731
3732 DEBUG_TMFC(&tmfc);
3733
3734 /*
3735 * Convert to_date/to_timestamp input fields to standard 'tm'
3736 */
3737 if (tmfc.ssss)
3738 {
3739 int x = tmfc.ssss;
3740
3741 tm->tm_hour = x / SECS_PER_HOUR;
3742 x %= SECS_PER_HOUR;
3743 tm->tm_min = x / SECS_PER_MINUTE;
3744 x %= SECS_PER_MINUTE;
3745 tm->tm_sec = x;
3746 }
3747
3748 if (tmfc.ss)
3749 tm->tm_sec = tmfc.ss;
3750 if (tmfc.mi)
3751 tm->tm_min = tmfc.mi;
3752 if (tmfc.hh)
3753 tm->tm_hour = tmfc.hh;
3754
3755 if (tmfc.clock == CLOCK_12_HOUR)
3756 {
3757 if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
3758 ereport(ERROR,
3759 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3760 errmsg("hour \"%d\" is invalid for the 12-hour clock",
3761 tm->tm_hour),
3762 errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
3763
3764 if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
3765 tm->tm_hour += HOURS_PER_DAY / 2;
3766 else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
3767 tm->tm_hour = 0;
3768 }
3769
3770 if (tmfc.year)
3771 {
3772 /*
3773 * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
3774 * the year in the given century. Keep in mind that the 21st century
3775 * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
3776 * 600BC to 501BC.
3777 */
3778 if (tmfc.cc && tmfc.yysz <= 2)
3779 {
3780 if (tmfc.bc)
3781 tmfc.cc = -tmfc.cc;
3782 tm->tm_year = tmfc.year % 100;
3783 if (tm->tm_year)
3784 {
3785 if (tmfc.cc >= 0)
3786 tm->tm_year += (tmfc.cc - 1) * 100;
3787 else
3788 tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
3789 }
3790 else
3791 {
3792 /* find century year for dates ending in "00" */
3793 tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
3794 }
3795 }
3796 else
3797 {
3798 /* If a 4-digit year is provided, we use that and ignore CC. */
3799 tm->tm_year = tmfc.year;
3800 if (tmfc.bc)
3801 tm->tm_year = -tm->tm_year;
3802 /* correct for our representation of BC years */
3803 if (tm->tm_year < 0)
3804 tm->tm_year++;
3805 }
3806 fmask |= DTK_M(YEAR);
3807 }
3808 else if (tmfc.cc)
3809 {
3810 /* use first year of century */
3811 if (tmfc.bc)
3812 tmfc.cc = -tmfc.cc;
3813 if (tmfc.cc >= 0)
3814 /* +1 because 21st century started in 2001 */
3815 tm->tm_year = (tmfc.cc - 1) * 100 + 1;
3816 else
3817 /* +1 because year == 599 is 600 BC */
3818 tm->tm_year = tmfc.cc * 100 + 1;
3819 fmask |= DTK_M(YEAR);
3820 }
3821
3822 if (tmfc.j)
3823 {
3824 j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3825 fmask |= DTK_DATE_M;
3826 }
3827
3828 if (tmfc.ww)
3829 {
3830 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
3831 {
3832 /*
3833 * If tmfc.d is not set, then the date is left at the beginning of
3834 * the ISO week (Monday).
3835 */
3836 if (tmfc.d)
3837 isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3838 else
3839 isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3840 fmask |= DTK_DATE_M;
3841 }
3842 else
3843 tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
3844 }
3845
3846 if (tmfc.w)
3847 tmfc.dd = (tmfc.w - 1) * 7 + 1;
3848 if (tmfc.dd)
3849 {
3850 tm->tm_mday = tmfc.dd;
3851 fmask |= DTK_M(DAY);
3852 }
3853 if (tmfc.mm)
3854 {
3855 tm->tm_mon = tmfc.mm;
3856 fmask |= DTK_M(MONTH);
3857 }
3858
3859 if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
3860 {
3861 /*
3862 * The month and day field have not been set, so we use the
3863 * day-of-year field to populate them. Depending on the date mode,
3864 * this field may be interpreted as a Gregorian day-of-year, or an ISO
3865 * week date day-of-year.
3866 */
3867
3868 if (!tm->tm_year && !tmfc.bc)
3869 ereport(ERROR,
3870 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3871 errmsg("cannot calculate day of year without year information")));
3872
3873 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
3874 {
3875 int j0; /* zeroth day of the ISO year, in Julian */
3876
3877 j0 = isoweek2j(tm->tm_year, 1) - 1;
3878
3879 j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3880 fmask |= DTK_DATE_M;
3881 }
3882 else
3883 {
3884 const int *y;
3885 int i;
3886
3887 static const int ysum[2][13] = {
3888 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
3889 {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
3890
3891 y = ysum[isleap(tm->tm_year)];
3892
3893 for (i = 1; i <= MONTHS_PER_YEAR; i++)
3894 {
3895 if (tmfc.ddd <= y[i])
3896 break;
3897 }
3898 if (tm->tm_mon <= 1)
3899 tm->tm_mon = i;
3900
3901 if (tm->tm_mday <= 1)
3902 tm->tm_mday = tmfc.ddd - y[i - 1];
3903
3904 fmask |= DTK_M(MONTH) | DTK_M(DAY);
3905 }
3906 }
3907
3908 if (tmfc.ms)
3909 *fsec += tmfc.ms * 1000;
3910 if (tmfc.us)
3911 *fsec += tmfc.us;
3912
3913 /* Range-check date fields according to bit mask computed above */
3914 if (fmask != 0)
3915 {
3916 /* We already dealt with AD/BC, so pass isjulian = true */
3917 int dterr = ValidateDate(fmask, true, false, false, tm);
3918
3919 if (dterr != 0)
3920 {
3921 /*
3922 * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
3923 * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
3924 * irrelevant hint about datestyle.
3925 */
3926 DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
3927 }
3928 }
3929
3930 /* Range-check time fields too */
3931 if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
3932 tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
3933 tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
3934 *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
3935 DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
3936
3937 /* Save parsed time-zone into tm->tm_zone if it was specified */
3938 if (tmfc.tzsign)
3939 {
3940 char *tz;
3941
3942 if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
3943 tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
3944 DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp");
3945
3946 tz = psprintf("%c%02d:%02d",
3947 tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
3948
3949 tm->tm_zone = tz;
3950 }
3951
3952 DEBUG_TM(tm);
3953
3954 pfree(date_str);
3955 }
3956
3957
3958 /**********************************************************************
3959 * the NUMBER version part
3960 *********************************************************************/
3961
3962
/*
 * Fill the first 'max' bytes of 'str' with the character 'c' and terminate
 * the string right after them.  'str' must have room for max + 1 bytes.
 * Returns 'str'.
 */
static char *
fill_str(char *str, int c, int max)
{
	char	   *terminator = str + max;

	memset(str, c, max);
	*terminator = '\0';

	return str;
}
3970
/*
 * Reset every field of the number description pointed to by _n to zero,
 * ready for a fresh parse_format() run.
 */
#define zeroize_NUM(_n) \
do { \
	(_n)->pre = 0; \
	(_n)->post = 0; \
	(_n)->lsign = 0; \
	(_n)->pre_lsign_num = 0; \
	(_n)->flag = 0; \
	(_n)->multi = 0; \
	(_n)->need_locale = 0; \
	(_n)->zero_start = 0; \
	(_n)->zero_end = 0; \
} while(0)
3983
3984 /* select a NUMCacheEntry to hold the given format picture */
3985 static NUMCacheEntry *
NUM_cache_getnew(const char * str)3986 NUM_cache_getnew(const char *str)
3987 {
3988 NUMCacheEntry *ent;
3989
3990 /* counter overflow check - paranoia? */
3991 if (NUMCounter >= (INT_MAX - NUM_CACHE_ENTRIES))
3992 {
3993 NUMCounter = 0;
3994
3995 for (ent = NUMCache; ent < (NUMCache + NUM_CACHE_ENTRIES); ent++)
3996 ent->age = (++NUMCounter);
3997 }
3998
3999 /*
4000 * If cache is full, remove oldest entry (or recycle first not-valid one)
4001 */
4002 if (n_NUMCache >= NUM_CACHE_ENTRIES)
4003 {
4004 NUMCacheEntry *old = NUMCache + 0;
4005
4006 #ifdef DEBUG_TO_FROM_CHAR
4007 elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4008 #endif
4009 if (old->valid)
4010 {
4011 for (ent = NUMCache + 1; ent < (NUMCache + NUM_CACHE_ENTRIES); ent++)
4012 {
4013 if (!ent->valid)
4014 {
4015 old = ent;
4016 break;
4017 }
4018 if (ent->age < old->age)
4019 old = ent;
4020 }
4021 }
4022 #ifdef DEBUG_TO_FROM_CHAR
4023 elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4024 #endif
4025 old->valid = false;
4026 StrNCpy(old->str, str, NUM_CACHE_SIZE + 1);
4027 old->age = (++NUMCounter);
4028 /* caller is expected to fill format and Num, then set valid */
4029 return old;
4030 }
4031 else
4032 {
4033 #ifdef DEBUG_TO_FROM_CHAR
4034 elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4035 #endif
4036 ent = NUMCache + n_NUMCache;
4037 ent->valid = false;
4038 StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1);
4039 ent->age = (++NUMCounter);
4040 /* caller is expected to fill format and Num, then set valid */
4041 ++n_NUMCache;
4042 return ent;
4043 }
4044 }
4045
4046 /* look for an existing NUMCacheEntry matching the given format picture */
4047 static NUMCacheEntry *
NUM_cache_search(const char * str)4048 NUM_cache_search(const char *str)
4049 {
4050 int i;
4051 NUMCacheEntry *ent;
4052
4053 /* counter overflow check - paranoia? */
4054 if (NUMCounter >= (INT_MAX - NUM_CACHE_ENTRIES))
4055 {
4056 NUMCounter = 0;
4057
4058 for (ent = NUMCache; ent < (NUMCache + NUM_CACHE_ENTRIES); ent++)
4059 ent->age = (++NUMCounter);
4060 }
4061
4062 for (i = 0, ent = NUMCache; i < n_NUMCache; i++, ent++)
4063 {
4064 if (ent->valid && strcmp(ent->str, str) == 0)
4065 {
4066 ent->age = (++NUMCounter);
4067 return ent;
4068 }
4069 }
4070
4071 return NULL;
4072 }
4073
4074 /* Find or create a NUMCacheEntry for the given format picture */
4075 static NUMCacheEntry *
NUM_cache_fetch(const char * str)4076 NUM_cache_fetch(const char *str)
4077 {
4078 NUMCacheEntry *ent;
4079
4080 if ((ent = NUM_cache_search(str)) == NULL)
4081 {
4082 /*
4083 * Not in the cache, must run parser and save a new format-picture to
4084 * the cache. Do not mark the cache entry valid until parsing
4085 * succeeds.
4086 */
4087 ent = NUM_cache_getnew(str);
4088
4089 zeroize_NUM(&ent->Num);
4090
4091 parse_format(ent->format, str, NUM_keywords,
4092 NULL, NUM_index, NUM_TYPE, &ent->Num);
4093
4094 ent->valid = true;
4095 }
4096 return ent;
4097 }
4098
4099 /* ----------
4100 * Cache routine for NUM to_char version
4101 * ----------
4102 */
4103 static FormatNode *
NUM_cache(int len,NUMDesc * Num,text * pars_str,bool * shouldFree)4104 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4105 {
4106 FormatNode *format = NULL;
4107 char *str;
4108
4109 str = text_to_cstring(pars_str);
4110
4111 if (len > NUM_CACHE_SIZE)
4112 {
4113 /*
4114 * Allocate new memory if format picture is bigger than static cache
4115 * and do not use cache (call parser always)
4116 */
4117 format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4118
4119 *shouldFree = true;
4120
4121 zeroize_NUM(Num);
4122
4123 parse_format(format, str, NUM_keywords,
4124 NULL, NUM_index, NUM_TYPE, Num);
4125 }
4126 else
4127 {
4128 /*
4129 * Use cache buffers
4130 */
4131 NUMCacheEntry *ent = NUM_cache_fetch(str);
4132
4133 *shouldFree = false;
4134
4135 format = ent->format;
4136
4137 /*
4138 * Copy cache to used struct
4139 */
4140 Num->flag = ent->Num.flag;
4141 Num->lsign = ent->Num.lsign;
4142 Num->pre = ent->Num.pre;
4143 Num->post = ent->Num.post;
4144 Num->pre_lsign_num = ent->Num.pre_lsign_num;
4145 Num->need_locale = ent->Num.need_locale;
4146 Num->multi = ent->Num.multi;
4147 Num->zero_start = ent->Num.zero_start;
4148 Num->zero_end = ent->Num.zero_end;
4149 }
4150
4151 #ifdef DEBUG_TO_FROM_CHAR
4152 /* dump_node(format, len); */
4153 dump_index(NUM_keywords, NUM_index);
4154 #endif
4155
4156 pfree(str);
4157 return format;
4158 }
4159
4160
4161 static char *
int_to_roman(int number)4162 int_to_roman(int number)
4163 {
4164 int len = 0,
4165 num = 0;
4166 char *p = NULL,
4167 *result,
4168 numstr[12];
4169
4170 result = (char *) palloc(16);
4171 *result = '\0';
4172
4173 if (number > 3999 || number < 1)
4174 {
4175 fill_str(result, '#', 15);
4176 return result;
4177 }
4178 len = snprintf(numstr, sizeof(numstr), "%d", number);
4179
4180 for (p = numstr; *p != '\0'; p++, --len)
4181 {
4182 num = *p - 49; /* 48 ascii + 1 */
4183 if (num < 0)
4184 continue;
4185
4186 if (len > 3)
4187 {
4188 while (num-- != -1)
4189 strcat(result, "M");
4190 }
4191 else
4192 {
4193 if (len == 3)
4194 strcat(result, rm100[num]);
4195 else if (len == 2)
4196 strcat(result, rm10[num]);
4197 else if (len == 1)
4198 strcat(result, rm1[num]);
4199 }
4200 }
4201 return result;
4202 }
4203
4204
4205
4206 /* ----------
4207 * Locale
4208 * ----------
4209 */
4210 static void
NUM_prepare_locale(NUMProc * Np)4211 NUM_prepare_locale(NUMProc *Np)
4212 {
4213 if (Np->Num->need_locale)
4214 {
4215 struct lconv *lconv;
4216
4217 /*
4218 * Get locales
4219 */
4220 lconv = PGLC_localeconv();
4221
4222 /*
4223 * Positive / Negative number sign
4224 */
4225 if (lconv->negative_sign && *lconv->negative_sign)
4226 Np->L_negative_sign = lconv->negative_sign;
4227 else
4228 Np->L_negative_sign = "-";
4229
4230 if (lconv->positive_sign && *lconv->positive_sign)
4231 Np->L_positive_sign = lconv->positive_sign;
4232 else
4233 Np->L_positive_sign = "+";
4234
4235 /*
4236 * Number decimal point
4237 */
4238 if (lconv->decimal_point && *lconv->decimal_point)
4239 Np->decimal = lconv->decimal_point;
4240
4241 else
4242 Np->decimal = ".";
4243
4244 if (!IS_LDECIMAL(Np->Num))
4245 Np->decimal = ".";
4246
4247 /*
4248 * Number thousands separator
4249 *
4250 * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
4251 * but "" for thousands_sep, so we set the thousands_sep too.
4252 * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
4253 */
4254 if (lconv->thousands_sep && *lconv->thousands_sep)
4255 Np->L_thousands_sep = lconv->thousands_sep;
4256 /* Make sure thousands separator doesn't match decimal point symbol. */
4257 else if (strcmp(Np->decimal, ",") !=0)
4258 Np->L_thousands_sep = ",";
4259 else
4260 Np->L_thousands_sep = ".";
4261
4262 /*
4263 * Currency symbol
4264 */
4265 if (lconv->currency_symbol && *lconv->currency_symbol)
4266 Np->L_currency_symbol = lconv->currency_symbol;
4267 else
4268 Np->L_currency_symbol = " ";
4269 }
4270 else
4271 {
4272 /*
4273 * Default values
4274 */
4275 Np->L_negative_sign = "-";
4276 Np->L_positive_sign = "+";
4277 Np->decimal = ".";
4278
4279 Np->L_thousands_sep = ",";
4280 Np->L_currency_symbol = " ";
4281 }
4282 }
4283
/* ----------
 * Return pointer to the last relevant character after the decimal point
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *dot = strchr(num, '.');
	char	   *last;
	char	   *scan;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (dot == NULL)
		return NULL;

	/* start at the point itself, then advance over every non-'0' char */
	last = dot;
	for (scan = dot + 1; *scan != '\0'; scan++)
	{
		if (*scan != '0')
			last = scan;
	}

	return last;
}
4315
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * Both expect 'Np' and 'input_len' to be in scope at the point of use.
 * OVERLOAD_TEST: true if we've reached end of input string
 * AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * The tests compare byte offsets into the input rather than pointers, so
 * that no pointer before the start of the buffer is ever computed when s is
 * larger than input_len.
 */
#define OVERLOAD_TEST	((Np->inout_p - Np->inout) >= input_len)
#define AMOUNT_TEST(s)	((Np->inout_p - Np->inout) <= input_len - (s))
4323
4324 /* ----------
4325 * Number extraction for TO_NUMBER()
4326 * ----------
4327 */
4328 static void
NUM_numpart_from_char(NUMProc * Np,int id,int input_len)4329 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
4330 {
4331 bool isread = false;
4332
4333 #ifdef DEBUG_TO_FROM_CHAR
4334 elog(DEBUG_elog_output, " --- scan start --- id=%s",
4335 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
4336 #endif
4337
4338 if (OVERLOAD_TEST)
4339 return;
4340
4341 if (*Np->inout_p == ' ')
4342 Np->inout_p++;
4343
4344 if (OVERLOAD_TEST)
4345 return;
4346
4347 /*
4348 * read sign before number
4349 */
4350 if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
4351 (Np->read_pre + Np->read_post) == 0)
4352 {
4353 #ifdef DEBUG_TO_FROM_CHAR
4354 elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
4355 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
4356 #endif
4357
4358 /*
4359 * locale sign
4360 */
4361 if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
4362 {
4363 int x = 0;
4364
4365 #ifdef DEBUG_TO_FROM_CHAR
4366 elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
4367 #endif
4368 if ((x = strlen(Np->L_negative_sign)) &&
4369 AMOUNT_TEST(x) &&
4370 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4371 {
4372 Np->inout_p += x;
4373 *Np->number = '-';
4374 }
4375 else if ((x = strlen(Np->L_positive_sign)) &&
4376 AMOUNT_TEST(x) &&
4377 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4378 {
4379 Np->inout_p += x;
4380 *Np->number = '+';
4381 }
4382 }
4383 else
4384 {
4385 #ifdef DEBUG_TO_FROM_CHAR
4386 elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
4387 #endif
4388
4389 /*
4390 * simple + - < >
4391 */
4392 if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
4393 *Np->inout_p == '<'))
4394 {
4395 *Np->number = '-'; /* set - */
4396 Np->inout_p++;
4397 }
4398 else if (*Np->inout_p == '+')
4399 {
4400 *Np->number = '+'; /* set + */
4401 Np->inout_p++;
4402 }
4403 }
4404 }
4405
4406 if (OVERLOAD_TEST)
4407 return;
4408
4409 #ifdef DEBUG_TO_FROM_CHAR
4410 elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
4411 #endif
4412
4413 /*
4414 * read digit or decimal point
4415 */
4416 if (isdigit((unsigned char) *Np->inout_p))
4417 {
4418 if (Np->read_dec && Np->read_post == Np->Num->post)
4419 return;
4420
4421 *Np->number_p = *Np->inout_p;
4422 Np->number_p++;
4423
4424 if (Np->read_dec)
4425 Np->read_post++;
4426 else
4427 Np->read_pre++;
4428
4429 isread = true;
4430
4431 #ifdef DEBUG_TO_FROM_CHAR
4432 elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
4433 #endif
4434 }
4435 else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
4436 {
4437 /*
4438 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
4439 * Np->decimal is always just "." if we don't have a D format token.
4440 * So we just unconditionally match to Np->decimal.
4441 */
4442 int x = strlen(Np->decimal);
4443
4444 #ifdef DEBUG_TO_FROM_CHAR
4445 elog(DEBUG_elog_output, "Try read decimal point (%c)",
4446 *Np->inout_p);
4447 #endif
4448 if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
4449 {
4450 Np->inout_p += x - 1;
4451 *Np->number_p = '.';
4452 Np->number_p++;
4453 Np->read_dec = true;
4454 isread = true;
4455 }
4456 }
4457
4458 if (OVERLOAD_TEST)
4459 return;
4460
4461 /*
4462 * Read sign behind "last" number
4463 *
4464 * We need sign detection because determine exact position of post-sign is
4465 * difficult:
4466 *
4467 * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
4468 * 5.01-
4469 */
4470 if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
4471 {
4472 /*
4473 * locale sign (NUM_S) is always anchored behind a last number, if: -
4474 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
4475 * next char is not digit
4476 */
4477 if (IS_LSIGN(Np->Num) && isread &&
4478 (Np->inout_p + 1) < Np->inout + input_len &&
4479 !isdigit((unsigned char) *(Np->inout_p + 1)))
4480 {
4481 int x;
4482 char *tmp = Np->inout_p++;
4483
4484 #ifdef DEBUG_TO_FROM_CHAR
4485 elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
4486 #endif
4487 if ((x = strlen(Np->L_negative_sign)) &&
4488 AMOUNT_TEST(x) &&
4489 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4490 {
4491 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4492 *Np->number = '-';
4493 }
4494 else if ((x = strlen(Np->L_positive_sign)) &&
4495 AMOUNT_TEST(x) &&
4496 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4497 {
4498 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4499 *Np->number = '+';
4500 }
4501 if (*Np->number == ' ')
4502 /* no sign read */
4503 Np->inout_p = tmp;
4504 }
4505
4506 /*
4507 * try read non-locale sign, it's happen only if format is not exact
4508 * and we cannot determine sign position of MI/PL/SG, an example:
4509 *
4510 * FM9.999999MI -> 5.01-
4511 *
4512 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
4513 * like to_number('1 -', '9S') where sign is not anchored to last
4514 * number.
4515 */
4516 else if (isread == false && IS_LSIGN(Np->Num) == false &&
4517 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
4518 {
4519 #ifdef DEBUG_TO_FROM_CHAR
4520 elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
4521 #endif
4522
4523 /*
4524 * simple + -
4525 */
4526 if (*Np->inout_p == '-' || *Np->inout_p == '+')
4527 /* NUM_processor() do inout_p++ */
4528 *Np->number = *Np->inout_p;
4529 }
4530 }
4531 }
4532
/*
 * True when the format has no zero-fill flag, the number pointer still sits
 * at the start of the number string, that first character is '0', and the
 * format has digits after the decimal point.
 */
#define IS_PREDEC_SPACE(_n) \
		(!IS_ZERO((_n)->Num) && \
		 (_n)->number_p == (_n)->number && \
		 '0' == *(_n)->number && \
		 0 != (_n)->Num->post)
4538
4539 /* ----------
4540 * Add digit or sign to number-string
4541 * ----------
4542 */
4543 static void
NUM_numpart_to_char(NUMProc * Np,int id)4544 NUM_numpart_to_char(NUMProc *Np, int id)
4545 {
4546 int end;
4547
4548 if (IS_ROMAN(Np->Num))
4549 return;
4550
4551 /* Note: in this elog() output not set '\0' in 'inout' */
4552
4553 #ifdef DEBUG_TO_FROM_CHAR
4554
4555 /*
4556 * Np->num_curr is number of current item in format-picture, it is not
4557 * current position in inout!
4558 */
4559 elog(DEBUG_elog_output,
4560 "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
4561 Np->sign_wrote,
4562 Np->num_curr,
4563 Np->number_p,
4564 Np->inout);
4565 #endif
4566 Np->num_in = false;
4567
4568 /*
4569 * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
4570 * handle "9.9" --> " .1"
4571 */
4572 if (Np->sign_wrote == false &&
4573 (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
4574 (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
4575 {
4576 if (IS_LSIGN(Np->Num))
4577 {
4578 if (Np->Num->lsign == NUM_LSIGN_PRE)
4579 {
4580 if (Np->sign == '-')
4581 strcpy(Np->inout_p, Np->L_negative_sign);
4582 else
4583 strcpy(Np->inout_p, Np->L_positive_sign);
4584 Np->inout_p += strlen(Np->inout_p);
4585 Np->sign_wrote = true;
4586 }
4587 }
4588 else if (IS_BRACKET(Np->Num))
4589 {
4590 *Np->inout_p = Np->sign == '+' ? ' ' : '<';
4591 ++Np->inout_p;
4592 Np->sign_wrote = true;
4593 }
4594 else if (Np->sign == '+')
4595 {
4596 if (!IS_FILLMODE(Np->Num))
4597 {
4598 *Np->inout_p = ' '; /* Write + */
4599 ++Np->inout_p;
4600 }
4601 Np->sign_wrote = true;
4602 }
4603 else if (Np->sign == '-')
4604 { /* Write - */
4605 *Np->inout_p = '-';
4606 ++Np->inout_p;
4607 Np->sign_wrote = true;
4608 }
4609 }
4610
4611
4612 /*
4613 * digits / FM / Zero / Dec. point
4614 */
4615 if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
4616 {
4617 if (Np->num_curr < Np->out_pre_spaces &&
4618 (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
4619 {
4620 /*
4621 * Write blank space
4622 */
4623 if (!IS_FILLMODE(Np->Num))
4624 {
4625 *Np->inout_p = ' '; /* Write ' ' */
4626 ++Np->inout_p;
4627 }
4628 }
4629 else if (IS_ZERO(Np->Num) &&
4630 Np->num_curr < Np->out_pre_spaces &&
4631 Np->Num->zero_start <= Np->num_curr)
4632 {
4633 /*
4634 * Write ZERO
4635 */
4636 *Np->inout_p = '0'; /* Write '0' */
4637 ++Np->inout_p;
4638 Np->num_in = true;
4639 }
4640 else
4641 {
4642 /*
4643 * Write Decimal point
4644 */
4645 if (*Np->number_p == '.')
4646 {
4647 if (!Np->last_relevant || *Np->last_relevant != '.')
4648 {
4649 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4650 Np->inout_p += strlen(Np->inout_p);
4651 }
4652
4653 /*
4654 * Ora 'n' -- FM9.9 --> 'n.'
4655 */
4656 else if (IS_FILLMODE(Np->Num) &&
4657 Np->last_relevant && *Np->last_relevant == '.')
4658 {
4659 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4660 Np->inout_p += strlen(Np->inout_p);
4661 }
4662 }
4663 else
4664 {
4665 /*
4666 * Write Digits
4667 */
4668 if (Np->last_relevant && Np->number_p > Np->last_relevant &&
4669 id != NUM_0)
4670 ;
4671
4672 /*
4673 * '0.1' -- 9.9 --> ' .1'
4674 */
4675 else if (IS_PREDEC_SPACE(Np))
4676 {
4677 if (!IS_FILLMODE(Np->Num))
4678 {
4679 *Np->inout_p = ' ';
4680 ++Np->inout_p;
4681 }
4682
4683 /*
4684 * '0' -- FM9.9 --> '0.'
4685 */
4686 else if (Np->last_relevant && *Np->last_relevant == '.')
4687 {
4688 *Np->inout_p = '0';
4689 ++Np->inout_p;
4690 }
4691 }
4692 else
4693 {
4694 *Np->inout_p = *Np->number_p; /* Write DIGIT */
4695 ++Np->inout_p;
4696 Np->num_in = true;
4697 }
4698 }
4699 /* do no exceed string length */
4700 if (*Np->number_p)
4701 ++Np->number_p;
4702 }
4703
4704 end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
4705
4706 if (Np->last_relevant && Np->last_relevant == Np->number_p)
4707 end = Np->num_curr;
4708
4709 if (Np->num_curr + 1 == end)
4710 {
4711 if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
4712 {
4713 *Np->inout_p = Np->sign == '+' ? ' ' : '>';
4714 ++Np->inout_p;
4715 }
4716 else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
4717 {
4718 if (Np->sign == '-')
4719 strcpy(Np->inout_p, Np->L_negative_sign);
4720 else
4721 strcpy(Np->inout_p, Np->L_positive_sign);
4722 Np->inout_p += strlen(Np->inout_p);
4723 }
4724 }
4725 }
4726
4727 ++Np->num_curr;
4728 }
4729
4730 /*
4731 * Skip over "n" input characters, but only if they aren't numeric data
4732 */
4733 static void
NUM_eat_non_data_chars(NUMProc * Np,int n,int input_len)4734 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
4735 {
4736 while (n-- > 0)
4737 {
4738 if (OVERLOAD_TEST)
4739 break; /* end of input */
4740 if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
4741 break; /* it's a data character */
4742 Np->inout_p += pg_mblen(Np->inout_p);
4743 }
4744 }
4745
4746 static char *
NUM_processor(FormatNode * node,NUMDesc * Num,char * inout,char * number,int input_len,int to_char_out_pre_spaces,int sign,bool is_to_char,Oid collid)4747 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
4748 char *number, int input_len, int to_char_out_pre_spaces,
4749 int sign, bool is_to_char, Oid collid)
4750 {
4751 FormatNode *n;
4752 NUMProc _Np,
4753 *Np = &_Np;
4754 const char *pattern;
4755 int pattern_len;
4756
4757 MemSet(Np, 0, sizeof(NUMProc));
4758
4759 Np->Num = Num;
4760 Np->is_to_char = is_to_char;
4761 Np->number = number;
4762 Np->inout = inout;
4763 Np->last_relevant = NULL;
4764 Np->read_post = 0;
4765 Np->read_pre = 0;
4766 Np->read_dec = false;
4767
4768 if (Np->Num->zero_start)
4769 --Np->Num->zero_start;
4770
4771 if (IS_EEEE(Np->Num))
4772 {
4773 if (!Np->is_to_char)
4774 ereport(ERROR,
4775 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4776 errmsg("\"EEEE\" not supported for input")));
4777 return strcpy(inout, number);
4778 }
4779
4780 /*
4781 * Roman correction
4782 */
4783 if (IS_ROMAN(Np->Num))
4784 {
4785 if (!Np->is_to_char)
4786 ereport(ERROR,
4787 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4788 errmsg("\"RN\" not supported for input")));
4789
4790 Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
4791 Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
4792
4793 if (IS_FILLMODE(Np->Num))
4794 {
4795 Np->Num->flag = 0;
4796 Np->Num->flag |= NUM_F_FILLMODE;
4797 }
4798 else
4799 Np->Num->flag = 0;
4800 Np->Num->flag |= NUM_F_ROMAN;
4801 }
4802
4803 /*
4804 * Sign
4805 */
4806 if (is_to_char)
4807 {
4808 Np->sign = sign;
4809
4810 /* MI/PL/SG - write sign itself and not in number */
4811 if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
4812 {
4813 if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
4814 Np->sign_wrote = false; /* need sign */
4815 else
4816 Np->sign_wrote = true; /* needn't sign */
4817 }
4818 else
4819 {
4820 if (Np->sign != '-')
4821 {
4822 if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
4823 Np->Num->flag &= ~NUM_F_BRACKET;
4824 if (IS_MINUS(Np->Num))
4825 Np->Num->flag &= ~NUM_F_MINUS;
4826 }
4827 else if (Np->sign != '+' && IS_PLUS(Np->Num))
4828 Np->Num->flag &= ~NUM_F_PLUS;
4829
4830 if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
4831 Np->sign_wrote = true; /* needn't sign */
4832 else
4833 Np->sign_wrote = false; /* need sign */
4834
4835 if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
4836 Np->Num->lsign = NUM_LSIGN_POST;
4837 }
4838 }
4839 else
4840 Np->sign = false;
4841
4842 /*
4843 * Count
4844 */
4845 Np->num_count = Np->Num->post + Np->Num->pre - 1;
4846
4847 if (is_to_char)
4848 {
4849 Np->out_pre_spaces = to_char_out_pre_spaces;
4850
4851 if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
4852 {
4853 Np->last_relevant = get_last_relevant_decnum(Np->number);
4854
4855 /*
4856 * If any '0' specifiers are present, make sure we don't strip
4857 * those digits.
4858 */
4859 if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
4860 {
4861 char *last_zero;
4862
4863 last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
4864 if (Np->last_relevant < last_zero)
4865 Np->last_relevant = last_zero;
4866 }
4867 }
4868
4869 if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
4870 ++Np->num_count;
4871 }
4872 else
4873 {
4874 Np->out_pre_spaces = 0;
4875 *Np->number = ' '; /* sign space */
4876 *(Np->number + 1) = '\0';
4877 }
4878
4879 Np->num_in = 0;
4880 Np->num_curr = 0;
4881
4882 #ifdef DEBUG_TO_FROM_CHAR
4883 elog(DEBUG_elog_output,
4884 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
4885 Np->sign,
4886 Np->number,
4887 Np->Num->pre,
4888 Np->Num->post,
4889 Np->num_count,
4890 Np->out_pre_spaces,
4891 Np->sign_wrote ? "Yes" : "No",
4892 IS_ZERO(Np->Num) ? "Yes" : "No",
4893 Np->Num->zero_start,
4894 Np->Num->zero_end,
4895 Np->last_relevant ? Np->last_relevant : "<not set>",
4896 IS_BRACKET(Np->Num) ? "Yes" : "No",
4897 IS_PLUS(Np->Num) ? "Yes" : "No",
4898 IS_MINUS(Np->Num) ? "Yes" : "No",
4899 IS_FILLMODE(Np->Num) ? "Yes" : "No",
4900 IS_ROMAN(Np->Num) ? "Yes" : "No",
4901 IS_EEEE(Np->Num) ? "Yes" : "No"
4902 );
4903 #endif
4904
4905 /*
4906 * Locale
4907 */
4908 NUM_prepare_locale(Np);
4909
4910 /*
4911 * Processor direct cycle
4912 */
4913 if (Np->is_to_char)
4914 Np->number_p = Np->number;
4915 else
4916 Np->number_p = Np->number + 1; /* first char is space for sign */
4917
4918 for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
4919 {
4920 if (!Np->is_to_char)
4921 {
4922 /*
4923 * Check at least one byte remains to be scanned. (In actions
4924 * below, must use AMOUNT_TEST if we want to read more bytes than
4925 * that.)
4926 */
4927 if (OVERLOAD_TEST)
4928 break;
4929 }
4930
4931 /*
4932 * Format pictures actions
4933 */
4934 if (n->type == NODE_TYPE_ACTION)
4935 {
4936 /*
4937 * Create/read digit/zero/blank/sign/special-case
4938 *
4939 * 'NUM_S' note: The locale sign is anchored to number and we
4940 * read/write it when we work with first or last number
4941 * (NUM_0/NUM_9). This is why NUM_S is missing in switch().
4942 *
4943 * Notice the "Np->inout_p++" at the bottom of the loop. This is
4944 * why most of the actions advance inout_p one less than you might
4945 * expect. In cases where we don't want that increment to happen,
4946 * a switch case ends with "continue" not "break".
4947 */
4948 switch (n->key->id)
4949 {
4950 case NUM_9:
4951 case NUM_0:
4952 case NUM_DEC:
4953 case NUM_D:
4954 if (Np->is_to_char)
4955 {
4956 NUM_numpart_to_char(Np, n->key->id);
4957 continue; /* for() */
4958 }
4959 else
4960 {
4961 NUM_numpart_from_char(Np, n->key->id, input_len);
4962 break; /* switch() case: */
4963 }
4964
4965 case NUM_COMMA:
4966 if (Np->is_to_char)
4967 {
4968 if (!Np->num_in)
4969 {
4970 if (IS_FILLMODE(Np->Num))
4971 continue;
4972 else
4973 *Np->inout_p = ' ';
4974 }
4975 else
4976 *Np->inout_p = ',';
4977 }
4978 else
4979 {
4980 if (!Np->num_in)
4981 {
4982 if (IS_FILLMODE(Np->Num))
4983 continue;
4984 }
4985 if (*Np->inout_p != ',')
4986 continue;
4987 }
4988 break;
4989
4990 case NUM_G:
4991 pattern = Np->L_thousands_sep;
4992 pattern_len = strlen(pattern);
4993 if (Np->is_to_char)
4994 {
4995 if (!Np->num_in)
4996 {
4997 if (IS_FILLMODE(Np->Num))
4998 continue;
4999 else
5000 {
5001 /* just in case there are MB chars */
5002 pattern_len = pg_mbstrlen(pattern);
5003 memset(Np->inout_p, ' ', pattern_len);
5004 Np->inout_p += pattern_len - 1;
5005 }
5006 }
5007 else
5008 {
5009 strcpy(Np->inout_p, pattern);
5010 Np->inout_p += pattern_len - 1;
5011 }
5012 }
5013 else
5014 {
5015 if (!Np->num_in)
5016 {
5017 if (IS_FILLMODE(Np->Num))
5018 continue;
5019 }
5020
5021 /*
5022 * Because L_thousands_sep typically contains data
5023 * characters (either '.' or ','), we can't use
5024 * NUM_eat_non_data_chars here. Instead skip only if
5025 * the input matches L_thousands_sep.
5026 */
5027 if (AMOUNT_TEST(pattern_len) &&
5028 strncmp(Np->inout_p, pattern, pattern_len) == 0)
5029 Np->inout_p += pattern_len - 1;
5030 else
5031 continue;
5032 }
5033 break;
5034
5035 case NUM_L:
5036 pattern = Np->L_currency_symbol;
5037 if (Np->is_to_char)
5038 {
5039 strcpy(Np->inout_p, pattern);
5040 Np->inout_p += strlen(pattern) - 1;
5041 }
5042 else
5043 {
5044 NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
5045 continue;
5046 }
5047 break;
5048
5049 case NUM_RN:
5050 if (IS_FILLMODE(Np->Num))
5051 {
5052 strcpy(Np->inout_p, Np->number_p);
5053 Np->inout_p += strlen(Np->inout_p) - 1;
5054 }
5055 else
5056 {
5057 sprintf(Np->inout_p, "%15s", Np->number_p);
5058 Np->inout_p += strlen(Np->inout_p) - 1;
5059 }
5060 break;
5061
5062 case NUM_rn:
5063 if (IS_FILLMODE(Np->Num))
5064 {
5065 strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
5066 Np->inout_p += strlen(Np->inout_p) - 1;
5067 }
5068 else
5069 {
5070 sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
5071 Np->inout_p += strlen(Np->inout_p) - 1;
5072 }
5073 break;
5074
5075 case NUM_th:
5076 if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5077 Np->sign == '-' || IS_DECIMAL(Np->Num))
5078 continue;
5079
5080 if (Np->is_to_char)
5081 {
5082 strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
5083 Np->inout_p += 1;
5084 }
5085 else
5086 {
5087 /* All variants of 'th' occupy 2 characters */
5088 NUM_eat_non_data_chars(Np, 2, input_len);
5089 continue;
5090 }
5091 break;
5092
5093 case NUM_TH:
5094 if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5095 Np->sign == '-' || IS_DECIMAL(Np->Num))
5096 continue;
5097
5098 if (Np->is_to_char)
5099 {
5100 strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
5101 Np->inout_p += 1;
5102 }
5103 else
5104 {
5105 /* All variants of 'TH' occupy 2 characters */
5106 NUM_eat_non_data_chars(Np, 2, input_len);
5107 continue;
5108 }
5109 break;
5110
5111 case NUM_MI:
5112 if (Np->is_to_char)
5113 {
5114 if (Np->sign == '-')
5115 *Np->inout_p = '-';
5116 else if (IS_FILLMODE(Np->Num))
5117 continue;
5118 else
5119 *Np->inout_p = ' ';
5120 }
5121 else
5122 {
5123 if (*Np->inout_p == '-')
5124 *Np->number = '-';
5125 else
5126 {
5127 NUM_eat_non_data_chars(Np, 1, input_len);
5128 continue;
5129 }
5130 }
5131 break;
5132
5133 case NUM_PL:
5134 if (Np->is_to_char)
5135 {
5136 if (Np->sign == '+')
5137 *Np->inout_p = '+';
5138 else if (IS_FILLMODE(Np->Num))
5139 continue;
5140 else
5141 *Np->inout_p = ' ';
5142 }
5143 else
5144 {
5145 if (*Np->inout_p == '+')
5146 *Np->number = '+';
5147 else
5148 {
5149 NUM_eat_non_data_chars(Np, 1, input_len);
5150 continue;
5151 }
5152 }
5153 break;
5154
5155 case NUM_SG:
5156 if (Np->is_to_char)
5157 *Np->inout_p = Np->sign;
5158 else
5159 {
5160 if (*Np->inout_p == '-')
5161 *Np->number = '-';
5162 else if (*Np->inout_p == '+')
5163 *Np->number = '+';
5164 else
5165 {
5166 NUM_eat_non_data_chars(Np, 1, input_len);
5167 continue;
5168 }
5169 }
5170 break;
5171
5172 default:
5173 continue;
5174 break;
5175 }
5176 }
5177 else
5178 {
5179 /*
5180 * In TO_CHAR, non-pattern characters in the format are copied to
5181 * the output. In TO_NUMBER, we skip one input character for each
5182 * non-pattern format character, whether or not it matches the
5183 * format character.
5184 */
5185 if (Np->is_to_char)
5186 {
5187 strcpy(Np->inout_p, n->character);
5188 Np->inout_p += strlen(Np->inout_p);
5189 }
5190 else
5191 {
5192 Np->inout_p += pg_mblen(Np->inout_p);
5193 }
5194 continue;
5195 }
5196 Np->inout_p++;
5197 }
5198
5199 if (Np->is_to_char)
5200 {
5201 *Np->inout_p = '\0';
5202 return Np->inout;
5203 }
5204 else
5205 {
5206 if (*(Np->number_p - 1) == '.')
5207 *(Np->number_p - 1) = '\0';
5208 else
5209 *Np->number_p = '\0';
5210
5211 /*
5212 * Correction - precision of dec. number
5213 */
5214 Np->Num->post = Np->read_post;
5215
5216 #ifdef DEBUG_TO_FROM_CHAR
5217 elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
5218 #endif
5219 return Np->number;
5220 }
5221 }
5222
/* ----------
 * MACRO: common prologue of every numeric to_char() variant.
 *
 * Expects "fmt", "Num", "result", "format" and "shouldFree" to be declared
 * in the expanding function.  Returns an empty text from that function when
 * the format is empty or too long; otherwise allocates the zeroed result
 * buffer and fetches the parsed picture through NUM_cache().
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int			fmt_len = VARSIZE_ANY_EXHDR(fmt); \
	\
	if (fmt_len <= 0 || fmt_len >= (INT_MAX - VARHDRSZ) / NUM_MAX_ITEM_SIZ) \
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	result = (text *) palloc0((fmt_len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \
	format = NUM_cache(fmt_len, &Num, fmt, &shouldFree); \
} while (0)
5236
/* ----------
 * MACRO: common epilogue of every numeric to_char() variant.
 *
 * Runs NUM_processor() over "numstr" into the data area of "result", frees
 * the parsed picture when "shouldFree" is set, and finally turns the
 * null-terminated output into a standard text by setting its varlena size.
 * The result is usually much bigger than it needs to be, but there seems
 * little point in realloc'ing it smaller.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int			out_len; \
	\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
	\
	if (shouldFree) \
		pfree(format); \
	\
	out_len = strlen(VARDATA(result)); \
	SET_VARSIZE(result, out_len + VARHDRSZ); \
} while (0)
5258
5259 /* -------------------
5260 * NUMERIC to_number() (convert string to numeric)
5261 * -------------------
5262 */
5263 Datum
numeric_to_number(PG_FUNCTION_ARGS)5264 numeric_to_number(PG_FUNCTION_ARGS)
5265 {
5266 text *value = PG_GETARG_TEXT_PP(0);
5267 text *fmt = PG_GETARG_TEXT_PP(1);
5268 NUMDesc Num;
5269 Datum result;
5270 FormatNode *format;
5271 char *numstr;
5272 bool shouldFree;
5273 int len = 0;
5274 int scale,
5275 precision;
5276
5277 len = VARSIZE_ANY_EXHDR(fmt);
5278
5279 if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
5280 PG_RETURN_NULL();
5281
5282 format = NUM_cache(len, &Num, fmt, &shouldFree);
5283
5284 numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
5285
5286 NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
5287 VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
5288
5289 scale = Num.post;
5290 precision = Num.pre + Num.multi + scale;
5291
5292 if (shouldFree)
5293 pfree(format);
5294
5295 result = DirectFunctionCall3(numeric_in,
5296 CStringGetDatum(numstr),
5297 ObjectIdGetDatum(InvalidOid),
5298 Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
5299
5300 if (IS_MULTI(&Num))
5301 {
5302 Numeric x;
5303 Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
5304 Int32GetDatum(10)));
5305 Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
5306 Int32GetDatum(-Num.multi)));
5307
5308 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
5309 NumericGetDatum(a),
5310 NumericGetDatum(b)));
5311 result = DirectFunctionCall2(numeric_mul,
5312 result,
5313 NumericGetDatum(x));
5314 }
5315
5316 pfree(numstr);
5317 return result;
5318 }
5319
5320 /* ------------------
5321 * NUMERIC to_char()
5322 * ------------------
5323 */
5324 Datum
numeric_to_char(PG_FUNCTION_ARGS)5325 numeric_to_char(PG_FUNCTION_ARGS)
5326 {
5327 Numeric value = PG_GETARG_NUMERIC(0);
5328 text *fmt = PG_GETARG_TEXT_PP(1);
5329 NUMDesc Num;
5330 FormatNode *format;
5331 text *result;
5332 bool shouldFree;
5333 int out_pre_spaces = 0,
5334 sign = 0;
5335 char *numstr,
5336 *orgnum,
5337 *p;
5338 Numeric x;
5339
5340 NUM_TOCHAR_prepare;
5341
5342 /*
5343 * On DateType depend part (numeric)
5344 */
5345 if (IS_ROMAN(&Num))
5346 {
5347 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
5348 NumericGetDatum(value),
5349 Int32GetDatum(0)));
5350 numstr = orgnum =
5351 int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
5352 NumericGetDatum(x))));
5353 }
5354 else if (IS_EEEE(&Num))
5355 {
5356 orgnum = numeric_out_sci(value, Num.post);
5357
5358 /*
5359 * numeric_out_sci() does not emit a sign for positive numbers. We
5360 * need to add a space in this case so that positive and negative
5361 * numbers are aligned. We also have to do the right thing for NaN.
5362 */
5363 if (strcmp(orgnum, "NaN") == 0)
5364 {
5365 /*
5366 * Allow 6 characters for the leading sign, the decimal point,
5367 * "e", the exponent's sign and two exponent digits.
5368 */
5369 numstr = (char *) palloc(Num.pre + Num.post + 7);
5370 fill_str(numstr, '#', Num.pre + Num.post + 6);
5371 *numstr = ' ';
5372 *(numstr + Num.pre + 1) = '.';
5373 }
5374 else if (*orgnum != '-')
5375 {
5376 numstr = (char *) palloc(strlen(orgnum) + 2);
5377 *numstr = ' ';
5378 strcpy(numstr + 1, orgnum);
5379 }
5380 else
5381 {
5382 numstr = orgnum;
5383 }
5384 }
5385 else
5386 {
5387 int numstr_pre_len;
5388 Numeric val = value;
5389
5390 if (IS_MULTI(&Num))
5391 {
5392 Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
5393 Int32GetDatum(10)));
5394 Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
5395 Int32GetDatum(Num.multi)));
5396
5397 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
5398 NumericGetDatum(a),
5399 NumericGetDatum(b)));
5400 val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
5401 NumericGetDatum(value),
5402 NumericGetDatum(x)));
5403 Num.pre += Num.multi;
5404 }
5405
5406 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
5407 NumericGetDatum(val),
5408 Int32GetDatum(Num.post)));
5409 orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
5410 NumericGetDatum(x)));
5411
5412 if (*orgnum == '-')
5413 {
5414 sign = '-';
5415 numstr = orgnum + 1;
5416 }
5417 else
5418 {
5419 sign = '+';
5420 numstr = orgnum;
5421 }
5422
5423 if ((p = strchr(numstr, '.')))
5424 numstr_pre_len = p - numstr;
5425 else
5426 numstr_pre_len = strlen(numstr);
5427
5428 /* needs padding? */
5429 if (numstr_pre_len < Num.pre)
5430 out_pre_spaces = Num.pre - numstr_pre_len;
5431 /* overflowed prefix digit format? */
5432 else if (numstr_pre_len > Num.pre)
5433 {
5434 numstr = (char *) palloc(Num.pre + Num.post + 2);
5435 fill_str(numstr, '#', Num.pre + Num.post + 1);
5436 *(numstr + Num.pre) = '.';
5437 }
5438 }
5439
5440 NUM_TOCHAR_finish;
5441 PG_RETURN_TEXT_P(result);
5442 }
5443
5444 /* ---------------
5445 * INT4 to_char()
5446 * ---------------
5447 */
5448 Datum
int4_to_char(PG_FUNCTION_ARGS)5449 int4_to_char(PG_FUNCTION_ARGS)
5450 {
5451 int32 value = PG_GETARG_INT32(0);
5452 text *fmt = PG_GETARG_TEXT_PP(1);
5453 NUMDesc Num;
5454 FormatNode *format;
5455 text *result;
5456 bool shouldFree;
5457 int out_pre_spaces = 0,
5458 sign = 0;
5459 char *numstr,
5460 *orgnum;
5461
5462 NUM_TOCHAR_prepare;
5463
5464 /*
5465 * On DateType depend part (int32)
5466 */
5467 if (IS_ROMAN(&Num))
5468 numstr = orgnum = int_to_roman(value);
5469 else if (IS_EEEE(&Num))
5470 {
5471 /* we can do it easily because float8 won't lose any precision */
5472 float8 val = (float8) value;
5473
5474 orgnum = (char *) psprintf("%+.*e", Num.post, val);
5475
5476 /*
5477 * Swap a leading positive sign for a space.
5478 */
5479 if (*orgnum == '+')
5480 *orgnum = ' ';
5481
5482 numstr = orgnum;
5483 }
5484 else
5485 {
5486 int numstr_pre_len;
5487
5488 if (IS_MULTI(&Num))
5489 {
5490 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
5491 Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
5492 Num.pre += Num.multi;
5493 }
5494 else
5495 {
5496 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
5497 Int32GetDatum(value)));
5498 }
5499
5500 if (*orgnum == '-')
5501 {
5502 sign = '-';
5503 orgnum++;
5504 }
5505 else
5506 sign = '+';
5507
5508 numstr_pre_len = strlen(orgnum);
5509
5510 /* post-decimal digits? Pad out with zeros. */
5511 if (Num.post)
5512 {
5513 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
5514 strcpy(numstr, orgnum);
5515 *(numstr + numstr_pre_len) = '.';
5516 memset(numstr + numstr_pre_len + 1, '0', Num.post);
5517 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
5518 }
5519 else
5520 numstr = orgnum;
5521
5522 /* needs padding? */
5523 if (numstr_pre_len < Num.pre)
5524 out_pre_spaces = Num.pre - numstr_pre_len;
5525 /* overflowed prefix digit format? */
5526 else if (numstr_pre_len > Num.pre)
5527 {
5528 numstr = (char *) palloc(Num.pre + Num.post + 2);
5529 fill_str(numstr, '#', Num.pre + Num.post + 1);
5530 *(numstr + Num.pre) = '.';
5531 }
5532 }
5533
5534 NUM_TOCHAR_finish;
5535 PG_RETURN_TEXT_P(result);
5536 }
5537
5538 /* ---------------
5539 * INT8 to_char()
5540 * ---------------
5541 */
5542 Datum
int8_to_char(PG_FUNCTION_ARGS)5543 int8_to_char(PG_FUNCTION_ARGS)
5544 {
5545 int64 value = PG_GETARG_INT64(0);
5546 text *fmt = PG_GETARG_TEXT_PP(1);
5547 NUMDesc Num;
5548 FormatNode *format;
5549 text *result;
5550 bool shouldFree;
5551 int out_pre_spaces = 0,
5552 sign = 0;
5553 char *numstr,
5554 *orgnum;
5555
5556 NUM_TOCHAR_prepare;
5557
5558 /*
5559 * On DateType depend part (int32)
5560 */
5561 if (IS_ROMAN(&Num))
5562 {
5563 /* Currently don't support int8 conversion to roman... */
5564 numstr = orgnum = int_to_roman(DatumGetInt32(
5565 DirectFunctionCall1(int84, Int64GetDatum(value))));
5566 }
5567 else if (IS_EEEE(&Num))
5568 {
5569 /* to avoid loss of precision, must go via numeric not float8 */
5570 Numeric val;
5571
5572 val = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
5573 Int64GetDatum(value)));
5574 orgnum = numeric_out_sci(val, Num.post);
5575
5576 /*
5577 * numeric_out_sci() does not emit a sign for positive numbers. We
5578 * need to add a space in this case so that positive and negative
5579 * numbers are aligned. We don't have to worry about NaN here.
5580 */
5581 if (*orgnum != '-')
5582 {
5583 numstr = (char *) palloc(strlen(orgnum) + 2);
5584 *numstr = ' ';
5585 strcpy(numstr + 1, orgnum);
5586 }
5587 else
5588 {
5589 numstr = orgnum;
5590 }
5591 }
5592 else
5593 {
5594 int numstr_pre_len;
5595
5596 if (IS_MULTI(&Num))
5597 {
5598 double multi = pow((double) 10, (double) Num.multi);
5599
5600 value = DatumGetInt64(DirectFunctionCall2(int8mul,
5601 Int64GetDatum(value),
5602 DirectFunctionCall1(dtoi8,
5603 Float8GetDatum(multi))));
5604 Num.pre += Num.multi;
5605 }
5606
5607 orgnum = DatumGetCString(DirectFunctionCall1(int8out,
5608 Int64GetDatum(value)));
5609
5610 if (*orgnum == '-')
5611 {
5612 sign = '-';
5613 orgnum++;
5614 }
5615 else
5616 sign = '+';
5617
5618 numstr_pre_len = strlen(orgnum);
5619
5620 /* post-decimal digits? Pad out with zeros. */
5621 if (Num.post)
5622 {
5623 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
5624 strcpy(numstr, orgnum);
5625 *(numstr + numstr_pre_len) = '.';
5626 memset(numstr + numstr_pre_len + 1, '0', Num.post);
5627 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
5628 }
5629 else
5630 numstr = orgnum;
5631
5632 /* needs padding? */
5633 if (numstr_pre_len < Num.pre)
5634 out_pre_spaces = Num.pre - numstr_pre_len;
5635 /* overflowed prefix digit format? */
5636 else if (numstr_pre_len > Num.pre)
5637 {
5638 numstr = (char *) palloc(Num.pre + Num.post + 2);
5639 fill_str(numstr, '#', Num.pre + Num.post + 1);
5640 *(numstr + Num.pre) = '.';
5641 }
5642 }
5643
5644 NUM_TOCHAR_finish;
5645 PG_RETURN_TEXT_P(result);
5646 }
5647
5648 /* -----------------
5649 * FLOAT4 to_char()
5650 * -----------------
5651 */
5652 Datum
float4_to_char(PG_FUNCTION_ARGS)5653 float4_to_char(PG_FUNCTION_ARGS)
5654 {
5655 float4 value = PG_GETARG_FLOAT4(0);
5656 text *fmt = PG_GETARG_TEXT_PP(1);
5657 NUMDesc Num;
5658 FormatNode *format;
5659 text *result;
5660 bool shouldFree;
5661 int out_pre_spaces = 0,
5662 sign = 0;
5663 char *numstr,
5664 *orgnum,
5665 *p;
5666
5667 NUM_TOCHAR_prepare;
5668
5669 if (IS_ROMAN(&Num))
5670 numstr = orgnum = int_to_roman((int) rint(value));
5671 else if (IS_EEEE(&Num))
5672 {
5673 if (isnan(value) || is_infinite(value))
5674 {
5675 /*
5676 * Allow 6 characters for the leading sign, the decimal point,
5677 * "e", the exponent's sign and two exponent digits.
5678 */
5679 numstr = (char *) palloc(Num.pre + Num.post + 7);
5680 fill_str(numstr, '#', Num.pre + Num.post + 6);
5681 *numstr = ' ';
5682 *(numstr + Num.pre + 1) = '.';
5683 }
5684 else
5685 {
5686 numstr = orgnum = psprintf("%+.*e", Num.post, value);
5687
5688 /*
5689 * Swap a leading positive sign for a space.
5690 */
5691 if (*orgnum == '+')
5692 *orgnum = ' ';
5693
5694 numstr = orgnum;
5695 }
5696 }
5697 else
5698 {
5699 float4 val = value;
5700 int numstr_pre_len;
5701
5702 if (IS_MULTI(&Num))
5703 {
5704 float multi = pow((double) 10, (double) Num.multi);
5705
5706 val = value * multi;
5707 Num.pre += Num.multi;
5708 }
5709
5710 orgnum = (char *) psprintf("%.0f", fabs(val));
5711 numstr_pre_len = strlen(orgnum);
5712
5713 /* adjust post digits to fit max float digits */
5714 if (numstr_pre_len >= FLT_DIG)
5715 Num.post = 0;
5716 else if (numstr_pre_len + Num.post > FLT_DIG)
5717 Num.post = FLT_DIG - numstr_pre_len;
5718 orgnum = psprintf("%.*f", Num.post, val);
5719
5720 if (*orgnum == '-')
5721 { /* < 0 */
5722 sign = '-';
5723 numstr = orgnum + 1;
5724 }
5725 else
5726 {
5727 sign = '+';
5728 numstr = orgnum;
5729 }
5730
5731 if ((p = strchr(numstr, '.')))
5732 numstr_pre_len = p - numstr;
5733 else
5734 numstr_pre_len = strlen(numstr);
5735
5736 /* needs padding? */
5737 if (numstr_pre_len < Num.pre)
5738 out_pre_spaces = Num.pre - numstr_pre_len;
5739 /* overflowed prefix digit format? */
5740 else if (numstr_pre_len > Num.pre)
5741 {
5742 numstr = (char *) palloc(Num.pre + Num.post + 2);
5743 fill_str(numstr, '#', Num.pre + Num.post + 1);
5744 *(numstr + Num.pre) = '.';
5745 }
5746 }
5747
5748 NUM_TOCHAR_finish;
5749 PG_RETURN_TEXT_P(result);
5750 }
5751
5752 /* -----------------
5753 * FLOAT8 to_char()
5754 * -----------------
5755 */
5756 Datum
float8_to_char(PG_FUNCTION_ARGS)5757 float8_to_char(PG_FUNCTION_ARGS)
5758 {
5759 float8 value = PG_GETARG_FLOAT8(0);
5760 text *fmt = PG_GETARG_TEXT_PP(1);
5761 NUMDesc Num;
5762 FormatNode *format;
5763 text *result;
5764 bool shouldFree;
5765 int out_pre_spaces = 0,
5766 sign = 0;
5767 char *numstr,
5768 *orgnum,
5769 *p;
5770
5771 NUM_TOCHAR_prepare;
5772
5773 if (IS_ROMAN(&Num))
5774 numstr = orgnum = int_to_roman((int) rint(value));
5775 else if (IS_EEEE(&Num))
5776 {
5777 if (isnan(value) || is_infinite(value))
5778 {
5779 /*
5780 * Allow 6 characters for the leading sign, the decimal point,
5781 * "e", the exponent's sign and two exponent digits.
5782 */
5783 numstr = (char *) palloc(Num.pre + Num.post + 7);
5784 fill_str(numstr, '#', Num.pre + Num.post + 6);
5785 *numstr = ' ';
5786 *(numstr + Num.pre + 1) = '.';
5787 }
5788 else
5789 {
5790 numstr = orgnum = (char *) psprintf("%+.*e", Num.post, value);
5791
5792 /*
5793 * Swap a leading positive sign for a space.
5794 */
5795 if (*orgnum == '+')
5796 *orgnum = ' ';
5797
5798 numstr = orgnum;
5799 }
5800 }
5801 else
5802 {
5803 float8 val = value;
5804 int numstr_pre_len;
5805
5806 if (IS_MULTI(&Num))
5807 {
5808 double multi = pow((double) 10, (double) Num.multi);
5809
5810 val = value * multi;
5811 Num.pre += Num.multi;
5812 }
5813 orgnum = psprintf("%.0f", fabs(val));
5814 numstr_pre_len = strlen(orgnum);
5815
5816 /* adjust post digits to fit max double digits */
5817 if (numstr_pre_len >= DBL_DIG)
5818 Num.post = 0;
5819 else if (numstr_pre_len + Num.post > DBL_DIG)
5820 Num.post = DBL_DIG - numstr_pre_len;
5821 orgnum = psprintf("%.*f", Num.post, val);
5822
5823 if (*orgnum == '-')
5824 { /* < 0 */
5825 sign = '-';
5826 numstr = orgnum + 1;
5827 }
5828 else
5829 {
5830 sign = '+';
5831 numstr = orgnum;
5832 }
5833
5834 if ((p = strchr(numstr, '.')))
5835 numstr_pre_len = p - numstr;
5836 else
5837 numstr_pre_len = strlen(numstr);
5838
5839 /* needs padding? */
5840 if (numstr_pre_len < Num.pre)
5841 out_pre_spaces = Num.pre - numstr_pre_len;
5842 /* overflowed prefix digit format? */
5843 else if (numstr_pre_len > Num.pre)
5844 {
5845 numstr = (char *) palloc(Num.pre + Num.post + 2);
5846 fill_str(numstr, '#', Num.pre + Num.post + 1);
5847 *(numstr + Num.pre) = '.';
5848 }
5849 }
5850
5851 NUM_TOCHAR_finish;
5852 PG_RETURN_TEXT_P(result);
5853 }
5854