1 /* -----------------------------------------------------------------------
2 * formatting.c
3 *
4 * src/backend/utils/adt/formatting.c
5 *
6 *
7 * Portions Copyright (c) 1999-2021, PostgreSQL Global Development Group
8 *
9 *
10 * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11 *
12 * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13 * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14 *
15 *
16 * Cache & Memory:
 *	Routines use their own internal cache for format pictures.
 *
 *	The cache uses a static buffer and is persistent across transactions.  If
 *	the format-picture is bigger than the cache buffer, the parser is always
 *	called.
22 *
23 * NOTE for Number version:
 *	Everything in this version is implemented as keywords (suffixes are
 *	not used), because a format picture is for *one* item (number)
 *	only.  This differs from the timestamp version, where each keyword
 *	can have a suffix.
28 *
29 * NOTE for Timestamp routines:
 *	This module does *not* use the POSIX 'struct tm' type, but rather the
 *	PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
 *	year is the full year number, *not* an offset from 1900.
 *	The module supports AD / BC / AM / PM.
34 *
35 * Supported types for to_char():
36 *
37 * Timestamp, Numeric, int4, int8, float4, float8
38 *
39 * Supported types for reverse conversion:
40 *
41 * Timestamp - to_timestamp()
42 * Date - to_date()
43 * Numeric - to_number()
44 *
45 *
46 * Karel Zak
47 *
48 * TODO
49 * - better number building (formatting) / parsing, now it isn't
50 * ideal code
51 * - use Assert()
52 * - add support for roman number to standard number conversion
53 * - add support for number spelling
54 * - add support for string to string formatting (we must be better
55 * than Oracle :-),
56 * to_char('Hello', 'X X X X X') -> 'H e l l o'
57 *
58 * -----------------------------------------------------------------------
59 */
60
/*
 * When DEBUG_TO_FROM_CHAR is defined at build time, the debug macros and
 * helpers of this file are compiled in; DEBUG_elog_output is the elog level
 * (DEBUG3) they report at.
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_elog_output	DEBUG3
#endif
64
65 #include "postgres.h"
66
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72
73 /*
74 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75 * declare them in <wchar.h>, so include that too.
76 */
77 #include <wchar.h>
78 #ifdef HAVE_WCTYPE_H
79 #include <wctype.h>
80 #endif
81
82 #ifdef USE_ICU
83 #include <unicode/ustring.h>
84 #endif
85
86 #include "catalog/pg_collation.h"
87 #include "catalog/pg_type.h"
88 #include "mb/pg_wchar.h"
89 #include "parser/scansup.h"
90 #include "utils/builtins.h"
91 #include "utils/date.h"
92 #include "utils/datetime.h"
93 #include "utils/float.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/memutils.h"
97 #include "utils/numeric.h"
98 #include "utils/pg_locale.h"
99
/* ----------
 * Convenience macros for error handling
 * ----------
 *
 * The two macros below support functions that take a 'bool *have_error'
 * argument.  When that argument is not NULL, the function is expected to
 * suppress ereports where possible; instead it should set *have_error and
 * return some default value.
 *
 * RETURN_ERROR() is meant to wrap an ereport() call.  If the enclosing
 * function's have_error argument is not NULL, the wrapped statement is not
 * executed: *have_error is set and control jumps to the on_error label,
 * where resources are expected to be freed and a 'default' value returned.
 * If have_error is NULL, the wrapped statement is executed as given.
 *
 * CHECK_ERROR jumps to the on_error label when have_error is not NULL and
 * *have_error is set.  It is intended for leaving a function immediately
 * after calling another function that takes a 'bool *have_error' argument.
 *
 * Both macros need a variable named 'have_error' and a label named
 * 'on_error' to be in scope at the point of use.
 */
#define RETURN_ERROR(throw_error) \
do { \
	if (!have_error) \
	{ \
		throw_error; \
	} \
	else \
	{ \
		*have_error = true; \
		goto on_error; \
	} \
} while (0)
130
/*
 * CHECK_ERROR: jump to the on_error label if the caller supplied a
 * have_error pointer and the flag it points to is already set.  Does
 * nothing when have_error is NULL or the flag is clear.
 */
#define CHECK_ERROR \
do { \
	if (have_error) \
	{ \
		if (*have_error) \
			goto on_error; \
	} \
} while (0)
136
/* ----------
 * Routines flags (one bit each, so they can be OR'ed together)
 * ----------
 */
#define DCH_FLAG		(1 << 0)	/* DATE-TIME flag	*/
#define NUM_FLAG		(1 << 1)	/* NUMBER flag	*/
#define STD_FLAG		(1 << 2)	/* STANDARD flag	*/
144
/* ----------
 * KeyWord Index (ascii from position 32 (' ') to 126 (~))
 *
 * KeyWord_INDEX_SIZE is the number of slots in a keyword index array.
 * KeyWord_INDEX_FILTER(c) yields 1 when c lies strictly between ' ' and
 * '~', and 0 for every other value.
 * ----------
 */
#define KeyWord_INDEX_SIZE		('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	((_c) > ' ' && (_c) < '~')
151
/* ----------
 * Maximal length of one node
 *
 * NOTE(review): the comment on NUM_MAX_ITEM_SIZ mentions 15 chars for RN
 * while the limit itself is 8; confirm against the code that sizes the
 * roman-numeral output before relying on either number.
 * ----------
 */
#define DCH_MAX_ITEM_SIZ	   12	/* max localized day name		*/
#define NUM_MAX_ITEM_SIZ	   8	/* roman number (RN has 15 chars)	*/
158
159
/* ----------
 * Format parser structs
 * ----------
 */

/* Description of one keyword prefix/suffix (e.g. "FM", "TH"). */
typedef struct
{
	const char *name;			/* suffix string		*/
	int			len;			/* suffix length		*/
	int			id;				/* used in node->suffix	*/
	int			type;			/* prefix / postfix		*/
} KeySuffix;
171
/* ----------
 * FromCharDateMode
 * ----------
 *
 * Names one of several distinct, mutually exclusive date conventions that
 * a keyword can belong to.  The zero value means "no convention".
 */
typedef enum
{
	/* Value does not affect date mode. */
	FROM_CHAR_DATE_NONE = 0,
	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_GREGORIAN = 1,
	/* ISO 8601 week date */
	FROM_CHAR_DATE_ISOWEEK = 2
} FromCharDateMode;
185
/*
 * One format-picture keyword.  The keyword tables in this file are arrays
 * of these, terminated by an entry whose name is NULL.
 */
typedef struct
{
	const char *name;			/* keyword text as written in a picture */
	int			len;			/* length of name */
	int			id;				/* DCH_xxx / NUM_xxx code of the keyword */
	bool		is_digit;		/* NOTE(review): presumably "value is
								 * numeric" -- confirm at the use sites */
	FromCharDateMode date_mode; /* date convention the keyword belongs to */
} KeyWord;
194
/*
 * One node of a parsed format picture.  Which of the members are
 * meaningful depends on 'type' (see the per-member comments).
 */
typedef struct
{
	uint8		type;			/* NODE_TYPE_XXX, see below */
	char		character[MAX_MULTIBYTE_CHAR_LEN + 1];	/* if type is CHAR */
	uint8		suffix;			/* keyword prefix/suffix code, if any */
	const KeyWord *key;			/* if type is ACTION */
} FormatNode;
202
/* Values for FormatNode.type */
#define NODE_TYPE_END		1
#define NODE_TYPE_ACTION	2
#define NODE_TYPE_CHAR		3
#define NODE_TYPE_SEPARATOR	4
#define NODE_TYPE_SPACE		5

/* Values for KeySuffix.type: is the suffix written before or after a keyword */
#define SUFFTYPE_PREFIX		1
#define SUFFTYPE_POSTFIX	2

/* NOTE(review): presumably the values stored in TmFromChar.clock -- confirm */
#define CLOCK_24_HOUR		0
#define CLOCK_12_HOUR		1
214
215
/* ----------
 * English month and day names
 * ----------
 */

/* Full month names, January first; the list ends with a NULL sentinel. */
static const char *const months_full[] = {
	"January",
	"February",
	"March",
	"April",
	"May",
	"June",
	"July",
	"August",
	"September",
	"October",
	"November",
	"December",
	NULL
};

/* Three-letter day names, Sunday first; the list ends with a NULL sentinel. */
static const char *const days_short[] = {
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat",
	NULL
};
228
/* ----------
 * AD / BC
 * ----------
 *	There is no 0 AD.  Years go from 1 BC to 1 AD.
 *
 *	For a non-interval value, a year <= 0 is turned into the positive
 *	number 1 - year (so 0 becomes 1, -1 becomes 2, and so on), while a
 *	positive year is returned unchanged.  For interval values the year is
 *	always returned as-is.
 */
#define ADJUST_YEAR(year, is_interval)	(((is_interval) || (year) > 0) ? (year) : -((year) - 1))
237
#define A_D_STR		"A.D."
#define a_d_STR		"a.d."
#define AD_STR		"AD"
#define ad_STR		"ad"

#define B_C_STR		"B.C."
#define b_c_STR		"b.c."
#define BC_STR		"BC"
#define bc_STR		"bc"

/*
 * AD / BC strings for seq_search.
 *
 * Two variants are provided: a long form with periods and a standard form
 * without.
 *
 * In both arrays the AD spellings sit at even indexes and the BC spellings
 * at odd indexes, so "is it BC?" is simply the index of the match modulo 2.
 */
static const char *const adbc_strings[] = {
	ad_STR,						/* 0: AD */
	bc_STR,						/* 1: BC */
	AD_STR,						/* 2: AD */
	BC_STR,						/* 3: BC */
	NULL
};

static const char *const adbc_strings_long[] = {
	a_d_STR,					/* 0: AD */
	b_c_STR,					/* 1: BC */
	A_D_STR,					/* 2: AD */
	B_C_STR,					/* 3: BC */
	NULL
};
260
/* ----------
 * AM / PM
 * ----------
 */
#define A_M_STR		"A.M."
#define a_m_STR		"a.m."
#define AM_STR		"AM"
#define am_STR		"am"

#define P_M_STR		"P.M."
#define p_m_STR		"p.m."
#define PM_STR		"PM"
#define pm_STR		"pm"

/*
 * AM / PM strings for seq_search.
 *
 * Two variants are provided: a long form with periods and a standard form
 * without.
 *
 * In both arrays the AM spellings sit at even indexes and the PM spellings
 * at odd indexes, so "is it PM?" is simply the index of the match modulo 2.
 */
static const char *const ampm_strings[] = {
	am_STR,						/* 0: AM */
	pm_STR,						/* 1: PM */
	AM_STR,						/* 2: AM */
	PM_STR,						/* 3: PM */
	NULL
};

static const char *const ampm_strings_long[] = {
	a_m_STR,					/* 0: AM */
	p_m_STR,					/* 1: PM */
	A_M_STR,					/* 2: AM */
	P_M_STR,					/* 3: PM */
	NULL
};
287
/* ----------
 * Months in roman-numeral
 * (The lists run from December down to January.  They must be in this
 *	reverse order for seq_search (in FROM_CHAR), because 'VIII' must have
 *	higher precedence than 'V')
 * ----------
 */
static const char *const rm_months_upper[] = {
	"XII",
	"XI",
	"X",
	"IX",
	"VIII",
	"VII",
	"VI",
	"V",
	"IV",
	"III",
	"II",
	"I",
	NULL
};

static const char *const rm_months_lower[] = {
	"xii",
	"xi",
	"x",
	"ix",
	"viii",
	"vii",
	"vi",
	"v",
	"iv",
	"iii",
	"ii",
	"i",
	NULL
};
299
/* ----------
 * Roman numbers
 *
 * Element i of each list is the numeral for (i + 1) ones, tens or hundreds
 * respectively; each list ends with a NULL sentinel.
 * ----------
 */
static const char *const rm1[] = {
	"I", "II", "III",
	"IV", "V", "VI",
	"VII", "VIII", "IX",
	NULL
};

static const char *const rm10[] = {
	"X", "XX", "XXX",
	"XL", "L", "LX",
	"LXX", "LXXX", "XC",
	NULL
};

static const char *const rm100[] = {
	"C", "CC", "CCC",
	"CD", "D", "DC",
	"DCC", "DCCC", "CM",
	NULL
};
307
/* ----------
 * Ordinal postfixes, in upper-case and lower-case spelling.  Both lists
 * use the same order and end with a NULL sentinel.
 * ----------
 */
static const char *const numTH[] = {
	"ST",
	"ND",
	"RD",
	"TH",
	NULL
};

static const char *const numth[] = {
	"st",
	"nd",
	"rd",
	"th",
	NULL
};
314
/* ----------
 * Flags & Options:
 *
 * TH_UPPER / TH_LOWER select the upper-case or lower-case spelling of an
 * ordinal postfix; they are the values produced by S_TH_TYPE().
 * ----------
 */
#define TH_UPPER		1
#define TH_LOWER		2
321
/* ----------
 * Number description struct
 *
 * All members are plain ints; 'flag' holds an OR of the NUM_F_xxx bits.
 * ----------
 */
typedef struct
{
	int			pre;			/* (count) numbers before decimal */
	int			post;			/* (count) numbers after decimal  */
	int			lsign;			/* want locales sign		  */
	int			flag;			/* number parameters		  */
	int			pre_lsign_num;	/* tmp value for lsign		  */
	int			multi;			/* multiplier for 'V'		  */
	int			zero_start;		/* position of first zero	  */
	int			zero_end;		/* position of last zero	  */
	int			need_locale;	/* needs it locale		  */
} NUMDesc;
338
/* ----------
 * Flags for NUMBER version (bits of NUMDesc.flag)
 *
 * Bit 0 is not assigned; the flags occupy bits 1 through 14.
 * ----------
 */
#define NUM_F_DECIMAL		0x0002
#define NUM_F_LDECIMAL		0x0004
#define NUM_F_ZERO			0x0008
#define NUM_F_BLANK			0x0010
#define NUM_F_FILLMODE		0x0020
#define NUM_F_LSIGN			0x0040
#define NUM_F_BRACKET		0x0080
#define NUM_F_MINUS			0x0100
#define NUM_F_PLUS			0x0200
#define NUM_F_ROMAN			0x0400
#define NUM_F_MULTI			0x0800
#define NUM_F_PLUS_POST		0x1000
#define NUM_F_MINUS_POST	0x2000
#define NUM_F_EEEE			0x4000

/* Position of the locale sign relative to the number */
#define NUM_LSIGN_PRE	(-1)
#define NUM_LSIGN_POST	1
#define NUM_LSIGN_NONE	0
361
/* ----------
 * Tests
 *
 * Each macro takes a pointer to a struct with a 'flag' member (a NUMDesc)
 * and yields the masked flag bit itself: nonzero when the flag is set, but
 * not normalized to 0/1.
 * ----------
 */
#define IS_DECIMAL(_f)	((_f)->flag & NUM_F_DECIMAL)
#define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
#define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
#define IS_BLANK(_f)	((_f)->flag & NUM_F_BLANK)
#define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
#define IS_BRACKET(_f)	((_f)->flag & NUM_F_BRACKET)
#define IS_MINUS(_f)	((_f)->flag & NUM_F_MINUS)
#define IS_LSIGN(_f)	((_f)->flag & NUM_F_LSIGN)
#define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
#define IS_ROMAN(_f)	((_f)->flag & NUM_F_ROMAN)
#define IS_MULTI(_f)	((_f)->flag & NUM_F_MULTI)
#define IS_EEEE(_f)		((_f)->flag & NUM_F_EEEE)
378
/* ----------
 * Format picture cache
 *
 * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
 * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
 *
 * For simplicity, the cache entries are fixed-size, so they allow for the
 * worst case of a FormatNode for each byte in the picture string.
 *
 * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
 * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
 * we don't waste too much space by palloc'ing them individually.  Be sure
 * to adjust those macros if you add fields to those structs.
 *
 * The OVERHEAD macros account for the non-array members of the entries:
 *	DCHCacheEntry:	std (bool), valid (bool), age (int)
 *	NUMCacheEntry:	valid (bool), age (int), Num (NUMDesc)
 *
 * The max number of entries in each cache is DCH_CACHE_ENTRIES
 * resp. NUM_CACHE_ENTRIES.
 * ----------
 */
#define DCH_CACHE_OVERHEAD \
	MAXALIGN(sizeof(bool) + sizeof(bool) + sizeof(int))
#define NUM_CACHE_OVERHEAD \
	MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))

/* 2048 and 1024 are the target byte sizes of one cache entry */
#define DCH_CACHE_SIZE \
	((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
#define NUM_CACHE_SIZE \
	((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)

#define DCH_CACHE_ENTRIES	20
#define NUM_CACHE_ENTRIES	20
409
/*
 * One cached date/time format picture.  The arrays are sized for a picture
 * of up to DCH_CACHE_SIZE bytes plus a terminator.
 */
typedef struct
{
	FormatNode	format[DCH_CACHE_SIZE + 1]; /* parsed picture */
	char		str[DCH_CACHE_SIZE + 1];	/* picture string */
	bool		std;			/* NOTE(review): presumably the 'std' flag
								 * the picture was parsed with -- confirm */
	bool		valid;			/* NOTE(review): presumably "format[] is
								 * usable" -- confirm */
	int			age;			/* NOTE(review): presumably compared against
								 * DCHCounter for replacement -- confirm */
} DCHCacheEntry;

/*
 * One cached number format picture.  The arrays are sized for a picture of
 * up to NUM_CACHE_SIZE bytes plus a terminator.
 */
typedef struct
{
	FormatNode	format[NUM_CACHE_SIZE + 1]; /* parsed picture */
	char		str[NUM_CACHE_SIZE + 1];	/* picture string */
	bool		valid;			/* NOTE(review): presumably "format[] is
								 * usable" -- confirm */
	int			age;			/* NOTE(review): presumably compared against
								 * NUMCounter for replacement -- confirm */
	NUMDesc		Num;			/* number description kept with the picture */
} NUMCacheEntry;
427
/*
 * global cache for date/time format pictures: an array of pointers to
 * entries, with room for DCH_CACHE_ENTRIES of them
 */
static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES];
static int	n_DCHCache = 0;		/* current number of entries */
static int	DCHCounter = 0;		/* aging-event counter */

/*
 * global cache for number format pictures: an array of pointers to
 * entries, with room for NUM_CACHE_ENTRIES of them
 */
static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES];
static int	n_NUMCache = 0;		/* current number of entries */
static int	NUMCounter = 0;		/* aging-event counter */
437
438 /* ----------
439 * For char->date/time conversion
440 * ----------
441 */
442 typedef struct
443 {
444 FromCharDateMode mode;
445 int hh,
446 pm,
447 mi,
448 ss,
449 ssss,
450 d, /* stored as 1-7, Sunday = 1, 0 means missing */
451 dd,
452 ddd,
453 mm,
454 ms,
455 year,
456 bc,
457 ww,
458 w,
459 cc,
460 j,
461 us,
462 yysz, /* is it YY or YYYY ? */
463 clock, /* 12 or 24 hour clock? */
464 tzsign, /* +1, -1 or 0 if timezone info is absent */
465 tzh,
466 tzm,
467 ff; /* fractional precision */
468 } TmFromChar;
469
470 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
471
/* ----------
 * Debug
 *
 * DEBUG_TMFC(x) dumps the members of the TmFromChar that x points to, and
 * DEBUG_TM(x) dumps the members of a tm-style struct, both through elog()
 * at level DEBUG_elog_output.  Unless DEBUG_TO_FROM_CHAR is defined, both
 * expand to nothing.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d\ntzsign: %d\ntzh: %d\ntzm: %d\nff: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock, (_X)->tzsign, (_X)->tzh, (_X)->tzm, \
			(_X)->ff)
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
492
/* ----------
 * Datetime to char conversion
 * ----------
 */
typedef struct TmToChar
{
	struct pg_tm tm;			/* classic 'tm' struct */
	fsec_t		fsec;			/* fractional seconds */
	const char *tzn;			/* timezone */
} TmToChar;

/*
 * Member accessors for a TmToChar pointer.  tmtcTm() yields a pointer to
 * the embedded tm struct; the other two yield the member itself and can be
 * assigned to.
 */
#define tmtcTm(_X)	(&(_X)->tm)
#define tmtcTzn(_X) ((_X)->tzn)
#define tmtcFsec(_X)	((_X)->fsec)
507
/*
 * Reset the tm-style struct that _X points to: day-of-month and month are
 * set to 1, the zone name to NULL, and every other field touched here to 0.
 */
#define ZERO_tm(_X) \
do { \
	(_X)->tm_sec = 0; \
	(_X)->tm_min = 0; \
	(_X)->tm_hour = 0; \
	(_X)->tm_mday = 1; \
	(_X)->tm_mon = 1; \
	(_X)->tm_year = 0; \
	(_X)->tm_wday = 0; \
	(_X)->tm_yday = 0; \
	(_X)->tm_isdst = 0; \
	(_X)->tm_zone = NULL; \
} while(0)

/*
 * Reset the TmToChar that _X points to: clear the timezone name and the
 * fractional seconds, and reset the embedded tm struct with ZERO_tm().
 */
#define ZERO_tmtc(_X) \
do { \
	tmtcTzn(_X) = NULL; \
	tmtcFsec(_X) = 0; \
	ZERO_tm( tmtcTm(_X) ); \
} while(0)
522
/*
 *	to_char(time) appears to to_char() as an interval, so this check
 *	is really for interval and time data types.
 *
 *	The macro needs a variable named 'is_interval' in scope.  When it is
 *	true, an ERROR with code ERRCODE_INVALID_DATETIME_FORMAT is reported;
 *	otherwise the macro does nothing.
 */
#define INVALID_FOR_INTERVAL  \
do { \
	if (is_interval) \
		ereport(ERROR, \
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
				 errmsg("invalid format specification for an interval value"), \
				 errhint("Intervals are not tied to specific calendar dates."))); \
} while(0)
535
536 /*****************************************************************************
537 * KeyWord definitions
538 *****************************************************************************/
539
/* ----------
 * Suffixes (FormatNode.suffix is an OR of these codes)
 * ----------
 */
#define DCH_S_FM	0x01
#define DCH_S_TH	0x02
#define DCH_S_th	0x04
#define DCH_S_SP	0x08
#define DCH_S_TM	0x10

/* ----------
 * Suffix tests
 *
 * Except for S_TH_TYPE, each test yields 1 when the suffix bit(s) in
 * question are set in _s and 0 otherwise.  S_TH_TYPE yields TH_UPPER when
 * the upper-case TH bit is set and TH_LOWER otherwise.
 * ----------
 */
#define S_THth(_s)	((((_s) & DCH_S_TH) != 0) || (((_s) & DCH_S_th) != 0))
#define S_TH(_s)	(((_s) & DCH_S_TH) != 0)
#define S_th(_s)	(((_s) & DCH_S_th) != 0)
#define S_TH_TYPE(_s)	(((_s) & DCH_S_TH) == 0 ? TH_LOWER : TH_UPPER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)	(((_s) & DCH_S_FM) != 0)
#define S_SP(_s)	(((_s) & DCH_S_SP) != 0)
#define S_TM(_s)	(((_s) & DCH_S_TM) != 0)
563
/* ----------
 * Suffixes definition for DATE-TIME TO/FROM CHAR
 *
 * Each entry gives: suffix text, its length, the DCH_S_xxx code, and
 * whether it is written before (PREFIX) or after (POSTFIX) a keyword.
 * The table ends with an entry whose name is NULL.
 * ----------
 */
#define TM_SUFFIX_LEN	2

static const KeySuffix DCH_suff[] = {
	{"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
	{"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
	{"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
	{"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
	{"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
	{"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
	{"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
	/* last */
	{NULL, 0, 0, 0}
};
581
582
583 /* ----------
584 * Format-pictures (KeyWord).
585 *
 * The keyword tables are sorted alphabetically, *BUT* keywords that share
 * a common prefix are sorted from the most complicated (longest) to the
 * simplest:
 *
 *	(example: "DDD","DD","Day","D" )
 *
 * This particular order is required by the sequential search, because the
 * keyword in the input has no explicit end.  Which keyword is in
 * "HH12blabla": "HH" or "HH12"?  "HH12" must be tried first, because "HH"
 * also matches the string but is not the right answer.
 *
 * (!)
 *	 - The position of a keyword in its table must be the same as its
 *	   position in the enum DCH_poz / NUM_poz.
 * (!)
 *
 * For fast search the 'int index[]' arrays are used.  An index is an ASCII
 * table from position 32 (' ') to 126 (~); for each ASCII position it holds
 * a DCH_ / NUM_ enum value, or -1 if no keyword starts with that character.
 * Search example for the string "MM":
 *	1) look up index['M' - 32],
 *	2) take the keyword position (enum DCH_MI) from the index,
 *	3) run a sequential search in keywords[] starting from this position
607 *
608 * ----------
609 */
610
/*
 * Position codes for the DATE-TIME keywords.  There is one enumerator per
 * entry of DCH_keywords[], in the same order, so an enumerator's value is
 * also the array position of the corresponding entry.  Do not reorder.
 */
typedef enum
{
	DCH_A_D,
	DCH_A_M,
	DCH_AD,
	DCH_AM,
	DCH_B_C,
	DCH_BC,
	DCH_CC,
	DCH_DAY,
	DCH_DDD,
	DCH_DD,
	DCH_DY,
	DCH_Day,
	DCH_Dy,
	DCH_D,
	DCH_FF1,
	DCH_FF2,
	DCH_FF3,
	DCH_FF4,
	DCH_FF5,
	DCH_FF6,
	DCH_FX,						/* global suffix */
	DCH_HH24,
	DCH_HH12,
	DCH_HH,
	DCH_IDDD,
	DCH_ID,
	DCH_IW,
	DCH_IYYY,
	DCH_IYY,
	DCH_IY,
	DCH_I,
	DCH_J,
	DCH_MI,
	DCH_MM,
	DCH_MONTH,
	DCH_MON,
	DCH_MS,
	DCH_Month,
	DCH_Mon,
	DCH_OF,
	DCH_P_M,
	DCH_PM,
	DCH_Q,
	DCH_RM,
	DCH_SSSSS,
	DCH_SSSS,
	DCH_SS,
	DCH_TZH,
	DCH_TZM,
	DCH_TZ,
	DCH_US,
	DCH_WW,
	DCH_W,
	DCH_Y_YYY,
	DCH_YYYY,
	DCH_YYY,
	DCH_YY,
	DCH_Y,
	DCH_a_d,
	DCH_a_m,
	DCH_ad,
	DCH_am,
	DCH_b_c,
	DCH_bc,
	DCH_cc,
	DCH_day,
	DCH_ddd,
	DCH_dd,
	DCH_dy,
	DCH_d,
	DCH_ff1,
	DCH_ff2,
	DCH_ff3,
	DCH_ff4,
	DCH_ff5,
	DCH_ff6,
	DCH_fx,
	DCH_hh24,
	DCH_hh12,
	DCH_hh,
	DCH_iddd,
	DCH_id,
	DCH_iw,
	DCH_iyyy,
	DCH_iyy,
	DCH_iy,
	DCH_i,
	DCH_j,
	DCH_mi,
	DCH_mm,
	DCH_month,
	DCH_mon,
	DCH_ms,
	DCH_p_m,
	DCH_pm,
	DCH_q,
	DCH_rm,
	DCH_sssss,
	DCH_ssss,
	DCH_ss,
	DCH_tz,
	DCH_us,
	DCH_ww,
	DCH_w,
	DCH_y_yyy,
	DCH_yyyy,
	DCH_yyy,
	DCH_yy,
	DCH_y,

	/* last */
	_DCH_last_
} DCH_poz;
726
/*
 * Position codes for the NUMBER keywords.  There is one enumerator per
 * entry of NUM_keywords[], in the same order, so an enumerator's value is
 * also the array position of the corresponding entry.  Do not reorder.
 */
typedef enum
{
	NUM_COMMA,
	NUM_DEC,
	NUM_0,
	NUM_9,
	NUM_B,
	NUM_C,
	NUM_D,
	NUM_E,
	NUM_FM,
	NUM_G,
	NUM_L,
	NUM_MI,
	NUM_PL,
	NUM_PR,
	NUM_RN,
	NUM_SG,
	NUM_SP,
	NUM_S,
	NUM_TH,
	NUM_V,
	NUM_b,
	NUM_c,
	NUM_d,
	NUM_e,
	NUM_fm,
	NUM_g,
	NUM_l,
	NUM_mi,
	NUM_pl,
	NUM_pr,
	NUM_rn,
	NUM_sg,
	NUM_sp,
	NUM_s,
	NUM_th,
	NUM_v,

	/* last */
	_NUM_last_
} NUM_poz;
769
/* ----------
 * KeyWords for DATE-TIME version
 *
 * The entries are in the same order as the DCH_poz enumerators.  The 'id'
 * of an entry is not always the enumerator for its own position: most
 * lower-case spellings carry the id of the upper-case keyword, and both
 * "SSSSS" and "SSSS" carry DCH_SSSS.  Entries sharing a first letter are
 * ordered longest first.  The table ends with an entry whose name is NULL.
 * ----------
 */
static const KeyWord DCH_keywords[] = {
/*	name, len, id, is_digit, date_mode */
	{"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE},	/* A */
	{"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
	{"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
	{"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
	{"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE},	/* B */
	{"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
	{"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},	/* C */
	{"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE},	/* D */
	{"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
	{"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
	{"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
	{"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
	{"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
	{"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
	{"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},	/* F */
	{"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
	{"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
	{"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
	{"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
	{"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
	{"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
	{"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},	/* H */
	{"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
	{"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
	{"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},	/* I */
	{"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
	{"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
	{"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
	{"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
	{"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
	{"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},	/* M */
	{"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
	{"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
	{"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
	{"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
	{"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
	{"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
	{"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE},	/* O */
	{"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE},	/* P */
	{"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
	{"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
	{"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
	{"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},	/* S */
	{"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
	{"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
	{"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE},	/* T */
	{"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
	{"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
	{"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE},	/* U */
	{"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},	/* W */
	{"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
	{"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},	/* Y */
	{"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
	{"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
	{"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE},	/* a */
	{"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
	{"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
	{"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
	{"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE},	/* b */
	{"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
	{"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},	/* c */
	{"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE},	/* d */
	{"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
	{"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
	{"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
	{"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
	{"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},	/* f */
	{"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
	{"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
	{"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
	{"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
	{"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
	{"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
	{"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},	/* h */
	{"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
	{"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
	{"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},	/* i */
	{"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
	{"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
	{"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
	{"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
	{"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
	{"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},	/* m */
	{"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
	{"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
	{"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
	{"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
	{"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE},	/* p */
	{"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
	{"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
	{"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
	{"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},	/* s */
	{"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
	{"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
	{"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},	/* t */
	{"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE},	/* u */
	{"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},	/* w */
	{"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
	{"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},	/* y */
	{"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
	{"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},

	/* last */
	{NULL, 0, 0, 0, 0}
};
889
/* ----------
 * KeyWords for NUMBER version
 *
 * The is_digit and date_mode fields are not relevant here; they are left
 * out of the initializers and are therefore zero.
 *
 * The entries are in the same order as the NUM_poz enumerators.  Most
 * lower-case spellings carry the id of the upper-case keyword; "rn" and
 * "th" keep ids of their own (NUM_rn, NUM_th).  The table ends with an
 * entry whose name is NULL.
 * ----------
 */
static const KeyWord NUM_keywords[] = {
/*	name, len, id			is in Index */
	{",", 1, NUM_COMMA},		/* , */
	{".", 1, NUM_DEC},			/* . */
	{"0", 1, NUM_0},			/* 0 */
	{"9", 1, NUM_9},			/* 9 */
	{"B", 1, NUM_B},			/* B */
	{"C", 1, NUM_C},			/* C */
	{"D", 1, NUM_D},			/* D */
	{"EEEE", 4, NUM_E},			/* E */
	{"FM", 2, NUM_FM},			/* F */
	{"G", 1, NUM_G},			/* G */
	{"L", 1, NUM_L},			/* L */
	{"MI", 2, NUM_MI},			/* M */
	{"PL", 2, NUM_PL},			/* P */
	{"PR", 2, NUM_PR},
	{"RN", 2, NUM_RN},			/* R */
	{"SG", 2, NUM_SG},			/* S */
	{"SP", 2, NUM_SP},
	{"S", 1, NUM_S},
	{"TH", 2, NUM_TH},			/* T */
	{"V", 1, NUM_V},			/* V */
	{"b", 1, NUM_B},			/* b */
	{"c", 1, NUM_C},			/* c */
	{"d", 1, NUM_D},			/* d */
	{"eeee", 4, NUM_E},			/* e */
	{"fm", 2, NUM_FM},			/* f */
	{"g", 1, NUM_G},			/* g */
	{"l", 1, NUM_L},			/* l */
	{"mi", 2, NUM_MI},			/* m */
	{"pl", 2, NUM_PL},			/* p */
	{"pr", 2, NUM_PR},
	{"rn", 2, NUM_rn},			/* r */
	{"sg", 2, NUM_SG},			/* s */
	{"sp", 2, NUM_SP},
	{"s", 1, NUM_S},
	{"th", 2, NUM_th},			/* t */
	{"v", 1, NUM_V},			/* v */

	/* last */
	{NULL, 0, 0}
};
938
939
/* ----------
 * KeyWords index for DATE-TIME version
 *
 * Slot n belongs to the character with code n + 32.  It holds the DCH_poz
 * value of the first keyword starting with that character, or -1 if no
 * keyword starts with it.
 * ----------
 */
static const int DCH_index[KeyWord_INDEX_SIZE] = {
/*
0	1	2	3	4	5	6	7	8	9
*/
	/*---- first 0..31 chars are skipped ----*/

	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
	DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
	DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,
	-1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
	DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
	-1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww,
	-1, DCH_y_yyy, -1, -1, -1, -1

	/*---- chars over 126 are skipped ----*/
};
963
/* ----------
 * KeyWords index for NUMBER version
 *
 * Slot n belongs to the character with code n + 32.  It holds the NUM_poz
 * value of the first keyword starting with that character, or -1 if no
 * keyword starts with it.
 * ----------
 */
static const int NUM_index[KeyWord_INDEX_SIZE] = {
/*
0	1	2	3	4	5	6	7	8	9
*/
	/*---- first 0..31 chars are skipped ----*/

	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
	-1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
	-1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
	NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
	NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
	NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
	-1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
	-1, -1, -1, -1, -1, -1

	/*---- chars over 126 are skipped ----*/
};
987
988 /* ----------
989 * Number processor struct
990 * ----------
991 */
992 typedef struct NUMProc
993 {
994 bool is_to_char;
995 NUMDesc *Num; /* number description */
996
997 int sign, /* '-' or '+' */
998 sign_wrote, /* was sign write */
999 num_count, /* number of write digits */
1000 num_in, /* is inside number */
1001 num_curr, /* current position in number */
1002 out_pre_spaces, /* spaces before first digit */
1003
1004 read_dec, /* to_number - was read dec. point */
1005 read_post, /* to_number - number of dec. digit */
1006 read_pre; /* to_number - number non-dec. digit */
1007
1008 char *number, /* string with number */
1009 *number_p, /* pointer to current number position */
1010 *inout, /* in / out buffer */
1011 *inout_p, /* pointer to current inout position */
1012 *last_relevant, /* last relevant number after decimal point */
1013
1014 *L_negative_sign, /* Locale */
1015 *L_positive_sign,
1016 *decimal,
1017 *L_thousands_sep,
1018 *L_currency_symbol;
1019 } NUMProc;
1020
/* Return flags for DCH_from_char() (one bit each, so they can be OR'ed) */
#define DCH_DATED	0x01
#define DCH_TIMED	0x02
#define DCH_ZONED	0x04
1025
/* ----------
 * Functions
 *
 * Forward declarations of file-local (static) functions.  The two dump_*
 * helpers exist only when DEBUG_TO_FROM_CHAR is defined.
 * ----------
 */
static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
									   const int *index);
static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
static bool is_separator_char(const char *str);
static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
						 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);

static void DCH_to_char(FormatNode *node, bool is_interval,
						TmToChar *in, char *out, Oid collid);
static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
						  Oid collid, bool std, bool *have_error);

#ifdef DEBUG_TO_FROM_CHAR
static void dump_index(const KeyWord *k, const int *index);
static void dump_node(FormatNode *node, int max);
#endif

static const char *get_th(char *num, int type);
static char *str_numth(char *dest, char *num, int type);
static int	adjust_partial_year_to_2020(int year);
static int	strspace_len(const char *str);
static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
							   bool *have_error);
static void from_char_set_int(int *dest, const int value, const FormatNode *node,
							  bool *have_error);
static int	from_char_parse_int_len(int *dest, const char **src, const int len,
									FormatNode *node, bool *have_error);
static int	from_char_parse_int(int *dest, const char **src, FormatNode *node,
								bool *have_error);
static int	seq_search_ascii(const char *name, const char *const *array, int *len);
static int	seq_search_localized(const char *name, char **array, int *len,
								 Oid collid);
static int	from_char_seq_search(int *dest, const char **src,
								 const char *const *array,
								 char **localized_array, Oid collid,
								 FormatNode *node, bool *have_error);
static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
							struct pg_tm *tm, fsec_t *fsec, int *fprec,
							uint32 *flags, bool *have_error);
static char *fill_str(char *str, int c, int max);
static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
static char *int_to_roman(int number);
static void NUM_prepare_locale(NUMProc *Np);
static char *get_last_relevant_decnum(char *num);
static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
static void NUM_numpart_to_char(NUMProc *Np, int id);
static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
						   char *number, int input_len, int to_char_out_pre_spaces,
						   int sign, bool is_to_char, Oid collid);
static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1082 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1083 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1084 static NUMCacheEntry *NUM_cache_search(const char *str);
1085 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1086
1087
1088 /* ----------
1089 * Fast sequential search, use index for data selection which
1090 * go to seq. cycle (it is very fast for unwanted strings)
1091 * (can't be used binary search in format parsing)
1092 * ----------
1093 */
1094 static const KeyWord *
index_seq_search(const char * str,const KeyWord * kw,const int * index)1095 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1096 {
1097 int poz;
1098
1099 if (!KeyWord_INDEX_FILTER(*str))
1100 return NULL;
1101
1102 if ((poz = *(index + (*str - ' '))) > -1)
1103 {
1104 const KeyWord *k = kw + poz;
1105
1106 do
1107 {
1108 if (strncmp(str, k->name, k->len) == 0)
1109 return k;
1110 k++;
1111 if (!k->name)
1112 return NULL;
1113 } while (*str == *k->name);
1114 }
1115 return NULL;
1116 }
1117
1118 static const KeySuffix *
suff_search(const char * str,const KeySuffix * suf,int type)1119 suff_search(const char *str, const KeySuffix *suf, int type)
1120 {
1121 const KeySuffix *s;
1122
1123 for (s = suf; s->name != NULL; s++)
1124 {
1125 if (s->type != type)
1126 continue;
1127
1128 if (strncmp(str, s->name, s->len) == 0)
1129 return s;
1130 }
1131 return NULL;
1132 }
1133
/*
 * Report whether the first byte of str is a printable ASCII character
 * (above space, below DEL) that is neither a letter nor a digit.
 */
static bool
is_separator_char(const char *str)
{
	const char	c = *str;

	/* outside the printable, non-space ASCII range */
	if (c <= 0x20 || c >= 0x7F)
		return false;

	/* letters and digits are not separators */
	if ((c >= 'A' && c <= 'Z') ||
		(c >= 'a' && c <= 'z') ||
		(c >= '0' && c <= '9'))
		return false;

	return true;
}
1143
1144 /* ----------
1145 * Prepare NUMDesc (number description struct) via FormatNode struct
1146 * ----------
1147 */
1148 static void
NUMDesc_prepare(NUMDesc * num,FormatNode * n)1149 NUMDesc_prepare(NUMDesc *num, FormatNode *n)
1150 {
1151 if (n->type != NODE_TYPE_ACTION)
1152 return;
1153
1154 if (IS_EEEE(num) && n->key->id != NUM_E)
1155 ereport(ERROR,
1156 (errcode(ERRCODE_SYNTAX_ERROR),
1157 errmsg("\"EEEE\" must be the last pattern used")));
1158
1159 switch (n->key->id)
1160 {
1161 case NUM_9:
1162 if (IS_BRACKET(num))
1163 ereport(ERROR,
1164 (errcode(ERRCODE_SYNTAX_ERROR),
1165 errmsg("\"9\" must be ahead of \"PR\"")));
1166 if (IS_MULTI(num))
1167 {
1168 ++num->multi;
1169 break;
1170 }
1171 if (IS_DECIMAL(num))
1172 ++num->post;
1173 else
1174 ++num->pre;
1175 break;
1176
1177 case NUM_0:
1178 if (IS_BRACKET(num))
1179 ereport(ERROR,
1180 (errcode(ERRCODE_SYNTAX_ERROR),
1181 errmsg("\"0\" must be ahead of \"PR\"")));
1182 if (!IS_ZERO(num) && !IS_DECIMAL(num))
1183 {
1184 num->flag |= NUM_F_ZERO;
1185 num->zero_start = num->pre + 1;
1186 }
1187 if (!IS_DECIMAL(num))
1188 ++num->pre;
1189 else
1190 ++num->post;
1191
1192 num->zero_end = num->pre + num->post;
1193 break;
1194
1195 case NUM_B:
1196 if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1197 num->flag |= NUM_F_BLANK;
1198 break;
1199
1200 case NUM_D:
1201 num->flag |= NUM_F_LDECIMAL;
1202 num->need_locale = true;
1203 /* FALLTHROUGH */
1204 case NUM_DEC:
1205 if (IS_DECIMAL(num))
1206 ereport(ERROR,
1207 (errcode(ERRCODE_SYNTAX_ERROR),
1208 errmsg("multiple decimal points")));
1209 if (IS_MULTI(num))
1210 ereport(ERROR,
1211 (errcode(ERRCODE_SYNTAX_ERROR),
1212 errmsg("cannot use \"V\" and decimal point together")));
1213 num->flag |= NUM_F_DECIMAL;
1214 break;
1215
1216 case NUM_FM:
1217 num->flag |= NUM_F_FILLMODE;
1218 break;
1219
1220 case NUM_S:
1221 if (IS_LSIGN(num))
1222 ereport(ERROR,
1223 (errcode(ERRCODE_SYNTAX_ERROR),
1224 errmsg("cannot use \"S\" twice")));
1225 if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1226 ereport(ERROR,
1227 (errcode(ERRCODE_SYNTAX_ERROR),
1228 errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1229 if (!IS_DECIMAL(num))
1230 {
1231 num->lsign = NUM_LSIGN_PRE;
1232 num->pre_lsign_num = num->pre;
1233 num->need_locale = true;
1234 num->flag |= NUM_F_LSIGN;
1235 }
1236 else if (num->lsign == NUM_LSIGN_NONE)
1237 {
1238 num->lsign = NUM_LSIGN_POST;
1239 num->need_locale = true;
1240 num->flag |= NUM_F_LSIGN;
1241 }
1242 break;
1243
1244 case NUM_MI:
1245 if (IS_LSIGN(num))
1246 ereport(ERROR,
1247 (errcode(ERRCODE_SYNTAX_ERROR),
1248 errmsg("cannot use \"S\" and \"MI\" together")));
1249 num->flag |= NUM_F_MINUS;
1250 if (IS_DECIMAL(num))
1251 num->flag |= NUM_F_MINUS_POST;
1252 break;
1253
1254 case NUM_PL:
1255 if (IS_LSIGN(num))
1256 ereport(ERROR,
1257 (errcode(ERRCODE_SYNTAX_ERROR),
1258 errmsg("cannot use \"S\" and \"PL\" together")));
1259 num->flag |= NUM_F_PLUS;
1260 if (IS_DECIMAL(num))
1261 num->flag |= NUM_F_PLUS_POST;
1262 break;
1263
1264 case NUM_SG:
1265 if (IS_LSIGN(num))
1266 ereport(ERROR,
1267 (errcode(ERRCODE_SYNTAX_ERROR),
1268 errmsg("cannot use \"S\" and \"SG\" together")));
1269 num->flag |= NUM_F_MINUS;
1270 num->flag |= NUM_F_PLUS;
1271 break;
1272
1273 case NUM_PR:
1274 if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1275 ereport(ERROR,
1276 (errcode(ERRCODE_SYNTAX_ERROR),
1277 errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1278 num->flag |= NUM_F_BRACKET;
1279 break;
1280
1281 case NUM_rn:
1282 case NUM_RN:
1283 num->flag |= NUM_F_ROMAN;
1284 break;
1285
1286 case NUM_L:
1287 case NUM_G:
1288 num->need_locale = true;
1289 break;
1290
1291 case NUM_V:
1292 if (IS_DECIMAL(num))
1293 ereport(ERROR,
1294 (errcode(ERRCODE_SYNTAX_ERROR),
1295 errmsg("cannot use \"V\" and decimal point together")));
1296 num->flag |= NUM_F_MULTI;
1297 break;
1298
1299 case NUM_E:
1300 if (IS_EEEE(num))
1301 ereport(ERROR,
1302 (errcode(ERRCODE_SYNTAX_ERROR),
1303 errmsg("cannot use \"EEEE\" twice")));
1304 if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1305 IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1306 IS_ROMAN(num) || IS_MULTI(num))
1307 ereport(ERROR,
1308 (errcode(ERRCODE_SYNTAX_ERROR),
1309 errmsg("\"EEEE\" is incompatible with other formats"),
1310 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1311 num->flag |= NUM_F_EEEE;
1312 break;
1313 }
1314 }
1315
1316 /* ----------
1317 * Format parser, search small keywords and keyword's suffixes, and make
1318 * format-node tree.
1319 *
1320 * for DATE-TIME & NUMBER version
1321 * ----------
1322 */
1323 static void
parse_format(FormatNode * node,const char * str,const KeyWord * kw,const KeySuffix * suf,const int * index,uint32 flags,NUMDesc * Num)1324 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1325 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1326 {
1327 FormatNode *n;
1328
1329 #ifdef DEBUG_TO_FROM_CHAR
1330 elog(DEBUG_elog_output, "to_char/number(): run parser");
1331 #endif
1332
1333 n = node;
1334
1335 while (*str)
1336 {
1337 int suffix = 0;
1338 const KeySuffix *s;
1339
1340 /*
1341 * Prefix
1342 */
1343 if ((flags & DCH_FLAG) &&
1344 (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1345 {
1346 suffix |= s->id;
1347 if (s->len)
1348 str += s->len;
1349 }
1350
1351 /*
1352 * Keyword
1353 */
1354 if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1355 {
1356 n->type = NODE_TYPE_ACTION;
1357 n->suffix = suffix;
1358 if (n->key->len)
1359 str += n->key->len;
1360
1361 /*
1362 * NUM version: Prepare global NUMDesc struct
1363 */
1364 if (flags & NUM_FLAG)
1365 NUMDesc_prepare(Num, n);
1366
1367 /*
1368 * Postfix
1369 */
1370 if ((flags & DCH_FLAG) && *str &&
1371 (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1372 {
1373 n->suffix |= s->id;
1374 if (s->len)
1375 str += s->len;
1376 }
1377
1378 n++;
1379 }
1380 else if (*str)
1381 {
1382 int chlen;
1383
1384 if ((flags & STD_FLAG) && *str != '"')
1385 {
1386 /*
1387 * Standard mode, allow only following separators: "-./,':; ".
1388 * However, we support double quotes even in standard mode
1389 * (see below). This is our extension of standard mode.
1390 */
1391 if (strchr("-./,':; ", *str) == NULL)
1392 ereport(ERROR,
1393 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1394 errmsg("invalid datetime format separator: \"%s\"",
1395 pnstrdup(str, pg_mblen(str)))));
1396
1397 if (*str == ' ')
1398 n->type = NODE_TYPE_SPACE;
1399 else
1400 n->type = NODE_TYPE_SEPARATOR;
1401
1402 n->character[0] = *str;
1403 n->character[1] = '\0';
1404 n->key = NULL;
1405 n->suffix = 0;
1406 n++;
1407 str++;
1408 }
1409 else if (*str == '"')
1410 {
1411 /*
1412 * Process double-quoted literal string, if any
1413 */
1414 str++;
1415 while (*str)
1416 {
1417 if (*str == '"')
1418 {
1419 str++;
1420 break;
1421 }
1422 /* backslash quotes the next character, if any */
1423 if (*str == '\\' && *(str + 1))
1424 str++;
1425 chlen = pg_mblen(str);
1426 n->type = NODE_TYPE_CHAR;
1427 memcpy(n->character, str, chlen);
1428 n->character[chlen] = '\0';
1429 n->key = NULL;
1430 n->suffix = 0;
1431 n++;
1432 str += chlen;
1433 }
1434 }
1435 else
1436 {
1437 /*
1438 * Outside double-quoted strings, backslash is only special if
1439 * it immediately precedes a double quote.
1440 */
1441 if (*str == '\\' && *(str + 1) == '"')
1442 str++;
1443 chlen = pg_mblen(str);
1444
1445 if ((flags & DCH_FLAG) && is_separator_char(str))
1446 n->type = NODE_TYPE_SEPARATOR;
1447 else if (isspace((unsigned char) *str))
1448 n->type = NODE_TYPE_SPACE;
1449 else
1450 n->type = NODE_TYPE_CHAR;
1451
1452 memcpy(n->character, str, chlen);
1453 n->character[chlen] = '\0';
1454 n->key = NULL;
1455 n->suffix = 0;
1456 n++;
1457 str += chlen;
1458 }
1459 }
1460 }
1461
1462 n->type = NODE_TYPE_END;
1463 n->suffix = 0;
1464 }
1465
/* ----------
 * DEBUG: Dump the FormatNode array
 *
 * Emits one debug line per node, stopping at the NODE_TYPE_END node or
 * after max + 1 nodes, whichever comes first.  Separator and space nodes
 * (which parse_format() produces alongside plain character nodes) are
 * reported with their own labels rather than as unknown nodes.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	FormatNode *n;
	int			a;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (a = 0, n = node; a <= max; n++, a++)
	{
		if (n->type == NODE_TYPE_ACTION)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
		else if (n->type == NODE_TYPE_CHAR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_SEPARATOR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SEPARATOR '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_SPACE)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SPACE '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
			return;
		}
		else
			elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
	}
}
#endif							/* DEBUG */
1501
1502 /*****************************************************************************
1503 * Private utils
1504 *****************************************************************************/
1505
1506 /* ----------
1507 * Return ST/ND/RD/TH for simple (1..9) numbers
1508 * type --> 0 upper, 1 lower
1509 * ----------
1510 */
1511 static const char *
get_th(char * num,int type)1512 get_th(char *num, int type)
1513 {
1514 int len = strlen(num),
1515 last;
1516
1517 last = *(num + (len - 1));
1518 if (!isdigit((unsigned char) last))
1519 ereport(ERROR,
1520 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1521 errmsg("\"%s\" is not a number", num)));
1522
1523 /*
1524 * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1525 * 'ST/st', 'ND/nd', 'RD/rd', respectively
1526 */
1527 if ((len > 1) && (num[len - 2] == '1'))
1528 last = 0;
1529
1530 switch (last)
1531 {
1532 case '1':
1533 if (type == TH_UPPER)
1534 return numTH[0];
1535 return numth[0];
1536 case '2':
1537 if (type == TH_UPPER)
1538 return numTH[1];
1539 return numth[1];
1540 case '3':
1541 if (type == TH_UPPER)
1542 return numTH[2];
1543 return numth[2];
1544 default:
1545 if (type == TH_UPPER)
1546 return numTH[3];
1547 return numth[3];
1548 }
1549 }
1550
/* ----------
 * Convert a string-number to an ordinal string-number by appending the
 * suffix chosen by get_th().
 *
 * num is first copied to dest unless both are the same buffer; the suffix
 * is then appended in place.  The caller must provide room in dest for the
 * number plus the suffix.  type is passed through to get_th().  Returns
 * dest.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *ordinal;

	if (dest != num)
		strcpy(dest, num);

	ordinal = get_th(num, type);
	strcat(dest, ordinal);

	return dest;
}
1564
1565 /*****************************************************************************
1566 * upper/lower/initcap functions
1567 *****************************************************************************/
1568
1569 #ifdef USE_ICU
1570
1571 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1572 const UChar *src, int32_t srcLength,
1573 const char *locale,
1574 UErrorCode *pErrorCode);
1575
1576 static int32_t
icu_convert_case(ICU_Convert_Func func,pg_locale_t mylocale,UChar ** buff_dest,UChar * buff_source,int32_t len_source)1577 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1578 UChar **buff_dest, UChar *buff_source, int32_t len_source)
1579 {
1580 UErrorCode status;
1581 int32_t len_dest;
1582
1583 len_dest = len_source; /* try first with same length */
1584 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1585 status = U_ZERO_ERROR;
1586 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1587 mylocale->info.icu.locale, &status);
1588 if (status == U_BUFFER_OVERFLOW_ERROR)
1589 {
1590 /* try again with adjusted length */
1591 pfree(*buff_dest);
1592 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1593 status = U_ZERO_ERROR;
1594 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1595 mylocale->info.icu.locale, &status);
1596 }
1597 if (U_FAILURE(status))
1598 ereport(ERROR,
1599 (errmsg("case conversion failed: %s", u_errorName(status))));
1600 return len_dest;
1601 }
1602
1603 static int32_t
u_strToTitle_default_BI(UChar * dest,int32_t destCapacity,const UChar * src,int32_t srcLength,const char * locale,UErrorCode * pErrorCode)1604 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1605 const UChar *src, int32_t srcLength,
1606 const char *locale,
1607 UErrorCode *pErrorCode)
1608 {
1609 return u_strToTitle(dest, destCapacity, src, srcLength,
1610 NULL, locale, pErrorCode);
1611 }
1612
1613 #endif /* USE_ICU */
1614
1615 /*
1616 * If the system provides the needed functions for wide-character manipulation
1617 * (which are all standardized by C99), then we implement upper/lower/initcap
1618 * using wide-character functions, if necessary. Otherwise we use the
1619 * traditional <ctype.h> functions, which of course will not work as desired
1620 * in multibyte character sets. Note that in either case we are effectively
1621 * assuming that the database character encoding matches the encoding implied
1622 * by LC_CTYPE.
1623 *
1624 * If the system provides locale_t and associated functions (which are
1625 * standardized by Open Group's XBD), we can support collations that are
1626 * neither default nor C. The code is written to handle both combinations
1627 * of have-wide-characters and have-locale_t, though it's rather unlikely
1628 * a platform would have the latter without the former.
1629 */
1630
1631 /*
1632 * collation-aware, wide-character-aware lower function
1633 *
1634 * We pass the number of bytes so we can pass varlena and char*
1635 * to this function. The result is a palloc'd, null-terminated string.
1636 */
1637 char *
str_tolower(const char * buff,size_t nbytes,Oid collid)1638 str_tolower(const char *buff, size_t nbytes, Oid collid)
1639 {
1640 char *result;
1641
1642 if (!buff)
1643 return NULL;
1644
1645 /* C/POSIX collations use this path regardless of database encoding */
1646 if (lc_ctype_is_c(collid))
1647 {
1648 result = asc_tolower(buff, nbytes);
1649 }
1650 else
1651 {
1652 pg_locale_t mylocale = 0;
1653
1654 if (collid != DEFAULT_COLLATION_OID)
1655 {
1656 if (!OidIsValid(collid))
1657 {
1658 /*
1659 * This typically means that the parser could not resolve a
1660 * conflict of implicit collations, so report it that way.
1661 */
1662 ereport(ERROR,
1663 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1664 errmsg("could not determine which collation to use for %s function",
1665 "lower()"),
1666 errhint("Use the COLLATE clause to set the collation explicitly.")));
1667 }
1668 mylocale = pg_newlocale_from_collation(collid);
1669 }
1670
1671 #ifdef USE_ICU
1672 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1673 {
1674 int32_t len_uchar;
1675 int32_t len_conv;
1676 UChar *buff_uchar;
1677 UChar *buff_conv;
1678
1679 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1680 len_conv = icu_convert_case(u_strToLower, mylocale,
1681 &buff_conv, buff_uchar, len_uchar);
1682 icu_from_uchar(&result, buff_conv, len_conv);
1683 pfree(buff_uchar);
1684 pfree(buff_conv);
1685 }
1686 else
1687 #endif
1688 {
1689 if (pg_database_encoding_max_length() > 1)
1690 {
1691 wchar_t *workspace;
1692 size_t curr_char;
1693 size_t result_size;
1694
1695 /* Overflow paranoia */
1696 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1697 ereport(ERROR,
1698 (errcode(ERRCODE_OUT_OF_MEMORY),
1699 errmsg("out of memory")));
1700
1701 /* Output workspace cannot have more codes than input bytes */
1702 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1703
1704 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1705
1706 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1707 {
1708 #ifdef HAVE_LOCALE_T
1709 if (mylocale)
1710 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1711 else
1712 #endif
1713 workspace[curr_char] = towlower(workspace[curr_char]);
1714 }
1715
1716 /*
1717 * Make result large enough; case change might change number
1718 * of bytes
1719 */
1720 result_size = curr_char * pg_database_encoding_max_length() + 1;
1721 result = palloc(result_size);
1722
1723 wchar2char(result, workspace, result_size, mylocale);
1724 pfree(workspace);
1725 }
1726 else
1727 {
1728 char *p;
1729
1730 result = pnstrdup(buff, nbytes);
1731
1732 /*
1733 * Note: we assume that tolower_l() will not be so broken as
1734 * to need an isupper_l() guard test. When using the default
1735 * collation, we apply the traditional Postgres behavior that
1736 * forces ASCII-style treatment of I/i, but in non-default
1737 * collations you get exactly what the collation says.
1738 */
1739 for (p = result; *p; p++)
1740 {
1741 #ifdef HAVE_LOCALE_T
1742 if (mylocale)
1743 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1744 else
1745 #endif
1746 *p = pg_tolower((unsigned char) *p);
1747 }
1748 }
1749 }
1750 }
1751
1752 return result;
1753 }
1754
1755 /*
1756 * collation-aware, wide-character-aware upper function
1757 *
1758 * We pass the number of bytes so we can pass varlena and char*
1759 * to this function. The result is a palloc'd, null-terminated string.
1760 */
1761 char *
str_toupper(const char * buff,size_t nbytes,Oid collid)1762 str_toupper(const char *buff, size_t nbytes, Oid collid)
1763 {
1764 char *result;
1765
1766 if (!buff)
1767 return NULL;
1768
1769 /* C/POSIX collations use this path regardless of database encoding */
1770 if (lc_ctype_is_c(collid))
1771 {
1772 result = asc_toupper(buff, nbytes);
1773 }
1774 else
1775 {
1776 pg_locale_t mylocale = 0;
1777
1778 if (collid != DEFAULT_COLLATION_OID)
1779 {
1780 if (!OidIsValid(collid))
1781 {
1782 /*
1783 * This typically means that the parser could not resolve a
1784 * conflict of implicit collations, so report it that way.
1785 */
1786 ereport(ERROR,
1787 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1788 errmsg("could not determine which collation to use for %s function",
1789 "upper()"),
1790 errhint("Use the COLLATE clause to set the collation explicitly.")));
1791 }
1792 mylocale = pg_newlocale_from_collation(collid);
1793 }
1794
1795 #ifdef USE_ICU
1796 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1797 {
1798 int32_t len_uchar,
1799 len_conv;
1800 UChar *buff_uchar;
1801 UChar *buff_conv;
1802
1803 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1804 len_conv = icu_convert_case(u_strToUpper, mylocale,
1805 &buff_conv, buff_uchar, len_uchar);
1806 icu_from_uchar(&result, buff_conv, len_conv);
1807 pfree(buff_uchar);
1808 pfree(buff_conv);
1809 }
1810 else
1811 #endif
1812 {
1813 if (pg_database_encoding_max_length() > 1)
1814 {
1815 wchar_t *workspace;
1816 size_t curr_char;
1817 size_t result_size;
1818
1819 /* Overflow paranoia */
1820 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1821 ereport(ERROR,
1822 (errcode(ERRCODE_OUT_OF_MEMORY),
1823 errmsg("out of memory")));
1824
1825 /* Output workspace cannot have more codes than input bytes */
1826 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1827
1828 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1829
1830 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1831 {
1832 #ifdef HAVE_LOCALE_T
1833 if (mylocale)
1834 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1835 else
1836 #endif
1837 workspace[curr_char] = towupper(workspace[curr_char]);
1838 }
1839
1840 /*
1841 * Make result large enough; case change might change number
1842 * of bytes
1843 */
1844 result_size = curr_char * pg_database_encoding_max_length() + 1;
1845 result = palloc(result_size);
1846
1847 wchar2char(result, workspace, result_size, mylocale);
1848 pfree(workspace);
1849 }
1850 else
1851 {
1852 char *p;
1853
1854 result = pnstrdup(buff, nbytes);
1855
1856 /*
1857 * Note: we assume that toupper_l() will not be so broken as
1858 * to need an islower_l() guard test. When using the default
1859 * collation, we apply the traditional Postgres behavior that
1860 * forces ASCII-style treatment of I/i, but in non-default
1861 * collations you get exactly what the collation says.
1862 */
1863 for (p = result; *p; p++)
1864 {
1865 #ifdef HAVE_LOCALE_T
1866 if (mylocale)
1867 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1868 else
1869 #endif
1870 *p = pg_toupper((unsigned char) *p);
1871 }
1872 }
1873 }
1874 }
1875
1876 return result;
1877 }
1878
1879 /*
1880 * collation-aware, wide-character-aware initcap function
1881 *
1882 * We pass the number of bytes so we can pass varlena and char*
1883 * to this function. The result is a palloc'd, null-terminated string.
1884 */
1885 char *
str_initcap(const char * buff,size_t nbytes,Oid collid)1886 str_initcap(const char *buff, size_t nbytes, Oid collid)
1887 {
1888 char *result;
1889 int wasalnum = false;
1890
1891 if (!buff)
1892 return NULL;
1893
1894 /* C/POSIX collations use this path regardless of database encoding */
1895 if (lc_ctype_is_c(collid))
1896 {
1897 result = asc_initcap(buff, nbytes);
1898 }
1899 else
1900 {
1901 pg_locale_t mylocale = 0;
1902
1903 if (collid != DEFAULT_COLLATION_OID)
1904 {
1905 if (!OidIsValid(collid))
1906 {
1907 /*
1908 * This typically means that the parser could not resolve a
1909 * conflict of implicit collations, so report it that way.
1910 */
1911 ereport(ERROR,
1912 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1913 errmsg("could not determine which collation to use for %s function",
1914 "initcap()"),
1915 errhint("Use the COLLATE clause to set the collation explicitly.")));
1916 }
1917 mylocale = pg_newlocale_from_collation(collid);
1918 }
1919
1920 #ifdef USE_ICU
1921 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1922 {
1923 int32_t len_uchar,
1924 len_conv;
1925 UChar *buff_uchar;
1926 UChar *buff_conv;
1927
1928 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1929 len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1930 &buff_conv, buff_uchar, len_uchar);
1931 icu_from_uchar(&result, buff_conv, len_conv);
1932 pfree(buff_uchar);
1933 pfree(buff_conv);
1934 }
1935 else
1936 #endif
1937 {
1938 if (pg_database_encoding_max_length() > 1)
1939 {
1940 wchar_t *workspace;
1941 size_t curr_char;
1942 size_t result_size;
1943
1944 /* Overflow paranoia */
1945 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1946 ereport(ERROR,
1947 (errcode(ERRCODE_OUT_OF_MEMORY),
1948 errmsg("out of memory")));
1949
1950 /* Output workspace cannot have more codes than input bytes */
1951 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1952
1953 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1954
1955 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1956 {
1957 #ifdef HAVE_LOCALE_T
1958 if (mylocale)
1959 {
1960 if (wasalnum)
1961 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1962 else
1963 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1964 wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1965 }
1966 else
1967 #endif
1968 {
1969 if (wasalnum)
1970 workspace[curr_char] = towlower(workspace[curr_char]);
1971 else
1972 workspace[curr_char] = towupper(workspace[curr_char]);
1973 wasalnum = iswalnum(workspace[curr_char]);
1974 }
1975 }
1976
1977 /*
1978 * Make result large enough; case change might change number
1979 * of bytes
1980 */
1981 result_size = curr_char * pg_database_encoding_max_length() + 1;
1982 result = palloc(result_size);
1983
1984 wchar2char(result, workspace, result_size, mylocale);
1985 pfree(workspace);
1986 }
1987 else
1988 {
1989 char *p;
1990
1991 result = pnstrdup(buff, nbytes);
1992
1993 /*
1994 * Note: we assume that toupper_l()/tolower_l() will not be so
1995 * broken as to need guard tests. When using the default
1996 * collation, we apply the traditional Postgres behavior that
1997 * forces ASCII-style treatment of I/i, but in non-default
1998 * collations you get exactly what the collation says.
1999 */
2000 for (p = result; *p; p++)
2001 {
2002 #ifdef HAVE_LOCALE_T
2003 if (mylocale)
2004 {
2005 if (wasalnum)
2006 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2007 else
2008 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2009 wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2010 }
2011 else
2012 #endif
2013 {
2014 if (wasalnum)
2015 *p = pg_tolower((unsigned char) *p);
2016 else
2017 *p = pg_toupper((unsigned char) *p);
2018 wasalnum = isalnum((unsigned char) *p);
2019 }
2020 }
2021 }
2022 }
2023 }
2024
2025 return result;
2026 }
2027
/*
 * asc_tolower: ASCII-only lower-casing
 *
 * The input is given as a pointer plus a byte count, so both varlena
 * payloads and plain char * strings can be passed.  The result is a
 * palloc'd, null-terminated string; a NULL buff yields NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	/* convert the copy in place, up to its terminator */
	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_tolower((unsigned char) result[i]);

	return result;
}
2050
/*
 * asc_toupper: ASCII-only upper-casing
 *
 * The input is given as a pointer plus a byte count, so both varlena
 * payloads and plain char * strings can be passed.  The result is a
 * palloc'd, null-terminated string; a NULL buff yields NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	/* convert the copy in place, up to its terminator */
	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_toupper((unsigned char) result[i]);

	return result;
}
2073
/*
 * asc_initcap: ASCII-only initcap
 *
 * A character is upper-cased when the previously emitted character was not
 * an ASCII letter or digit (or there was none), and lower-cased otherwise.
 *
 * The input is given as a pointer plus a byte count, so both varlena
 * payloads and plain char * strings can be passed.  The result is a
 * palloc'd, null-terminated string; a NULL buff yields NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *p;
	int			wasalnum = false;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (p = result; *p != '\0'; p++)
	{
		unsigned char in = (unsigned char) *p;
		char		c;

		c = wasalnum ? pg_ascii_tolower(in) : pg_ascii_toupper(in);
		*p = c;

		/* we don't trust isalnum() here */
		wasalnum = ((c >= 'A' && c <= 'Z') ||
					(c >= 'a' && c <= 'z') ||
					(c >= '0' && c <= '9'));
	}

	return result;
}
2108
2109 /* convenience routines for when the input is null-terminated */
2110
2111 static char *
str_tolower_z(const char * buff,Oid collid)2112 str_tolower_z(const char *buff, Oid collid)
2113 {
2114 return str_tolower(buff, strlen(buff), collid);
2115 }
2116
2117 static char *
str_toupper_z(const char * buff,Oid collid)2118 str_toupper_z(const char *buff, Oid collid)
2119 {
2120 return str_toupper(buff, strlen(buff), collid);
2121 }
2122
2123 static char *
str_initcap_z(const char * buff,Oid collid)2124 str_initcap_z(const char *buff, Oid collid)
2125 {
2126 return str_initcap(buff, strlen(buff), collid);
2127 }
2128
/* asc_tolower() for a null-terminated input; buff must not be NULL */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2134
/* asc_toupper() for a null-terminated input; buff must not be NULL */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2140
2141 /* asc_initcap_z is not currently needed */
2142
2143
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on, skip up to two (possibly multibyte) characters, stopping
 * early if the end of the string is reached
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			int		_skip; \
			/* advance by at most two characters, never past the terminator */ \
			for (_skip = 0; _skip < 2 && *(ptr); _skip++) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2158
2159
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: Dump the keyword index, for debugging and index checking.
 *
 * For every index position, shows the corresponding ASCII character and
 * either the keyword the position refers to or the raw (-1) entry, then
 * reports how many positions are used and how many are free.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			pos;
	int			used = 0;
	int			unused = 0;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (pos = 0; pos < KeyWord_INDEX_SIZE; pos++)
	{
		if (index[pos] == -1)
		{
			/* free slot */
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", pos, pos + 32, index[pos]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", pos + 32, k[index[pos]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2192
2193 /* ----------
2194 * Return true if next format picture is not digit value
2195 * ----------
2196 */
2197 static bool
is_next_separator(FormatNode * n)2198 is_next_separator(FormatNode *n)
2199 {
2200 if (n->type == NODE_TYPE_END)
2201 return false;
2202
2203 if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2204 return true;
2205
2206 /*
2207 * Next node
2208 */
2209 n++;
2210
2211 /* end of format string is treated like a non-digit separator */
2212 if (n->type == NODE_TYPE_END)
2213 return true;
2214
2215 if (n->type == NODE_TYPE_ACTION)
2216 {
2217 if (n->key->is_digit)
2218 return false;
2219
2220 return true;
2221 }
2222 else if (n->character[1] == '\0' &&
2223 isdigit((unsigned char) n->character[0]))
2224 return false;
2225
2226 return true; /* some non-digit input (separator) */
2227 }
2228
2229
/*
 * Expand a partial (fewer than four digits) year so that it lands near 2020.
 *
 * This is effectively what happens when '70' is taken to mean 1970 and '69'
 * to mean 2069.  The mapping is:
 *
 *		below 70	-> year + 2000
 *		70 .. 99	-> year + 1900
 *		100 .. 519	-> year + 2000
 *		520 .. 999	-> year + 1000
 *		1000 and up -> unchanged
 */
static int
adjust_partial_year_to_2020(int year)
{
	int			century_base;

	if (year >= 1000)
		century_base = 0;		/* already a full year */
	else if (year >= 520)
		century_base = 1000;	/* 520-999 go into the 1000's */
	else if (year >= 100)
		century_base = 2000;	/* 100-519 go into the 2000's */
	else if (year >= 70)
		century_base = 1900;	/* 70-99 go into the 1900's */
	else
		century_base = 2000;	/* everything below 70 goes into the 2000's */

	return year + century_base;
}
2252
2253
/*
 * Return the number of leading whitespace characters (per isspace()) at the
 * start of the NUL-terminated string 'str'.
 */
static int
strspace_len(const char *str)
{
	const char *p = str;

	while (*p != '\0' && isspace((unsigned char) *p))
		p++;

	return (int) (p - str);
}
2266
2267 /*
2268 * Set the date mode of a from-char conversion.
2269 *
2270 * Puke if the date mode has already been set, and the caller attempts to set
2271 * it to a conflicting mode.
2272 *
2273 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2274 */
2275 static void
from_char_set_mode(TmFromChar * tmfc,const FromCharDateMode mode,bool * have_error)2276 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
2277 {
2278 if (mode != FROM_CHAR_DATE_NONE)
2279 {
2280 if (tmfc->mode == FROM_CHAR_DATE_NONE)
2281 tmfc->mode = mode;
2282 else if (tmfc->mode != mode)
2283 RETURN_ERROR(ereport(ERROR,
2284 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2285 errmsg("invalid combination of date conventions"),
2286 errhint("Do not mix Gregorian and ISO week date "
2287 "conventions in a formatting template."))));
2288 }
2289
2290 on_error:
2291 return;
2292 }
2293
2294 /*
2295 * Set the integer pointed to by 'dest' to the given value.
2296 *
2297 * Puke if the destination integer has previously been set to some other
2298 * non-zero value.
2299 *
2300 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2301 */
2302 static void
from_char_set_int(int * dest,const int value,const FormatNode * node,bool * have_error)2303 from_char_set_int(int *dest, const int value, const FormatNode *node,
2304 bool *have_error)
2305 {
2306 if (*dest != 0 && *dest != value)
2307 RETURN_ERROR(ereport(ERROR,
2308 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2309 errmsg("conflicting values for \"%s\" field in "
2310 "formatting string",
2311 node->key->name),
2312 errdetail("This value contradicts a previous setting "
2313 "for the same field type."))));
2314 *dest = value;
2315
2316 on_error:
2317 return;
2318 }
2319
2320 /*
2321 * Read a single integer from the source string, into the int pointed to by
2322 * 'dest'. If 'dest' is NULL, the result is discarded.
2323 *
2324 * In fixed-width mode (the node does not have the FM suffix), consume at most
2325 * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2326 *
2327 * We use strtol() to recover the integer value from the source string, in
2328 * accordance with the given FormatNode.
2329 *
2330 * If the conversion completes successfully, src will have been advanced to
2331 * point at the character immediately following the last character used in the
2332 * conversion.
2333 *
2334 * Return the number of characters consumed.
2335 *
2336 * Note that from_char_parse_int() provides a more convenient wrapper where
2337 * the length of the field is the same as the length of the format keyword (as
2338 * with DD and MI).
2339 *
2340 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
2341 * and -1 is returned.
2342 */
2343 static int
from_char_parse_int_len(int * dest,const char ** src,const int len,FormatNode * node,bool * have_error)2344 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2345 bool *have_error)
2346 {
2347 long result;
2348 char copy[DCH_MAX_ITEM_SIZ + 1];
2349 const char *init = *src;
2350 int used;
2351
2352 /*
2353 * Skip any whitespace before parsing the integer.
2354 */
2355 *src += strspace_len(*src);
2356
2357 Assert(len <= DCH_MAX_ITEM_SIZ);
2358 used = (int) strlcpy(copy, *src, len + 1);
2359
2360 if (S_FM(node->suffix) || is_next_separator(node))
2361 {
2362 /*
2363 * This node is in Fill Mode, or the next node is known to be a
2364 * non-digit value, so we just slurp as many characters as we can get.
2365 */
2366 char *endptr;
2367
2368 errno = 0;
2369 result = strtol(init, &endptr, 10);
2370 *src = endptr;
2371 }
2372 else
2373 {
2374 /*
2375 * We need to pull exactly the number of characters given in 'len' out
2376 * of the string, and convert those.
2377 */
2378 char *last;
2379
2380 if (used < len)
2381 RETURN_ERROR(ereport(ERROR,
2382 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2383 errmsg("source string too short for \"%s\" "
2384 "formatting field",
2385 node->key->name),
2386 errdetail("Field requires %d characters, "
2387 "but only %d remain.",
2388 len, used),
2389 errhint("If your source string is not fixed-width, "
2390 "try using the \"FM\" modifier."))));
2391
2392 errno = 0;
2393 result = strtol(copy, &last, 10);
2394 used = last - copy;
2395
2396 if (used > 0 && used < len)
2397 RETURN_ERROR(ereport(ERROR,
2398 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2399 errmsg("invalid value \"%s\" for \"%s\"",
2400 copy, node->key->name),
2401 errdetail("Field requires %d characters, "
2402 "but only %d could be parsed.",
2403 len, used),
2404 errhint("If your source string is not fixed-width, "
2405 "try using the \"FM\" modifier."))));
2406
2407 *src += used;
2408 }
2409
2410 if (*src == init)
2411 RETURN_ERROR(ereport(ERROR,
2412 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2413 errmsg("invalid value \"%s\" for \"%s\"",
2414 copy, node->key->name),
2415 errdetail("Value must be an integer."))));
2416
2417 if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2418 RETURN_ERROR(ereport(ERROR,
2419 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2420 errmsg("value for \"%s\" in source string is out of range",
2421 node->key->name),
2422 errdetail("Value must be in the range %d to %d.",
2423 INT_MIN, INT_MAX))));
2424
2425 if (dest != NULL)
2426 {
2427 from_char_set_int(dest, (int) result, node, have_error);
2428 CHECK_ERROR;
2429 }
2430
2431 return *src - init;
2432
2433 on_error:
2434 return -1;
2435 }
2436
2437 /*
2438 * Call from_char_parse_int_len(), using the length of the format keyword as
2439 * the expected length of the field.
2440 *
2441 * Don't call this function if the field differs in length from the format
2442 * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2443 * In such cases, call from_char_parse_int_len() instead to specify the
2444 * required length explicitly.
2445 */
2446 static int
from_char_parse_int(int * dest,const char ** src,FormatNode * node,bool * have_error)2447 from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
2448 {
2449 return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
2450 }
2451
2452 /*
2453 * Sequentially search null-terminated "array" for a case-insensitive match
2454 * to the initial character(s) of "name".
2455 *
2456 * Returns array index of match, or -1 for no match.
2457 *
2458 * *len is set to the length of the match, or 0 for no match.
2459 *
2460 * Case-insensitivity is defined per pg_ascii_tolower, so this is only
2461 * suitable for comparisons to ASCII strings.
2462 */
2463 static int
seq_search_ascii(const char * name,const char * const * array,int * len)2464 seq_search_ascii(const char *name, const char *const *array, int *len)
2465 {
2466 unsigned char firstc;
2467 const char *const *a;
2468
2469 *len = 0;
2470
2471 /* empty string can't match anything */
2472 if (!*name)
2473 return -1;
2474
2475 /* we handle first char specially to gain some speed */
2476 firstc = pg_ascii_tolower((unsigned char) *name);
2477
2478 for (a = array; *a != NULL; a++)
2479 {
2480 const char *p;
2481 const char *n;
2482
2483 /* compare first chars */
2484 if (pg_ascii_tolower((unsigned char) **a) != firstc)
2485 continue;
2486
2487 /* compare rest of string */
2488 for (p = *a + 1, n = name + 1;; p++, n++)
2489 {
2490 /* return success if we matched whole array entry */
2491 if (*p == '\0')
2492 {
2493 *len = n - name;
2494 return a - array;
2495 }
2496 /* else, must have another character in "name" ... */
2497 if (*n == '\0')
2498 break;
2499 /* ... and it must match */
2500 if (pg_ascii_tolower((unsigned char) *p) !=
2501 pg_ascii_tolower((unsigned char) *n))
2502 break;
2503 }
2504 }
2505
2506 return -1;
2507 }
2508
2509 /*
2510 * Sequentially search an array of possibly non-English words for
2511 * a case-insensitive match to the initial character(s) of "name".
2512 *
2513 * This has the same API as seq_search_ascii(), but we use a more general
2514 * case-folding transformation to achieve case-insensitivity. Case folding
2515 * is done per the rules of the collation identified by "collid".
2516 *
2517 * The array is treated as const, but we don't declare it that way because
2518 * the arrays exported by pg_locale.c aren't const.
2519 */
2520 static int
seq_search_localized(const char * name,char ** array,int * len,Oid collid)2521 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2522 {
2523 char **a;
2524 char *upper_name;
2525 char *lower_name;
2526
2527 *len = 0;
2528
2529 /* empty string can't match anything */
2530 if (!*name)
2531 return -1;
2532
2533 /*
2534 * The case-folding processing done below is fairly expensive, so before
2535 * doing that, make a quick pass to see if there is an exact match.
2536 */
2537 for (a = array; *a != NULL; a++)
2538 {
2539 int element_len = strlen(*a);
2540
2541 if (strncmp(name, *a, element_len) == 0)
2542 {
2543 *len = element_len;
2544 return a - array;
2545 }
2546 }
2547
2548 /*
2549 * Fold to upper case, then to lower case, so that we can match reliably
2550 * even in languages in which case conversions are not injective.
2551 */
2552 upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
2553 lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2554 pfree(upper_name);
2555
2556 for (a = array; *a != NULL; a++)
2557 {
2558 char *upper_element;
2559 char *lower_element;
2560 int element_len;
2561
2562 /* Likewise upper/lower-case array element */
2563 upper_element = str_toupper(*a, strlen(*a), collid);
2564 lower_element = str_tolower(upper_element, strlen(upper_element),
2565 collid);
2566 pfree(upper_element);
2567 element_len = strlen(lower_element);
2568
2569 /* Match? */
2570 if (strncmp(lower_name, lower_element, element_len) == 0)
2571 {
2572 *len = element_len;
2573 pfree(lower_element);
2574 pfree(lower_name);
2575 return a - array;
2576 }
2577 pfree(lower_element);
2578 }
2579
2580 pfree(lower_name);
2581 return -1;
2582 }
2583
2584 /*
2585 * Perform a sequential search in 'array' (or 'localized_array', if that's
2586 * not NULL) for an entry matching the first character(s) of the 'src'
2587 * string case-insensitively.
2588 *
2589 * The 'array' is presumed to be English words (all-ASCII), but
2590 * if 'localized_array' is supplied, that might be non-English
2591 * so we need a more expensive case-folding transformation
2592 * (which will follow the rules of the collation 'collid').
2593 *
2594 * If a match is found, copy the array index of the match into the integer
2595 * pointed to by 'dest', advance 'src' to the end of the part of the string
2596 * which matched, and return the number of characters consumed.
2597 *
2598 * If the string doesn't match, throw an error if 'have_error' is NULL,
2599 * otherwise set '*have_error' and return -1.
2600 *
2601 * 'node' is used only for error reports: node->key->name identifies the
2602 * field type we were searching for.
2603 */
2604 static int
from_char_seq_search(int * dest,const char ** src,const char * const * array,char ** localized_array,Oid collid,FormatNode * node,bool * have_error)2605 from_char_seq_search(int *dest, const char **src, const char *const *array,
2606 char **localized_array, Oid collid,
2607 FormatNode *node, bool *have_error)
2608 {
2609 int len;
2610
2611 if (localized_array == NULL)
2612 *dest = seq_search_ascii(*src, array, &len);
2613 else
2614 *dest = seq_search_localized(*src, localized_array, &len, collid);
2615
2616 if (len <= 0)
2617 {
2618 /*
2619 * In the error report, truncate the string at the next whitespace (if
2620 * any) to avoid including irrelevant data.
2621 */
2622 char *copy = pstrdup(*src);
2623 char *c;
2624
2625 for (c = copy; *c; c++)
2626 {
2627 if (scanner_isspace(*c))
2628 {
2629 *c = '\0';
2630 break;
2631 }
2632 }
2633
2634 RETURN_ERROR(ereport(ERROR,
2635 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2636 errmsg("invalid value \"%s\" for \"%s\"",
2637 copy, node->key->name),
2638 errdetail("The given value did not match any of "
2639 "the allowed values for this field."))));
2640 }
2641 *src += len;
2642 return len;
2643
2644 on_error:
2645 return -1;
2646 }
2647
2648 /* ----------
2649 * Process a TmToChar struct as denoted by a list of FormatNodes.
2650 * The formatted data is written to the string pointed to by 'out'.
2651 * ----------
2652 */
2653 static void
DCH_to_char(FormatNode * node,bool is_interval,TmToChar * in,char * out,Oid collid)2654 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2655 {
2656 FormatNode *n;
2657 char *s;
2658 struct pg_tm *tm = &in->tm;
2659 int i;
2660
2661 /* cache localized days and months */
2662 cache_locale_time();
2663
2664 s = out;
2665 for (n = node; n->type != NODE_TYPE_END; n++)
2666 {
2667 if (n->type != NODE_TYPE_ACTION)
2668 {
2669 strcpy(s, n->character);
2670 s += strlen(s);
2671 continue;
2672 }
2673
2674 switch (n->key->id)
2675 {
2676 case DCH_A_M:
2677 case DCH_P_M:
2678 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2679 ? P_M_STR : A_M_STR);
2680 s += strlen(s);
2681 break;
2682 case DCH_AM:
2683 case DCH_PM:
2684 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2685 ? PM_STR : AM_STR);
2686 s += strlen(s);
2687 break;
2688 case DCH_a_m:
2689 case DCH_p_m:
2690 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2691 ? p_m_STR : a_m_STR);
2692 s += strlen(s);
2693 break;
2694 case DCH_am:
2695 case DCH_pm:
2696 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2697 ? pm_STR : am_STR);
2698 s += strlen(s);
2699 break;
2700 case DCH_HH:
2701 case DCH_HH12:
2702
2703 /*
2704 * display time as shown on a 12-hour clock, even for
2705 * intervals
2706 */
2707 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2708 tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2709 tm->tm_hour % (HOURS_PER_DAY / 2));
2710 if (S_THth(n->suffix))
2711 str_numth(s, s, S_TH_TYPE(n->suffix));
2712 s += strlen(s);
2713 break;
2714 case DCH_HH24:
2715 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2716 tm->tm_hour);
2717 if (S_THth(n->suffix))
2718 str_numth(s, s, S_TH_TYPE(n->suffix));
2719 s += strlen(s);
2720 break;
2721 case DCH_MI:
2722 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2723 tm->tm_min);
2724 if (S_THth(n->suffix))
2725 str_numth(s, s, S_TH_TYPE(n->suffix));
2726 s += strlen(s);
2727 break;
2728 case DCH_SS:
2729 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2730 tm->tm_sec);
2731 if (S_THth(n->suffix))
2732 str_numth(s, s, S_TH_TYPE(n->suffix));
2733 s += strlen(s);
2734 break;
2735
2736 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2737 sprintf(s, frac_fmt, (int) (frac_val)); \
2738 if (S_THth(n->suffix)) \
2739 str_numth(s, s, S_TH_TYPE(n->suffix)); \
2740 s += strlen(s)
2741
2742 case DCH_FF1: /* tenth of second */
2743 DCH_to_char_fsec("%01d", in->fsec / 100000);
2744 break;
2745 case DCH_FF2: /* hundredth of second */
2746 DCH_to_char_fsec("%02d", in->fsec / 10000);
2747 break;
2748 case DCH_FF3:
2749 case DCH_MS: /* millisecond */
2750 DCH_to_char_fsec("%03d", in->fsec / 1000);
2751 break;
2752 case DCH_FF4: /* tenth of a millisecond */
2753 DCH_to_char_fsec("%04d", in->fsec / 100);
2754 break;
2755 case DCH_FF5: /* hundredth of a millisecond */
2756 DCH_to_char_fsec("%05d", in->fsec / 10);
2757 break;
2758 case DCH_FF6:
2759 case DCH_US: /* microsecond */
2760 DCH_to_char_fsec("%06d", in->fsec);
2761 break;
2762 #undef DCH_to_char_fsec
2763 case DCH_SSSS:
2764 sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2765 tm->tm_min * SECS_PER_MINUTE +
2766 tm->tm_sec);
2767 if (S_THth(n->suffix))
2768 str_numth(s, s, S_TH_TYPE(n->suffix));
2769 s += strlen(s);
2770 break;
2771 case DCH_tz:
2772 INVALID_FOR_INTERVAL;
2773 if (tmtcTzn(in))
2774 {
2775 /* We assume here that timezone names aren't localized */
2776 char *p = asc_tolower_z(tmtcTzn(in));
2777
2778 strcpy(s, p);
2779 pfree(p);
2780 s += strlen(s);
2781 }
2782 break;
2783 case DCH_TZ:
2784 INVALID_FOR_INTERVAL;
2785 if (tmtcTzn(in))
2786 {
2787 strcpy(s, tmtcTzn(in));
2788 s += strlen(s);
2789 }
2790 break;
2791 case DCH_TZH:
2792 INVALID_FOR_INTERVAL;
2793 sprintf(s, "%c%02d",
2794 (tm->tm_gmtoff >= 0) ? '+' : '-',
2795 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2796 s += strlen(s);
2797 break;
2798 case DCH_TZM:
2799 INVALID_FOR_INTERVAL;
2800 sprintf(s, "%02d",
2801 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2802 s += strlen(s);
2803 break;
2804 case DCH_OF:
2805 INVALID_FOR_INTERVAL;
2806 sprintf(s, "%c%0*d",
2807 (tm->tm_gmtoff >= 0) ? '+' : '-',
2808 S_FM(n->suffix) ? 0 : 2,
2809 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2810 s += strlen(s);
2811 if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2812 {
2813 sprintf(s, ":%02d",
2814 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2815 s += strlen(s);
2816 }
2817 break;
2818 case DCH_A_D:
2819 case DCH_B_C:
2820 INVALID_FOR_INTERVAL;
2821 strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2822 s += strlen(s);
2823 break;
2824 case DCH_AD:
2825 case DCH_BC:
2826 INVALID_FOR_INTERVAL;
2827 strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2828 s += strlen(s);
2829 break;
2830 case DCH_a_d:
2831 case DCH_b_c:
2832 INVALID_FOR_INTERVAL;
2833 strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2834 s += strlen(s);
2835 break;
2836 case DCH_ad:
2837 case DCH_bc:
2838 INVALID_FOR_INTERVAL;
2839 strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2840 s += strlen(s);
2841 break;
2842 case DCH_MONTH:
2843 INVALID_FOR_INTERVAL;
2844 if (!tm->tm_mon)
2845 break;
2846 if (S_TM(n->suffix))
2847 {
2848 char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2849
2850 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2851 strcpy(s, str);
2852 else
2853 ereport(ERROR,
2854 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2855 errmsg("localized string format value too long")));
2856 }
2857 else
2858 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2859 asc_toupper_z(months_full[tm->tm_mon - 1]));
2860 s += strlen(s);
2861 break;
2862 case DCH_Month:
2863 INVALID_FOR_INTERVAL;
2864 if (!tm->tm_mon)
2865 break;
2866 if (S_TM(n->suffix))
2867 {
2868 char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2869
2870 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2871 strcpy(s, str);
2872 else
2873 ereport(ERROR,
2874 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2875 errmsg("localized string format value too long")));
2876 }
2877 else
2878 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2879 months_full[tm->tm_mon - 1]);
2880 s += strlen(s);
2881 break;
2882 case DCH_month:
2883 INVALID_FOR_INTERVAL;
2884 if (!tm->tm_mon)
2885 break;
2886 if (S_TM(n->suffix))
2887 {
2888 char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2889
2890 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2891 strcpy(s, str);
2892 else
2893 ereport(ERROR,
2894 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2895 errmsg("localized string format value too long")));
2896 }
2897 else
2898 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2899 asc_tolower_z(months_full[tm->tm_mon - 1]));
2900 s += strlen(s);
2901 break;
2902 case DCH_MON:
2903 INVALID_FOR_INTERVAL;
2904 if (!tm->tm_mon)
2905 break;
2906 if (S_TM(n->suffix))
2907 {
2908 char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2909
2910 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2911 strcpy(s, str);
2912 else
2913 ereport(ERROR,
2914 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2915 errmsg("localized string format value too long")));
2916 }
2917 else
2918 strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2919 s += strlen(s);
2920 break;
2921 case DCH_Mon:
2922 INVALID_FOR_INTERVAL;
2923 if (!tm->tm_mon)
2924 break;
2925 if (S_TM(n->suffix))
2926 {
2927 char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2928
2929 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2930 strcpy(s, str);
2931 else
2932 ereport(ERROR,
2933 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2934 errmsg("localized string format value too long")));
2935 }
2936 else
2937 strcpy(s, months[tm->tm_mon - 1]);
2938 s += strlen(s);
2939 break;
2940 case DCH_mon:
2941 INVALID_FOR_INTERVAL;
2942 if (!tm->tm_mon)
2943 break;
2944 if (S_TM(n->suffix))
2945 {
2946 char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2947
2948 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2949 strcpy(s, str);
2950 else
2951 ereport(ERROR,
2952 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2953 errmsg("localized string format value too long")));
2954 }
2955 else
2956 strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2957 s += strlen(s);
2958 break;
2959 case DCH_MM:
2960 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2961 tm->tm_mon);
2962 if (S_THth(n->suffix))
2963 str_numth(s, s, S_TH_TYPE(n->suffix));
2964 s += strlen(s);
2965 break;
2966 case DCH_DAY:
2967 INVALID_FOR_INTERVAL;
2968 if (S_TM(n->suffix))
2969 {
2970 char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2971
2972 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2973 strcpy(s, str);
2974 else
2975 ereport(ERROR,
2976 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2977 errmsg("localized string format value too long")));
2978 }
2979 else
2980 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2981 asc_toupper_z(days[tm->tm_wday]));
2982 s += strlen(s);
2983 break;
2984 case DCH_Day:
2985 INVALID_FOR_INTERVAL;
2986 if (S_TM(n->suffix))
2987 {
2988 char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2989
2990 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2991 strcpy(s, str);
2992 else
2993 ereport(ERROR,
2994 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2995 errmsg("localized string format value too long")));
2996 }
2997 else
2998 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2999 days[tm->tm_wday]);
3000 s += strlen(s);
3001 break;
3002 case DCH_day:
3003 INVALID_FOR_INTERVAL;
3004 if (S_TM(n->suffix))
3005 {
3006 char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
3007
3008 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3009 strcpy(s, str);
3010 else
3011 ereport(ERROR,
3012 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3013 errmsg("localized string format value too long")));
3014 }
3015 else
3016 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3017 asc_tolower_z(days[tm->tm_wday]));
3018 s += strlen(s);
3019 break;
3020 case DCH_DY:
3021 INVALID_FOR_INTERVAL;
3022 if (S_TM(n->suffix))
3023 {
3024 char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
3025
3026 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3027 strcpy(s, str);
3028 else
3029 ereport(ERROR,
3030 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3031 errmsg("localized string format value too long")));
3032 }
3033 else
3034 strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3035 s += strlen(s);
3036 break;
3037 case DCH_Dy:
3038 INVALID_FOR_INTERVAL;
3039 if (S_TM(n->suffix))
3040 {
3041 char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
3042
3043 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3044 strcpy(s, str);
3045 else
3046 ereport(ERROR,
3047 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3048 errmsg("localized string format value too long")));
3049 }
3050 else
3051 strcpy(s, days_short[tm->tm_wday]);
3052 s += strlen(s);
3053 break;
3054 case DCH_dy:
3055 INVALID_FOR_INTERVAL;
3056 if (S_TM(n->suffix))
3057 {
3058 char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
3059
3060 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3061 strcpy(s, str);
3062 else
3063 ereport(ERROR,
3064 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3065 errmsg("localized string format value too long")));
3066 }
3067 else
3068 strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3069 s += strlen(s);
3070 break;
3071 case DCH_DDD:
3072 case DCH_IDDD:
3073 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3074 (n->key->id == DCH_DDD) ?
3075 tm->tm_yday :
3076 date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
3077 if (S_THth(n->suffix))
3078 str_numth(s, s, S_TH_TYPE(n->suffix));
3079 s += strlen(s);
3080 break;
3081 case DCH_DD:
3082 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3083 if (S_THth(n->suffix))
3084 str_numth(s, s, S_TH_TYPE(n->suffix));
3085 s += strlen(s);
3086 break;
3087 case DCH_D:
3088 INVALID_FOR_INTERVAL;
3089 sprintf(s, "%d", tm->tm_wday + 1);
3090 if (S_THth(n->suffix))
3091 str_numth(s, s, S_TH_TYPE(n->suffix));
3092 s += strlen(s);
3093 break;
3094 case DCH_ID:
3095 INVALID_FOR_INTERVAL;
3096 sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3097 if (S_THth(n->suffix))
3098 str_numth(s, s, S_TH_TYPE(n->suffix));
3099 s += strlen(s);
3100 break;
3101 case DCH_WW:
3102 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3103 (tm->tm_yday - 1) / 7 + 1);
3104 if (S_THth(n->suffix))
3105 str_numth(s, s, S_TH_TYPE(n->suffix));
3106 s += strlen(s);
3107 break;
3108 case DCH_IW:
3109 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3110 date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3111 if (S_THth(n->suffix))
3112 str_numth(s, s, S_TH_TYPE(n->suffix));
3113 s += strlen(s);
3114 break;
3115 case DCH_Q:
3116 if (!tm->tm_mon)
3117 break;
3118 sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3119 if (S_THth(n->suffix))
3120 str_numth(s, s, S_TH_TYPE(n->suffix));
3121 s += strlen(s);
3122 break;
3123 case DCH_CC:
3124 if (is_interval) /* straight calculation */
3125 i = tm->tm_year / 100;
3126 else
3127 {
3128 if (tm->tm_year > 0)
3129 /* Century 20 == 1901 - 2000 */
3130 i = (tm->tm_year - 1) / 100 + 1;
3131 else
3132 /* Century 6BC == 600BC - 501BC */
3133 i = tm->tm_year / 100 - 1;
3134 }
3135 if (i <= 99 && i >= -99)
3136 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3137 else
3138 sprintf(s, "%d", i);
3139 if (S_THth(n->suffix))
3140 str_numth(s, s, S_TH_TYPE(n->suffix));
3141 s += strlen(s);
3142 break;
3143 case DCH_Y_YYY:
3144 i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3145 sprintf(s, "%d,%03d", i,
3146 ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3147 if (S_THth(n->suffix))
3148 str_numth(s, s, S_TH_TYPE(n->suffix));
3149 s += strlen(s);
3150 break;
3151 case DCH_YYYY:
3152 case DCH_IYYY:
3153 sprintf(s, "%0*d",
3154 S_FM(n->suffix) ? 0 :
3155 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3156 (n->key->id == DCH_YYYY ?
3157 ADJUST_YEAR(tm->tm_year, is_interval) :
3158 ADJUST_YEAR(date2isoyear(tm->tm_year,
3159 tm->tm_mon,
3160 tm->tm_mday),
3161 is_interval)));
3162 if (S_THth(n->suffix))
3163 str_numth(s, s, S_TH_TYPE(n->suffix));
3164 s += strlen(s);
3165 break;
3166 case DCH_YYY:
3167 case DCH_IYY:
3168 sprintf(s, "%0*d",
3169 S_FM(n->suffix) ? 0 :
3170 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3171 (n->key->id == DCH_YYY ?
3172 ADJUST_YEAR(tm->tm_year, is_interval) :
3173 ADJUST_YEAR(date2isoyear(tm->tm_year,
3174 tm->tm_mon,
3175 tm->tm_mday),
3176 is_interval)) % 1000);
3177 if (S_THth(n->suffix))
3178 str_numth(s, s, S_TH_TYPE(n->suffix));
3179 s += strlen(s);
3180 break;
3181 case DCH_YY:
3182 case DCH_IY:
3183 sprintf(s, "%0*d",
3184 S_FM(n->suffix) ? 0 :
3185 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3186 (n->key->id == DCH_YY ?
3187 ADJUST_YEAR(tm->tm_year, is_interval) :
3188 ADJUST_YEAR(date2isoyear(tm->tm_year,
3189 tm->tm_mon,
3190 tm->tm_mday),
3191 is_interval)) % 100);
3192 if (S_THth(n->suffix))
3193 str_numth(s, s, S_TH_TYPE(n->suffix));
3194 s += strlen(s);
3195 break;
3196 case DCH_Y:
3197 case DCH_I:
3198 sprintf(s, "%1d",
3199 (n->key->id == DCH_Y ?
3200 ADJUST_YEAR(tm->tm_year, is_interval) :
3201 ADJUST_YEAR(date2isoyear(tm->tm_year,
3202 tm->tm_mon,
3203 tm->tm_mday),
3204 is_interval)) % 10);
3205 if (S_THth(n->suffix))
3206 str_numth(s, s, S_TH_TYPE(n->suffix));
3207 s += strlen(s);
3208 break;
3209 case DCH_RM:
3210 /* FALLTHROUGH */
3211 case DCH_rm:
3212
3213 /*
3214 * For intervals, values like '12 month' will be reduced to 0
3215 * month and some years. These should be processed.
3216 */
3217 if (!tm->tm_mon && !tm->tm_year)
3218 break;
3219 else
3220 {
3221 int mon = 0;
3222 const char *const *months;
3223
3224 if (n->key->id == DCH_RM)
3225 months = rm_months_upper;
3226 else
3227 months = rm_months_lower;
3228
3229 /*
3230 * Compute the position in the roman-numeral array. Note
3231 * that the contents of the array are reversed, December
3232 * being first and January last.
3233 */
3234 if (tm->tm_mon == 0)
3235 {
3236 /*
3237 * This case is special, and tracks the case of full
3238 * interval years.
3239 */
3240 mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3241 }
3242 else if (tm->tm_mon < 0)
3243 {
3244 /*
3245 * Negative case. In this case, the calculation is
3246 * reversed, where -1 means December, -2 November,
3247 * etc.
3248 */
3249 mon = -1 * (tm->tm_mon + 1);
3250 }
3251 else
3252 {
3253 /*
3254 * Common case, with a strictly positive value. The
3255 * position in the array matches with the value of
3256 * tm_mon.
3257 */
3258 mon = MONTHS_PER_YEAR - tm->tm_mon;
3259 }
3260
3261 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3262 months[mon]);
3263 s += strlen(s);
3264 }
3265 break;
3266 case DCH_W:
3267 sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3268 if (S_THth(n->suffix))
3269 str_numth(s, s, S_TH_TYPE(n->suffix));
3270 s += strlen(s);
3271 break;
3272 case DCH_J:
3273 sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3274 if (S_THth(n->suffix))
3275 str_numth(s, s, S_TH_TYPE(n->suffix));
3276 s += strlen(s);
3277 break;
3278 }
3279 }
3280
3281 *s = '\0';
3282 }
3283
3284 /*
3285 * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3286 * The TmFromChar struct pointed to by 'out' is populated with the results.
3287 *
3288 * 'collid' identifies the collation to use, if needed.
3289 * 'std' specifies standard parsing mode.
3290 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3291 *
3292 * Note: we currently don't have any to_interval() function, so there
3293 * is no need here for INVALID_FOR_INTERVAL checks.
3294 */
3295 static void
DCH_from_char(FormatNode * node,const char * in,TmFromChar * out,Oid collid,bool std,bool * have_error)3296 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3297 Oid collid, bool std, bool *have_error)
3298 {
3299 FormatNode *n;
3300 const char *s;
3301 int len,
3302 value;
3303 bool fx_mode = std;
3304
3305 /* number of extra skipped characters (more than given in format string) */
3306 int extra_skip = 0;
3307
3308 /* cache localized days and months */
3309 cache_locale_time();
3310
3311 for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3312 {
3313 /*
3314 * Ignore spaces at the beginning of the string and before fields when
3315 * not in FX (fixed width) mode.
3316 */
3317 if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3318 (n->type == NODE_TYPE_ACTION || n == node))
3319 {
3320 while (*s != '\0' && isspace((unsigned char) *s))
3321 {
3322 s++;
3323 extra_skip++;
3324 }
3325 }
3326
3327 if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3328 {
3329 if (std)
3330 {
3331 /*
3332 * Standard mode requires strict matching between format
3333 * string separators/spaces and input string.
3334 */
3335 Assert(n->character[0] && !n->character[1]);
3336
3337 if (*s == n->character[0])
3338 s++;
3339 else
3340 RETURN_ERROR(ereport(ERROR,
3341 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3342 errmsg("unmatched format separator \"%c\"",
3343 n->character[0]))));
3344 }
3345 else if (!fx_mode)
3346 {
3347 /*
3348 * In non FX (fixed format) mode one format string space or
3349 * separator match to one space or separator in input string.
3350 * Or match nothing if there is no space or separator in the
3351 * current position of input string.
3352 */
3353 extra_skip--;
3354 if (isspace((unsigned char) *s) || is_separator_char(s))
3355 {
3356 s++;
3357 extra_skip++;
3358 }
3359 }
3360 else
3361 {
3362 /*
3363 * In FX mode, on format string space or separator we consume
3364 * exactly one character from input string. Notice we don't
3365 * insist that the consumed character match the format's
3366 * character.
3367 */
3368 s += pg_mblen(s);
3369 }
3370 continue;
3371 }
3372 else if (n->type != NODE_TYPE_ACTION)
3373 {
3374 /*
3375 * Text character, so consume one character from input string.
3376 * Notice we don't insist that the consumed character match the
3377 * format's character.
3378 */
3379 if (!fx_mode)
3380 {
3381 /*
3382 * In non FX mode we might have skipped some extra characters
3383 * (more than specified in format string) before. In this
3384 * case we don't skip input string character, because it might
3385 * be part of field.
3386 */
3387 if (extra_skip > 0)
3388 extra_skip--;
3389 else
3390 s += pg_mblen(s);
3391 }
3392 else
3393 {
3394 int chlen = pg_mblen(s);
3395
3396 /*
3397 * Standard mode requires strict match of format characters.
3398 */
3399 if (std && n->type == NODE_TYPE_CHAR &&
3400 strncmp(s, n->character, chlen) != 0)
3401 RETURN_ERROR(ereport(ERROR,
3402 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3403 errmsg("unmatched format character \"%s\"",
3404 n->character))));
3405
3406 s += chlen;
3407 }
3408 continue;
3409 }
3410
3411 from_char_set_mode(out, n->key->date_mode, have_error);
3412 CHECK_ERROR;
3413
3414 switch (n->key->id)
3415 {
3416 case DCH_FX:
3417 fx_mode = true;
3418 break;
3419 case DCH_A_M:
3420 case DCH_P_M:
3421 case DCH_a_m:
3422 case DCH_p_m:
3423 from_char_seq_search(&value, &s, ampm_strings_long,
3424 NULL, InvalidOid,
3425 n, have_error);
3426 CHECK_ERROR;
3427 from_char_set_int(&out->pm, value % 2, n, have_error);
3428 CHECK_ERROR;
3429 out->clock = CLOCK_12_HOUR;
3430 break;
3431 case DCH_AM:
3432 case DCH_PM:
3433 case DCH_am:
3434 case DCH_pm:
3435 from_char_seq_search(&value, &s, ampm_strings,
3436 NULL, InvalidOid,
3437 n, have_error);
3438 CHECK_ERROR;
3439 from_char_set_int(&out->pm, value % 2, n, have_error);
3440 CHECK_ERROR;
3441 out->clock = CLOCK_12_HOUR;
3442 break;
3443 case DCH_HH:
3444 case DCH_HH12:
3445 from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3446 CHECK_ERROR;
3447 out->clock = CLOCK_12_HOUR;
3448 SKIP_THth(s, n->suffix);
3449 break;
3450 case DCH_HH24:
3451 from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3452 CHECK_ERROR;
3453 SKIP_THth(s, n->suffix);
3454 break;
3455 case DCH_MI:
3456 from_char_parse_int(&out->mi, &s, n, have_error);
3457 CHECK_ERROR;
3458 SKIP_THth(s, n->suffix);
3459 break;
3460 case DCH_SS:
3461 from_char_parse_int(&out->ss, &s, n, have_error);
3462 CHECK_ERROR;
3463 SKIP_THth(s, n->suffix);
3464 break;
3465 case DCH_MS: /* millisecond */
3466 len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
3467 CHECK_ERROR;
3468
3469 /*
3470 * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3471 */
3472 out->ms *= len == 1 ? 100 :
3473 len == 2 ? 10 : 1;
3474
3475 SKIP_THth(s, n->suffix);
3476 break;
3477 case DCH_FF1:
3478 case DCH_FF2:
3479 case DCH_FF3:
3480 case DCH_FF4:
3481 case DCH_FF5:
3482 case DCH_FF6:
3483 out->ff = n->key->id - DCH_FF1 + 1;
3484 /* fall through */
3485 case DCH_US: /* microsecond */
3486 len = from_char_parse_int_len(&out->us, &s,
3487 n->key->id == DCH_US ? 6 :
3488 out->ff, n, have_error);
3489 CHECK_ERROR;
3490
3491 out->us *= len == 1 ? 100000 :
3492 len == 2 ? 10000 :
3493 len == 3 ? 1000 :
3494 len == 4 ? 100 :
3495 len == 5 ? 10 : 1;
3496
3497 SKIP_THth(s, n->suffix);
3498 break;
3499 case DCH_SSSS:
3500 from_char_parse_int(&out->ssss, &s, n, have_error);
3501 CHECK_ERROR;
3502 SKIP_THth(s, n->suffix);
3503 break;
3504 case DCH_tz:
3505 case DCH_TZ:
3506 case DCH_OF:
3507 RETURN_ERROR(ereport(ERROR,
3508 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3509 errmsg("formatting field \"%s\" is only supported in to_char",
3510 n->key->name))));
3511 CHECK_ERROR;
3512 break;
3513 case DCH_TZH:
3514
3515 /*
3516 * Value of TZH might be negative. And the issue is that we
3517 * might swallow minus sign as the separator. So, if we have
3518 * skipped more characters than specified in the format
3519 * string, then we consider prepending last skipped minus to
3520 * TZH.
3521 */
3522 if (*s == '+' || *s == '-' || *s == ' ')
3523 {
3524 out->tzsign = *s == '-' ? -1 : +1;
3525 s++;
3526 }
3527 else
3528 {
3529 if (extra_skip > 0 && *(s - 1) == '-')
3530 out->tzsign = -1;
3531 else
3532 out->tzsign = +1;
3533 }
3534
3535 from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
3536 CHECK_ERROR;
3537 break;
3538 case DCH_TZM:
3539 /* assign positive timezone sign if TZH was not seen before */
3540 if (!out->tzsign)
3541 out->tzsign = +1;
3542 from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
3543 CHECK_ERROR;
3544 break;
3545 case DCH_A_D:
3546 case DCH_B_C:
3547 case DCH_a_d:
3548 case DCH_b_c:
3549 from_char_seq_search(&value, &s, adbc_strings_long,
3550 NULL, InvalidOid,
3551 n, have_error);
3552 CHECK_ERROR;
3553 from_char_set_int(&out->bc, value % 2, n, have_error);
3554 CHECK_ERROR;
3555 break;
3556 case DCH_AD:
3557 case DCH_BC:
3558 case DCH_ad:
3559 case DCH_bc:
3560 from_char_seq_search(&value, &s, adbc_strings,
3561 NULL, InvalidOid,
3562 n, have_error);
3563 CHECK_ERROR;
3564 from_char_set_int(&out->bc, value % 2, n, have_error);
3565 CHECK_ERROR;
3566 break;
3567 case DCH_MONTH:
3568 case DCH_Month:
3569 case DCH_month:
3570 from_char_seq_search(&value, &s, months_full,
3571 S_TM(n->suffix) ? localized_full_months : NULL,
3572 collid,
3573 n, have_error);
3574 CHECK_ERROR;
3575 from_char_set_int(&out->mm, value + 1, n, have_error);
3576 CHECK_ERROR;
3577 break;
3578 case DCH_MON:
3579 case DCH_Mon:
3580 case DCH_mon:
3581 from_char_seq_search(&value, &s, months,
3582 S_TM(n->suffix) ? localized_abbrev_months : NULL,
3583 collid,
3584 n, have_error);
3585 CHECK_ERROR;
3586 from_char_set_int(&out->mm, value + 1, n, have_error);
3587 CHECK_ERROR;
3588 break;
3589 case DCH_MM:
3590 from_char_parse_int(&out->mm, &s, n, have_error);
3591 CHECK_ERROR;
3592 SKIP_THth(s, n->suffix);
3593 break;
3594 case DCH_DAY:
3595 case DCH_Day:
3596 case DCH_day:
3597 from_char_seq_search(&value, &s, days,
3598 S_TM(n->suffix) ? localized_full_days : NULL,
3599 collid,
3600 n, have_error);
3601 CHECK_ERROR;
3602 from_char_set_int(&out->d, value, n, have_error);
3603 CHECK_ERROR;
3604 out->d++;
3605 break;
3606 case DCH_DY:
3607 case DCH_Dy:
3608 case DCH_dy:
3609 from_char_seq_search(&value, &s, days_short,
3610 S_TM(n->suffix) ? localized_abbrev_days : NULL,
3611 collid,
3612 n, have_error);
3613 CHECK_ERROR;
3614 from_char_set_int(&out->d, value, n, have_error);
3615 CHECK_ERROR;
3616 out->d++;
3617 break;
3618 case DCH_DDD:
3619 from_char_parse_int(&out->ddd, &s, n, have_error);
3620 CHECK_ERROR;
3621 SKIP_THth(s, n->suffix);
3622 break;
3623 case DCH_IDDD:
3624 from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
3625 CHECK_ERROR;
3626 SKIP_THth(s, n->suffix);
3627 break;
3628 case DCH_DD:
3629 from_char_parse_int(&out->dd, &s, n, have_error);
3630 CHECK_ERROR;
3631 SKIP_THth(s, n->suffix);
3632 break;
3633 case DCH_D:
3634 from_char_parse_int(&out->d, &s, n, have_error);
3635 CHECK_ERROR;
3636 SKIP_THth(s, n->suffix);
3637 break;
3638 case DCH_ID:
3639 from_char_parse_int_len(&out->d, &s, 1, n, have_error);
3640 CHECK_ERROR;
3641 /* Shift numbering to match Gregorian where Sunday = 1 */
3642 if (++out->d > 7)
3643 out->d = 1;
3644 SKIP_THth(s, n->suffix);
3645 break;
3646 case DCH_WW:
3647 case DCH_IW:
3648 from_char_parse_int(&out->ww, &s, n, have_error);
3649 CHECK_ERROR;
3650 SKIP_THth(s, n->suffix);
3651 break;
3652 case DCH_Q:
3653
3654 /*
3655 * We ignore 'Q' when converting to date because it is unclear
3656 * which date in the quarter to use, and some people specify
3657 * both quarter and month, so if it was honored it might
3658 * conflict with the supplied month. That is also why we don't
3659 * throw an error.
3660 *
3661 * We still parse the source string for an integer, but it
3662 * isn't stored anywhere in 'out'.
3663 */
3664 from_char_parse_int((int *) NULL, &s, n, have_error);
3665 CHECK_ERROR;
3666 SKIP_THth(s, n->suffix);
3667 break;
3668 case DCH_CC:
3669 from_char_parse_int(&out->cc, &s, n, have_error);
3670 CHECK_ERROR;
3671 SKIP_THth(s, n->suffix);
3672 break;
3673 case DCH_Y_YYY:
3674 {
3675 int matched,
3676 years,
3677 millennia,
3678 nch;
3679
3680 matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3681 if (matched < 2)
3682 RETURN_ERROR(ereport(ERROR,
3683 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3684 errmsg("invalid input string for \"Y,YYY\""))));
3685 years += (millennia * 1000);
3686 from_char_set_int(&out->year, years, n, have_error);
3687 CHECK_ERROR;
3688 out->yysz = 4;
3689 s += nch;
3690 SKIP_THth(s, n->suffix);
3691 }
3692 break;
3693 case DCH_YYYY:
3694 case DCH_IYYY:
3695 from_char_parse_int(&out->year, &s, n, have_error);
3696 CHECK_ERROR;
3697 out->yysz = 4;
3698 SKIP_THth(s, n->suffix);
3699 break;
3700 case DCH_YYY:
3701 case DCH_IYY:
3702 len = from_char_parse_int(&out->year, &s, n, have_error);
3703 CHECK_ERROR;
3704 if (len < 4)
3705 out->year = adjust_partial_year_to_2020(out->year);
3706 out->yysz = 3;
3707 SKIP_THth(s, n->suffix);
3708 break;
3709 case DCH_YY:
3710 case DCH_IY:
3711 len = from_char_parse_int(&out->year, &s, n, have_error);
3712 CHECK_ERROR;
3713 if (len < 4)
3714 out->year = adjust_partial_year_to_2020(out->year);
3715 out->yysz = 2;
3716 SKIP_THth(s, n->suffix);
3717 break;
3718 case DCH_Y:
3719 case DCH_I:
3720 len = from_char_parse_int(&out->year, &s, n, have_error);
3721 CHECK_ERROR;
3722 if (len < 4)
3723 out->year = adjust_partial_year_to_2020(out->year);
3724 out->yysz = 1;
3725 SKIP_THth(s, n->suffix);
3726 break;
3727 case DCH_RM:
3728 case DCH_rm:
3729 from_char_seq_search(&value, &s, rm_months_lower,
3730 NULL, InvalidOid,
3731 n, have_error);
3732 CHECK_ERROR;
3733 from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
3734 n, have_error);
3735 CHECK_ERROR;
3736 break;
3737 case DCH_W:
3738 from_char_parse_int(&out->w, &s, n, have_error);
3739 CHECK_ERROR;
3740 SKIP_THth(s, n->suffix);
3741 break;
3742 case DCH_J:
3743 from_char_parse_int(&out->j, &s, n, have_error);
3744 CHECK_ERROR;
3745 SKIP_THth(s, n->suffix);
3746 break;
3747 }
3748
3749 /* Ignore all spaces after fields */
3750 if (!fx_mode)
3751 {
3752 extra_skip = 0;
3753 while (*s != '\0' && isspace((unsigned char) *s))
3754 {
3755 s++;
3756 extra_skip++;
3757 }
3758 }
3759 }
3760
3761 /*
3762 * Standard parsing mode doesn't allow unmatched format patterns or
3763 * trailing characters in the input string.
3764 */
3765 if (std)
3766 {
3767 if (n->type != NODE_TYPE_END)
3768 RETURN_ERROR(ereport(ERROR,
3769 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3770 errmsg("input string is too short for datetime format"))));
3771
3772 while (*s != '\0' && isspace((unsigned char) *s))
3773 s++;
3774
3775 if (*s != '\0')
3776 RETURN_ERROR(ereport(ERROR,
3777 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3778 errmsg("trailing characters remain in input string "
3779 "after datetime format"))));
3780 }
3781
3782 on_error:
3783 return;
3784 }
3785
3786 /*
3787 * The invariant for DCH cache entry management is that DCHCounter is equal
3788 * to the maximum age value among the existing entries, and we increment it
3789 * whenever an access occurs. If we approach overflow, deal with that by
3790 * halving all the age values, so that we retain a fairly accurate idea of
3791 * which entries are oldest.
3792 */
3793 static inline void
DCH_prevent_counter_overflow(void)3794 DCH_prevent_counter_overflow(void)
3795 {
3796 if (DCHCounter >= (INT_MAX - 1))
3797 {
3798 for (int i = 0; i < n_DCHCache; i++)
3799 DCHCache[i]->age >>= 1;
3800 DCHCounter >>= 1;
3801 }
3802 }
3803
3804 /*
3805 * Get mask of date/time/zone components present in format nodes.
3806 *
3807 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3808 */
3809 static int
DCH_datetime_type(FormatNode * node,bool * have_error)3810 DCH_datetime_type(FormatNode *node, bool *have_error)
3811 {
3812 FormatNode *n;
3813 int flags = 0;
3814
3815 for (n = node; n->type != NODE_TYPE_END; n++)
3816 {
3817 if (n->type != NODE_TYPE_ACTION)
3818 continue;
3819
3820 switch (n->key->id)
3821 {
3822 case DCH_FX:
3823 break;
3824 case DCH_A_M:
3825 case DCH_P_M:
3826 case DCH_a_m:
3827 case DCH_p_m:
3828 case DCH_AM:
3829 case DCH_PM:
3830 case DCH_am:
3831 case DCH_pm:
3832 case DCH_HH:
3833 case DCH_HH12:
3834 case DCH_HH24:
3835 case DCH_MI:
3836 case DCH_SS:
3837 case DCH_MS: /* millisecond */
3838 case DCH_US: /* microsecond */
3839 case DCH_FF1:
3840 case DCH_FF2:
3841 case DCH_FF3:
3842 case DCH_FF4:
3843 case DCH_FF5:
3844 case DCH_FF6:
3845 case DCH_SSSS:
3846 flags |= DCH_TIMED;
3847 break;
3848 case DCH_tz:
3849 case DCH_TZ:
3850 case DCH_OF:
3851 RETURN_ERROR(ereport(ERROR,
3852 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3853 errmsg("formatting field \"%s\" is only supported in to_char",
3854 n->key->name))));
3855 flags |= DCH_ZONED;
3856 break;
3857 case DCH_TZH:
3858 case DCH_TZM:
3859 flags |= DCH_ZONED;
3860 break;
3861 case DCH_A_D:
3862 case DCH_B_C:
3863 case DCH_a_d:
3864 case DCH_b_c:
3865 case DCH_AD:
3866 case DCH_BC:
3867 case DCH_ad:
3868 case DCH_bc:
3869 case DCH_MONTH:
3870 case DCH_Month:
3871 case DCH_month:
3872 case DCH_MON:
3873 case DCH_Mon:
3874 case DCH_mon:
3875 case DCH_MM:
3876 case DCH_DAY:
3877 case DCH_Day:
3878 case DCH_day:
3879 case DCH_DY:
3880 case DCH_Dy:
3881 case DCH_dy:
3882 case DCH_DDD:
3883 case DCH_IDDD:
3884 case DCH_DD:
3885 case DCH_D:
3886 case DCH_ID:
3887 case DCH_WW:
3888 case DCH_Q:
3889 case DCH_CC:
3890 case DCH_Y_YYY:
3891 case DCH_YYYY:
3892 case DCH_IYYY:
3893 case DCH_YYY:
3894 case DCH_IYY:
3895 case DCH_YY:
3896 case DCH_IY:
3897 case DCH_Y:
3898 case DCH_I:
3899 case DCH_RM:
3900 case DCH_rm:
3901 case DCH_W:
3902 case DCH_J:
3903 flags |= DCH_DATED;
3904 break;
3905 }
3906 }
3907
3908 on_error:
3909 return flags;
3910 }
3911
3912 /* select a DCHCacheEntry to hold the given format picture */
3913 static DCHCacheEntry *
DCH_cache_getnew(const char * str,bool std)3914 DCH_cache_getnew(const char *str, bool std)
3915 {
3916 DCHCacheEntry *ent;
3917
3918 /* Ensure we can advance DCHCounter below */
3919 DCH_prevent_counter_overflow();
3920
3921 /*
3922 * If cache is full, remove oldest entry (or recycle first not-valid one)
3923 */
3924 if (n_DCHCache >= DCH_CACHE_ENTRIES)
3925 {
3926 DCHCacheEntry *old = DCHCache[0];
3927
3928 #ifdef DEBUG_TO_FROM_CHAR
3929 elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3930 #endif
3931 if (old->valid)
3932 {
3933 for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3934 {
3935 ent = DCHCache[i];
3936 if (!ent->valid)
3937 {
3938 old = ent;
3939 break;
3940 }
3941 if (ent->age < old->age)
3942 old = ent;
3943 }
3944 }
3945 #ifdef DEBUG_TO_FROM_CHAR
3946 elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3947 #endif
3948 old->valid = false;
3949 strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
3950 old->age = (++DCHCounter);
3951 /* caller is expected to fill format, then set valid */
3952 return old;
3953 }
3954 else
3955 {
3956 #ifdef DEBUG_TO_FROM_CHAR
3957 elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3958 #endif
3959 Assert(DCHCache[n_DCHCache] == NULL);
3960 DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3961 MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
3962 ent->valid = false;
3963 strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
3964 ent->std = std;
3965 ent->age = (++DCHCounter);
3966 /* caller is expected to fill format, then set valid */
3967 ++n_DCHCache;
3968 return ent;
3969 }
3970 }
3971
3972 /* look for an existing DCHCacheEntry matching the given format picture */
3973 static DCHCacheEntry *
DCH_cache_search(const char * str,bool std)3974 DCH_cache_search(const char *str, bool std)
3975 {
3976 /* Ensure we can advance DCHCounter below */
3977 DCH_prevent_counter_overflow();
3978
3979 for (int i = 0; i < n_DCHCache; i++)
3980 {
3981 DCHCacheEntry *ent = DCHCache[i];
3982
3983 if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
3984 {
3985 ent->age = (++DCHCounter);
3986 return ent;
3987 }
3988 }
3989
3990 return NULL;
3991 }
3992
3993 /* Find or create a DCHCacheEntry for the given format picture */
3994 static DCHCacheEntry *
DCH_cache_fetch(const char * str,bool std)3995 DCH_cache_fetch(const char *str, bool std)
3996 {
3997 DCHCacheEntry *ent;
3998
3999 if ((ent = DCH_cache_search(str, std)) == NULL)
4000 {
4001 /*
4002 * Not in the cache, must run parser and save a new format-picture to
4003 * the cache. Do not mark the cache entry valid until parsing
4004 * succeeds.
4005 */
4006 ent = DCH_cache_getnew(str, std);
4007
4008 parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
4009 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4010
4011 ent->valid = true;
4012 }
4013 return ent;
4014 }
4015
4016 /*
4017 * Format a date/time or interval into a string according to fmt.
4018 * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
4019 * for formatting.
4020 */
4021 static text *
datetime_to_char_body(TmToChar * tmtc,text * fmt,bool is_interval,Oid collid)4022 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4023 {
4024 FormatNode *format;
4025 char *fmt_str,
4026 *result;
4027 bool incache;
4028 int fmt_len;
4029 text *res;
4030
4031 /*
4032 * Convert fmt to C string
4033 */
4034 fmt_str = text_to_cstring(fmt);
4035 fmt_len = strlen(fmt_str);
4036
4037 /*
4038 * Allocate workspace for result as C string
4039 */
4040 result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4041 *result = '\0';
4042
4043 if (fmt_len > DCH_CACHE_SIZE)
4044 {
4045 /*
4046 * Allocate new memory if format picture is bigger than static cache
4047 * and do not use cache (call parser always)
4048 */
4049 incache = false;
4050
4051 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4052
4053 parse_format(format, fmt_str, DCH_keywords,
4054 DCH_suff, DCH_index, DCH_FLAG, NULL);
4055 }
4056 else
4057 {
4058 /*
4059 * Use cache buffers
4060 */
4061 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4062
4063 incache = true;
4064 format = ent->format;
4065 }
4066
4067 /* The real work is here */
4068 DCH_to_char(format, is_interval, tmtc, result, collid);
4069
4070 if (!incache)
4071 pfree(format);
4072
4073 pfree(fmt_str);
4074
4075 /* convert C-string result to TEXT format */
4076 res = cstring_to_text(result);
4077
4078 pfree(result);
4079 return res;
4080 }
4081
4082 /****************************************************************************
4083 * Public routines
4084 ***************************************************************************/
4085
4086 /* -------------------
4087 * TIMESTAMP to_char()
4088 * -------------------
4089 */
4090 Datum
timestamp_to_char(PG_FUNCTION_ARGS)4091 timestamp_to_char(PG_FUNCTION_ARGS)
4092 {
4093 Timestamp dt = PG_GETARG_TIMESTAMP(0);
4094 text *fmt = PG_GETARG_TEXT_PP(1),
4095 *res;
4096 TmToChar tmtc;
4097 struct pg_tm *tm;
4098 int thisdate;
4099
4100 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4101 PG_RETURN_NULL();
4102
4103 ZERO_tmtc(&tmtc);
4104 tm = tmtcTm(&tmtc);
4105
4106 if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4107 ereport(ERROR,
4108 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4109 errmsg("timestamp out of range")));
4110
4111 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4112 tm->tm_wday = (thisdate + 1) % 7;
4113 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4114
4115 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4116 PG_RETURN_NULL();
4117
4118 PG_RETURN_TEXT_P(res);
4119 }
4120
4121 Datum
timestamptz_to_char(PG_FUNCTION_ARGS)4122 timestamptz_to_char(PG_FUNCTION_ARGS)
4123 {
4124 TimestampTz dt = PG_GETARG_TIMESTAMP(0);
4125 text *fmt = PG_GETARG_TEXT_PP(1),
4126 *res;
4127 TmToChar tmtc;
4128 int tz;
4129 struct pg_tm *tm;
4130 int thisdate;
4131
4132 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4133 PG_RETURN_NULL();
4134
4135 ZERO_tmtc(&tmtc);
4136 tm = tmtcTm(&tmtc);
4137
4138 if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4139 ereport(ERROR,
4140 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4141 errmsg("timestamp out of range")));
4142
4143 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4144 tm->tm_wday = (thisdate + 1) % 7;
4145 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4146
4147 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4148 PG_RETURN_NULL();
4149
4150 PG_RETURN_TEXT_P(res);
4151 }
4152
4153
4154 /* -------------------
4155 * INTERVAL to_char()
4156 * -------------------
4157 */
4158 Datum
interval_to_char(PG_FUNCTION_ARGS)4159 interval_to_char(PG_FUNCTION_ARGS)
4160 {
4161 Interval *it = PG_GETARG_INTERVAL_P(0);
4162 text *fmt = PG_GETARG_TEXT_PP(1),
4163 *res;
4164 TmToChar tmtc;
4165 struct pg_tm *tm;
4166
4167 if (VARSIZE_ANY_EXHDR(fmt) <= 0)
4168 PG_RETURN_NULL();
4169
4170 ZERO_tmtc(&tmtc);
4171 tm = tmtcTm(&tmtc);
4172
4173 if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
4174 PG_RETURN_NULL();
4175
4176 /* wday is meaningless, yday approximates the total span in days */
4177 tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4178
4179 if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4180 PG_RETURN_NULL();
4181
4182 PG_RETURN_TEXT_P(res);
4183 }
4184
4185 /* ---------------------
4186 * TO_TIMESTAMP()
4187 *
4188 * Make Timestamp from date_str which is formatted at argument 'fmt'
4189 * ( to_timestamp is reverse to_char() )
4190 * ---------------------
4191 */
4192 Datum
to_timestamp(PG_FUNCTION_ARGS)4193 to_timestamp(PG_FUNCTION_ARGS)
4194 {
4195 text *date_txt = PG_GETARG_TEXT_PP(0);
4196 text *fmt = PG_GETARG_TEXT_PP(1);
4197 Oid collid = PG_GET_COLLATION();
4198 Timestamp result;
4199 int tz;
4200 struct pg_tm tm;
4201 fsec_t fsec;
4202 int fprec;
4203
4204 do_to_timestamp(date_txt, fmt, collid, false,
4205 &tm, &fsec, &fprec, NULL, NULL);
4206
4207 /* Use the specified time zone, if any. */
4208 if (tm.tm_zone)
4209 {
4210 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
4211
4212 if (dterr)
4213 DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4214 }
4215 else
4216 tz = DetermineTimeZoneOffset(&tm, session_timezone);
4217
4218 if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4219 ereport(ERROR,
4220 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4221 errmsg("timestamp out of range")));
4222
4223 /* Use the specified fractional precision, if any. */
4224 if (fprec)
4225 AdjustTimestampForTypmod(&result, fprec);
4226
4227 PG_RETURN_TIMESTAMP(result);
4228 }
4229
4230 /* ----------
4231 * TO_DATE
4232 * Make Date from date_str which is formatted at argument 'fmt'
4233 * ----------
4234 */
4235 Datum
to_date(PG_FUNCTION_ARGS)4236 to_date(PG_FUNCTION_ARGS)
4237 {
4238 text *date_txt = PG_GETARG_TEXT_PP(0);
4239 text *fmt = PG_GETARG_TEXT_PP(1);
4240 Oid collid = PG_GET_COLLATION();
4241 DateADT result;
4242 struct pg_tm tm;
4243 fsec_t fsec;
4244
4245 do_to_timestamp(date_txt, fmt, collid, false,
4246 &tm, &fsec, NULL, NULL, NULL);
4247
4248 /* Prevent overflow in Julian-day routines */
4249 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4250 ereport(ERROR,
4251 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4252 errmsg("date out of range: \"%s\"",
4253 text_to_cstring(date_txt))));
4254
4255 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4256
4257 /* Now check for just-out-of-range dates */
4258 if (!IS_VALID_DATE(result))
4259 ereport(ERROR,
4260 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4261 errmsg("date out of range: \"%s\"",
4262 text_to_cstring(date_txt))));
4263
4264 PG_RETURN_DATEADT(result);
4265 }
4266
4267 /*
4268 * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4269 * as a format string. The collation 'collid' may be used for case-folding
4270 * rules in some cases. 'strict' specifies standard parsing mode.
4271 *
4272 * The actual data type (returned in 'typid', 'typmod') is determined by
4273 * the presence of date/time/zone components in the format string.
4274 *
4275 * When timezone component is present, the corresponding offset is
4276 * returned in '*tz'.
4277 *
4278 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
4279 * and zero value is returned.
4280 */
4281 Datum
parse_datetime(text * date_txt,text * fmt,Oid collid,bool strict,Oid * typid,int32 * typmod,int * tz,bool * have_error)4282 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4283 Oid *typid, int32 *typmod, int *tz,
4284 bool *have_error)
4285 {
4286 struct pg_tm tm;
4287 fsec_t fsec;
4288 int fprec;
4289 uint32 flags;
4290
4291 do_to_timestamp(date_txt, fmt, collid, strict,
4292 &tm, &fsec, &fprec, &flags, have_error);
4293 CHECK_ERROR;
4294
4295 *typmod = fprec ? fprec : -1; /* fractional part precision */
4296
4297 if (flags & DCH_DATED)
4298 {
4299 if (flags & DCH_TIMED)
4300 {
4301 if (flags & DCH_ZONED)
4302 {
4303 TimestampTz result;
4304
4305 if (tm.tm_zone)
4306 {
4307 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4308
4309 if (dterr)
4310 DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4311 }
4312 else
4313 {
4314 /*
4315 * Time zone is present in format string, but not in input
4316 * string. Assuming do_to_timestamp() triggers no error
4317 * this should be possible only in non-strict case.
4318 */
4319 Assert(!strict);
4320
4321 RETURN_ERROR(ereport(ERROR,
4322 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4323 errmsg("missing time zone in input string for type timestamptz"))));
4324 }
4325
4326 if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4327 RETURN_ERROR(ereport(ERROR,
4328 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4329 errmsg("timestamptz out of range"))));
4330
4331 AdjustTimestampForTypmod(&result, *typmod);
4332
4333 *typid = TIMESTAMPTZOID;
4334 return TimestampTzGetDatum(result);
4335 }
4336 else
4337 {
4338 Timestamp result;
4339
4340 if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4341 RETURN_ERROR(ereport(ERROR,
4342 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4343 errmsg("timestamp out of range"))));
4344
4345 AdjustTimestampForTypmod(&result, *typmod);
4346
4347 *typid = TIMESTAMPOID;
4348 return TimestampGetDatum(result);
4349 }
4350 }
4351 else
4352 {
4353 if (flags & DCH_ZONED)
4354 {
4355 RETURN_ERROR(ereport(ERROR,
4356 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4357 errmsg("datetime format is zoned but not timed"))));
4358 }
4359 else
4360 {
4361 DateADT result;
4362
4363 /* Prevent overflow in Julian-day routines */
4364 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4365 RETURN_ERROR(ereport(ERROR,
4366 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4367 errmsg("date out of range: \"%s\"",
4368 text_to_cstring(date_txt)))));
4369
4370 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4371 POSTGRES_EPOCH_JDATE;
4372
4373 /* Now check for just-out-of-range dates */
4374 if (!IS_VALID_DATE(result))
4375 RETURN_ERROR(ereport(ERROR,
4376 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4377 errmsg("date out of range: \"%s\"",
4378 text_to_cstring(date_txt)))));
4379
4380 *typid = DATEOID;
4381 return DateADTGetDatum(result);
4382 }
4383 }
4384 }
4385 else if (flags & DCH_TIMED)
4386 {
4387 if (flags & DCH_ZONED)
4388 {
4389 TimeTzADT *result = palloc(sizeof(TimeTzADT));
4390
4391 if (tm.tm_zone)
4392 {
4393 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4394
4395 if (dterr)
4396 RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
4397 }
4398 else
4399 {
4400 /*
4401 * Time zone is present in format string, but not in input
4402 * string. Assuming do_to_timestamp() triggers no error this
4403 * should be possible only in non-strict case.
4404 */
4405 Assert(!strict);
4406
4407 RETURN_ERROR(ereport(ERROR,
4408 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4409 errmsg("missing time zone in input string for type timetz"))));
4410 }
4411
4412 if (tm2timetz(&tm, fsec, *tz, result) != 0)
4413 RETURN_ERROR(ereport(ERROR,
4414 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4415 errmsg("timetz out of range"))));
4416
4417 AdjustTimeForTypmod(&result->time, *typmod);
4418
4419 *typid = TIMETZOID;
4420 return TimeTzADTPGetDatum(result);
4421 }
4422 else
4423 {
4424 TimeADT result;
4425
4426 if (tm2time(&tm, fsec, &result) != 0)
4427 RETURN_ERROR(ereport(ERROR,
4428 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4429 errmsg("time out of range"))));
4430
4431 AdjustTimeForTypmod(&result, *typmod);
4432
4433 *typid = TIMEOID;
4434 return TimeADTGetDatum(result);
4435 }
4436 }
4437 else
4438 {
4439 RETURN_ERROR(ereport(ERROR,
4440 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4441 errmsg("datetime format is not dated and not timed"))));
4442 }
4443
4444 on_error:
4445 return (Datum) 0;
4446 }
4447
4448 /*
4449 * do_to_timestamp: shared code for to_timestamp and to_date
4450 *
4451 * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4452 * fractional seconds, and fractional precision.
4453 *
4454 * 'collid' identifies the collation to use, if needed.
4455 * 'std' specifies standard parsing mode.
4456 * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4457 * if that is not NULL.
4458 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
4459 *
4460 * We parse 'fmt' into a list of FormatNodes, which is then passed to
4461 * DCH_from_char to populate a TmFromChar with the parsed contents of
4462 * 'date_txt'.
4463 *
4464 * The TmFromChar is then analysed and converted into the final results in
4465 * struct 'tm', 'fsec', and 'fprec'.
4466 */
4467 static void
do_to_timestamp(text * date_txt,text * fmt,Oid collid,bool std,struct pg_tm * tm,fsec_t * fsec,int * fprec,uint32 * flags,bool * have_error)4468 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4469 struct pg_tm *tm, fsec_t *fsec, int *fprec,
4470 uint32 *flags, bool *have_error)
4471 {
4472 FormatNode *format = NULL;
4473 TmFromChar tmfc;
4474 int fmt_len;
4475 char *date_str;
4476 int fmask;
4477 bool incache = false;
4478
4479 Assert(tm != NULL);
4480 Assert(fsec != NULL);
4481
4482 date_str = text_to_cstring(date_txt);
4483
4484 ZERO_tmfc(&tmfc);
4485 ZERO_tm(tm);
4486 *fsec = 0;
4487 if (fprec)
4488 *fprec = 0;
4489 if (flags)
4490 *flags = 0;
4491 fmask = 0; /* bit mask for ValidateDate() */
4492
4493 fmt_len = VARSIZE_ANY_EXHDR(fmt);
4494
4495 if (fmt_len)
4496 {
4497 char *fmt_str;
4498
4499 fmt_str = text_to_cstring(fmt);
4500
4501 if (fmt_len > DCH_CACHE_SIZE)
4502 {
4503 /*
4504 * Allocate new memory if format picture is bigger than static
4505 * cache and do not use cache (call parser always)
4506 */
4507 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4508
4509 parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
4510 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4511 }
4512 else
4513 {
4514 /*
4515 * Use cache buffers
4516 */
4517 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4518
4519 incache = true;
4520 format = ent->format;
4521 }
4522
4523 #ifdef DEBUG_TO_FROM_CHAR
4524 /* dump_node(format, fmt_len); */
4525 /* dump_index(DCH_keywords, DCH_index); */
4526 #endif
4527
4528 DCH_from_char(format, date_str, &tmfc, collid, std, have_error);
4529 CHECK_ERROR;
4530
4531 pfree(fmt_str);
4532
4533 if (flags)
4534 *flags = DCH_datetime_type(format, have_error);
4535
4536 if (!incache)
4537 {
4538 pfree(format);
4539 format = NULL;
4540 }
4541
4542 CHECK_ERROR;
4543 }
4544
4545 DEBUG_TMFC(&tmfc);
4546
4547 /*
4548 * Convert to_date/to_timestamp input fields to standard 'tm'
4549 */
4550 if (tmfc.ssss)
4551 {
4552 int x = tmfc.ssss;
4553
4554 tm->tm_hour = x / SECS_PER_HOUR;
4555 x %= SECS_PER_HOUR;
4556 tm->tm_min = x / SECS_PER_MINUTE;
4557 x %= SECS_PER_MINUTE;
4558 tm->tm_sec = x;
4559 }
4560
4561 if (tmfc.ss)
4562 tm->tm_sec = tmfc.ss;
4563 if (tmfc.mi)
4564 tm->tm_min = tmfc.mi;
4565 if (tmfc.hh)
4566 tm->tm_hour = tmfc.hh;
4567
4568 if (tmfc.clock == CLOCK_12_HOUR)
4569 {
4570 if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4571 {
4572 RETURN_ERROR(ereport(ERROR,
4573 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4574 errmsg("hour \"%d\" is invalid for the 12-hour clock",
4575 tm->tm_hour),
4576 errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
4577 }
4578
4579 if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4580 tm->tm_hour += HOURS_PER_DAY / 2;
4581 else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4582 tm->tm_hour = 0;
4583 }
4584
4585 if (tmfc.year)
4586 {
4587 /*
4588 * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4589 * the year in the given century. Keep in mind that the 21st century
4590 * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4591 * 600BC to 501BC.
4592 */
4593 if (tmfc.cc && tmfc.yysz <= 2)
4594 {
4595 if (tmfc.bc)
4596 tmfc.cc = -tmfc.cc;
4597 tm->tm_year = tmfc.year % 100;
4598 if (tm->tm_year)
4599 {
4600 if (tmfc.cc >= 0)
4601 tm->tm_year += (tmfc.cc - 1) * 100;
4602 else
4603 tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4604 }
4605 else
4606 {
4607 /* find century year for dates ending in "00" */
4608 tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4609 }
4610 }
4611 else
4612 {
4613 /* If a 4-digit year is provided, we use that and ignore CC. */
4614 tm->tm_year = tmfc.year;
4615 if (tmfc.bc)
4616 tm->tm_year = -tm->tm_year;
4617 /* correct for our representation of BC years */
4618 if (tm->tm_year < 0)
4619 tm->tm_year++;
4620 }
4621 fmask |= DTK_M(YEAR);
4622 }
4623 else if (tmfc.cc)
4624 {
4625 /* use first year of century */
4626 if (tmfc.bc)
4627 tmfc.cc = -tmfc.cc;
4628 if (tmfc.cc >= 0)
4629 /* +1 because 21st century started in 2001 */
4630 tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4631 else
4632 /* +1 because year == 599 is 600 BC */
4633 tm->tm_year = tmfc.cc * 100 + 1;
4634 fmask |= DTK_M(YEAR);
4635 }
4636
4637 if (tmfc.j)
4638 {
4639 j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4640 fmask |= DTK_DATE_M;
4641 }
4642
4643 if (tmfc.ww)
4644 {
4645 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4646 {
4647 /*
4648 * If tmfc.d is not set, then the date is left at the beginning of
4649 * the ISO week (Monday).
4650 */
4651 if (tmfc.d)
4652 isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4653 else
4654 isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4655 fmask |= DTK_DATE_M;
4656 }
4657 else
4658 tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4659 }
4660
4661 if (tmfc.w)
4662 tmfc.dd = (tmfc.w - 1) * 7 + 1;
4663 if (tmfc.dd)
4664 {
4665 tm->tm_mday = tmfc.dd;
4666 fmask |= DTK_M(DAY);
4667 }
4668 if (tmfc.mm)
4669 {
4670 tm->tm_mon = tmfc.mm;
4671 fmask |= DTK_M(MONTH);
4672 }
4673
4674 if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4675 {
4676 /*
4677 * The month and day field have not been set, so we use the
4678 * day-of-year field to populate them. Depending on the date mode,
4679 * this field may be interpreted as a Gregorian day-of-year, or an ISO
4680 * week date day-of-year.
4681 */
4682
4683 if (!tm->tm_year && !tmfc.bc)
4684 {
4685 RETURN_ERROR(ereport(ERROR,
4686 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4687 errmsg("cannot calculate day of year without year information"))));
4688 }
4689
4690 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4691 {
4692 int j0; /* zeroth day of the ISO year, in Julian */
4693
4694 j0 = isoweek2j(tm->tm_year, 1) - 1;
4695
4696 j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4697 fmask |= DTK_DATE_M;
4698 }
4699 else
4700 {
4701 const int *y;
4702 int i;
4703
4704 static const int ysum[2][13] = {
4705 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4706 {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4707
4708 y = ysum[isleap(tm->tm_year)];
4709
4710 for (i = 1; i <= MONTHS_PER_YEAR; i++)
4711 {
4712 if (tmfc.ddd <= y[i])
4713 break;
4714 }
4715 if (tm->tm_mon <= 1)
4716 tm->tm_mon = i;
4717
4718 if (tm->tm_mday <= 1)
4719 tm->tm_mday = tmfc.ddd - y[i - 1];
4720
4721 fmask |= DTK_M(MONTH) | DTK_M(DAY);
4722 }
4723 }
4724
4725 if (tmfc.ms)
4726 *fsec += tmfc.ms * 1000;
4727 if (tmfc.us)
4728 *fsec += tmfc.us;
4729 if (fprec)
4730 *fprec = tmfc.ff; /* fractional precision, if specified */
4731
4732 /* Range-check date fields according to bit mask computed above */
4733 if (fmask != 0)
4734 {
4735 /* We already dealt with AD/BC, so pass isjulian = true */
4736 int dterr = ValidateDate(fmask, true, false, false, tm);
4737
4738 if (dterr != 0)
4739 {
4740 /*
4741 * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4742 * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4743 * irrelevant hint about datestyle.
4744 */
4745 RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4746 }
4747 }
4748
4749 /* Range-check time fields too */
4750 if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4751 tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4752 tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4753 *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4754 {
4755 RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4756 }
4757
4758 /* Save parsed time-zone into tm->tm_zone if it was specified */
4759 if (tmfc.tzsign)
4760 {
4761 char *tz;
4762
4763 if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4764 tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4765 {
4766 RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
4767 }
4768
4769 tz = psprintf("%c%02d:%02d",
4770 tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
4771
4772 tm->tm_zone = tz;
4773 }
4774
4775 DEBUG_TM(tm);
4776
4777 on_error:
4778
4779 if (format && !incache)
4780 pfree(format);
4781
4782 pfree(date_str);
4783 }
4784
4785
4786 /**********************************************************************
4787 * the NUMBER version part
4788 *********************************************************************/
4789
4790
/*
 * Set the first 'max' bytes of 'str' to the character 'c' and put a
 * terminating NUL right behind them.  The buffer must therefore have room
 * for max + 1 bytes.  Returns 'str'.
 */
static char *
fill_str(char *str, int c, int max)
{
	/* the terminator lives just past the filled region, so order is free */
	str[max] = '\0';
	return memset(str, c, max);
}
4798
/*
 * Reset the listed fields of the number descriptor pointed to by _n to zero.
 * Note that _n is evaluated more than once.
 */
#define zeroize_NUM(_n) \
do { \
	(_n)->flag = (_n)->lsign = 0; \
	(_n)->pre = (_n)->post = 0; \
	(_n)->pre_lsign_num = 0; \
	(_n)->need_locale = (_n)->multi = 0; \
	(_n)->zero_start = (_n)->zero_end = 0; \
} while(0)
4811
4812 /* This works the same as DCH_prevent_counter_overflow */
4813 static inline void
NUM_prevent_counter_overflow(void)4814 NUM_prevent_counter_overflow(void)
4815 {
4816 if (NUMCounter >= (INT_MAX - 1))
4817 {
4818 for (int i = 0; i < n_NUMCache; i++)
4819 NUMCache[i]->age >>= 1;
4820 NUMCounter >>= 1;
4821 }
4822 }
4823
4824 /* select a NUMCacheEntry to hold the given format picture */
4825 static NUMCacheEntry *
NUM_cache_getnew(const char * str)4826 NUM_cache_getnew(const char *str)
4827 {
4828 NUMCacheEntry *ent;
4829
4830 /* Ensure we can advance NUMCounter below */
4831 NUM_prevent_counter_overflow();
4832
4833 /*
4834 * If cache is full, remove oldest entry (or recycle first not-valid one)
4835 */
4836 if (n_NUMCache >= NUM_CACHE_ENTRIES)
4837 {
4838 NUMCacheEntry *old = NUMCache[0];
4839
4840 #ifdef DEBUG_TO_FROM_CHAR
4841 elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4842 #endif
4843 if (old->valid)
4844 {
4845 for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4846 {
4847 ent = NUMCache[i];
4848 if (!ent->valid)
4849 {
4850 old = ent;
4851 break;
4852 }
4853 if (ent->age < old->age)
4854 old = ent;
4855 }
4856 }
4857 #ifdef DEBUG_TO_FROM_CHAR
4858 elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4859 #endif
4860 old->valid = false;
4861 strlcpy(old->str, str, NUM_CACHE_SIZE + 1);
4862 old->age = (++NUMCounter);
4863 /* caller is expected to fill format and Num, then set valid */
4864 return old;
4865 }
4866 else
4867 {
4868 #ifdef DEBUG_TO_FROM_CHAR
4869 elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4870 #endif
4871 Assert(NUMCache[n_NUMCache] == NULL);
4872 NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4873 MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry));
4874 ent->valid = false;
4875 strlcpy(ent->str, str, NUM_CACHE_SIZE + 1);
4876 ent->age = (++NUMCounter);
4877 /* caller is expected to fill format and Num, then set valid */
4878 ++n_NUMCache;
4879 return ent;
4880 }
4881 }
4882
4883 /* look for an existing NUMCacheEntry matching the given format picture */
4884 static NUMCacheEntry *
NUM_cache_search(const char * str)4885 NUM_cache_search(const char *str)
4886 {
4887 /* Ensure we can advance NUMCounter below */
4888 NUM_prevent_counter_overflow();
4889
4890 for (int i = 0; i < n_NUMCache; i++)
4891 {
4892 NUMCacheEntry *ent = NUMCache[i];
4893
4894 if (ent->valid && strcmp(ent->str, str) == 0)
4895 {
4896 ent->age = (++NUMCounter);
4897 return ent;
4898 }
4899 }
4900
4901 return NULL;
4902 }
4903
4904 /* Find or create a NUMCacheEntry for the given format picture */
4905 static NUMCacheEntry *
NUM_cache_fetch(const char * str)4906 NUM_cache_fetch(const char *str)
4907 {
4908 NUMCacheEntry *ent;
4909
4910 if ((ent = NUM_cache_search(str)) == NULL)
4911 {
4912 /*
4913 * Not in the cache, must run parser and save a new format-picture to
4914 * the cache. Do not mark the cache entry valid until parsing
4915 * succeeds.
4916 */
4917 ent = NUM_cache_getnew(str);
4918
4919 zeroize_NUM(&ent->Num);
4920
4921 parse_format(ent->format, str, NUM_keywords,
4922 NULL, NUM_index, NUM_FLAG, &ent->Num);
4923
4924 ent->valid = true;
4925 }
4926 return ent;
4927 }
4928
4929 /* ----------
4930 * Cache routine for NUM to_char version
4931 * ----------
4932 */
4933 static FormatNode *
NUM_cache(int len,NUMDesc * Num,text * pars_str,bool * shouldFree)4934 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4935 {
4936 FormatNode *format = NULL;
4937 char *str;
4938
4939 str = text_to_cstring(pars_str);
4940
4941 if (len > NUM_CACHE_SIZE)
4942 {
4943 /*
4944 * Allocate new memory if format picture is bigger than static cache
4945 * and do not use cache (call parser always)
4946 */
4947 format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4948
4949 *shouldFree = true;
4950
4951 zeroize_NUM(Num);
4952
4953 parse_format(format, str, NUM_keywords,
4954 NULL, NUM_index, NUM_FLAG, Num);
4955 }
4956 else
4957 {
4958 /*
4959 * Use cache buffers
4960 */
4961 NUMCacheEntry *ent = NUM_cache_fetch(str);
4962
4963 *shouldFree = false;
4964
4965 format = ent->format;
4966
4967 /*
4968 * Copy cache to used struct
4969 */
4970 Num->flag = ent->Num.flag;
4971 Num->lsign = ent->Num.lsign;
4972 Num->pre = ent->Num.pre;
4973 Num->post = ent->Num.post;
4974 Num->pre_lsign_num = ent->Num.pre_lsign_num;
4975 Num->need_locale = ent->Num.need_locale;
4976 Num->multi = ent->Num.multi;
4977 Num->zero_start = ent->Num.zero_start;
4978 Num->zero_end = ent->Num.zero_end;
4979 }
4980
4981 #ifdef DEBUG_TO_FROM_CHAR
4982 /* dump_node(format, len); */
4983 dump_index(NUM_keywords, NUM_index);
4984 #endif
4985
4986 pfree(str);
4987 return format;
4988 }
4989
4990
4991 static char *
int_to_roman(int number)4992 int_to_roman(int number)
4993 {
4994 int len,
4995 num;
4996 char *p,
4997 *result,
4998 numstr[12];
4999
5000 result = (char *) palloc(16);
5001 *result = '\0';
5002
5003 if (number > 3999 || number < 1)
5004 {
5005 fill_str(result, '#', 15);
5006 return result;
5007 }
5008 len = snprintf(numstr, sizeof(numstr), "%d", number);
5009
5010 for (p = numstr; *p != '\0'; p++, --len)
5011 {
5012 num = *p - ('0' + 1);
5013 if (num < 0)
5014 continue;
5015
5016 if (len > 3)
5017 {
5018 while (num-- != -1)
5019 strcat(result, "M");
5020 }
5021 else
5022 {
5023 if (len == 3)
5024 strcat(result, rm100[num]);
5025 else if (len == 2)
5026 strcat(result, rm10[num]);
5027 else if (len == 1)
5028 strcat(result, rm1[num]);
5029 }
5030 }
5031 return result;
5032 }
5033
5034
5035
5036 /* ----------
5037 * Locale
5038 * ----------
5039 */
5040 static void
NUM_prepare_locale(NUMProc * Np)5041 NUM_prepare_locale(NUMProc *Np)
5042 {
5043 if (Np->Num->need_locale)
5044 {
5045 struct lconv *lconv;
5046
5047 /*
5048 * Get locales
5049 */
5050 lconv = PGLC_localeconv();
5051
5052 /*
5053 * Positive / Negative number sign
5054 */
5055 if (lconv->negative_sign && *lconv->negative_sign)
5056 Np->L_negative_sign = lconv->negative_sign;
5057 else
5058 Np->L_negative_sign = "-";
5059
5060 if (lconv->positive_sign && *lconv->positive_sign)
5061 Np->L_positive_sign = lconv->positive_sign;
5062 else
5063 Np->L_positive_sign = "+";
5064
5065 /*
5066 * Number decimal point
5067 */
5068 if (lconv->decimal_point && *lconv->decimal_point)
5069 Np->decimal = lconv->decimal_point;
5070
5071 else
5072 Np->decimal = ".";
5073
5074 if (!IS_LDECIMAL(Np->Num))
5075 Np->decimal = ".";
5076
5077 /*
5078 * Number thousands separator
5079 *
5080 * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
5081 * but "" for thousands_sep, so we set the thousands_sep too.
5082 * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
5083 */
5084 if (lconv->thousands_sep && *lconv->thousands_sep)
5085 Np->L_thousands_sep = lconv->thousands_sep;
5086 /* Make sure thousands separator doesn't match decimal point symbol. */
5087 else if (strcmp(Np->decimal, ",") != 0)
5088 Np->L_thousands_sep = ",";
5089 else
5090 Np->L_thousands_sep = ".";
5091
5092 /*
5093 * Currency symbol
5094 */
5095 if (lconv->currency_symbol && *lconv->currency_symbol)
5096 Np->L_currency_symbol = lconv->currency_symbol;
5097 else
5098 Np->L_currency_symbol = " ";
5099 }
5100 else
5101 {
5102 /*
5103 * Default values
5104 */
5105 Np->L_negative_sign = "-";
5106 Np->L_positive_sign = "+";
5107 Np->decimal = ".";
5108
5109 Np->L_thousands_sep = ",";
5110 Np->L_currency_symbol = " ";
5111 }
5112 }
5113
/* ----------
 * Return pointer of last relevant number after decimal point
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 *
 * "Relevant" means any character other than '0' found behind the first '.'.
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *dot = strchr(num, '.');
	char	   *last;
	char	   *scan;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (dot == NULL)
		return NULL;

	/* the decimal point itself is the answer if only zeros follow it */
	last = dot;
	for (scan = dot + 1; *scan != '\0'; scan++)
	{
		if (*scan != '0')
			last = scan;
	}

	return last;
}
5145
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * Both refer to the variables 'Np' and 'input_len' of the function they are
 * expanded in.
 *	OVERLOAD_TEST: true if we've reached end of input string
 *	AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * AMOUNT_TEST compares the number of bytes left against 's'.  It deliberately
 * does not compute "Np->inout + (input_len - (s))": when s is larger than
 * input_len that expression would form a pointer before the start of the
 * input buffer, which is undefined pointer arithmetic.
 */
#define OVERLOAD_TEST	(Np->inout_p >= Np->inout + input_len)
#define AMOUNT_TEST(s)	((Np->inout + input_len) - Np->inout_p >= (s))
5153
/* ----------
 * Number extraction for TO_NUMBER()
 *
 * Handles one numeric position of the format picture while scanning input:
 * optionally skips one leading space, reads a sign in front of the number,
 * reads a single digit or the decimal point into the number string, and
 * finally looks for a sign behind the number.
 *
 * Np: processing state.  The input cursor is Np->inout_p; digits and '.'
 *		are appended at Np->number_p; a recognized sign is stored in
 *		*Np->number, which holds ' ' for as long as no sign has been read.
 * id: id of the format node being processed (NUM_0, NUM_9, NUM_DEC, ...).
 * input_len: length of the input, used by OVERLOAD_TEST / AMOUNT_TEST.
 *
 * In several places the cursor is advanced one byte less than what was
 * consumed, because NUM_processor() does its own inout_p++ afterwards.
 * ----------
 */
static void
NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
{
	/* set once a digit or decimal point was consumed in this call */
	bool		isread = false;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, " --- scan start --- id=%s",
		 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
#endif

	if (OVERLOAD_TEST)
		return;

	/* skip a single space in front of the number part */
	if (*Np->inout_p == ' ')
		Np->inout_p++;

	if (OVERLOAD_TEST)
		return;

	/*
	 * read sign before number
	 *
	 * Only attempted for a digit position, while no sign is known yet and no
	 * digit has been read so far.
	 */
	if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
		(Np->read_pre + Np->read_post) == 0)
	{
#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
			 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
#endif

		/*
		 * locale sign
		 */
		if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
		{
			int			x = 0;

#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
#endif
			/* x is the byte length of the sign string being tried */
			if ((x = strlen(Np->L_negative_sign)) &&
				AMOUNT_TEST(x) &&
				strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
			{
				Np->inout_p += x;
				*Np->number = '-';
			}
			else if ((x = strlen(Np->L_positive_sign)) &&
					 AMOUNT_TEST(x) &&
					 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
			{
				Np->inout_p += x;
				*Np->number = '+';
			}
		}
		else
		{
#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
#endif

			/*
			 * simple + - < >
			 *
			 * '<' counts as a minus sign only when the format has the
			 * bracket flag set.
			 */
			if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
										*Np->inout_p == '<'))
			{
				*Np->number = '-';	/* set - */
				Np->inout_p++;
			}
			else if (*Np->inout_p == '+')
			{
				*Np->number = '+';	/* set + */
				Np->inout_p++;
			}
		}
	}

	if (OVERLOAD_TEST)
		return;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
#endif

	/*
	 * read digit or decimal point
	 */
	if (isdigit((unsigned char) *Np->inout_p))
	{
		/* all fractional digits the format allows have been read already */
		if (Np->read_dec && Np->read_post == Np->Num->post)
			return;

		*Np->number_p = *Np->inout_p;
		Np->number_p++;

		/* count the digit on the proper side of the decimal point */
		if (Np->read_dec)
			Np->read_post++;
		else
			Np->read_pre++;

		isread = true;

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
#endif
	}
	else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
	{
		/*
		 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
		 * Np->decimal is always just "." if we don't have a D format token.
		 * So we just unconditionally match to Np->decimal.
		 */
		int			x = strlen(Np->decimal);

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Try read decimal point (%c)",
			 *Np->inout_p);
#endif
		if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
		{
			/* x - 1: the last byte is left for NUM_processor()'s inout_p++ */
			Np->inout_p += x - 1;
			/* the number string always gets a plain '.' */
			*Np->number_p = '.';
			Np->number_p++;
			Np->read_dec = true;
			isread = true;
		}
	}

	if (OVERLOAD_TEST)
		return;

	/*
	 * Read sign behind "last" number
	 *
	 * We need sign detection because determining the exact position of a
	 * post-sign is difficult:
	 *
	 * FM9999.9999999S	   -> 123.001- 9.9S			   -> .5- FM9.999999MI ->
	 * 5.01-
	 */
	if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
	{
		/*
		 * locale sign (NUM_S) is always anchored behind a last number, if: -
		 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
		 * next char is not digit
		 */
		if (IS_LSIGN(Np->Num) && isread &&
			(Np->inout_p + 1) < Np->inout + input_len &&
			!isdigit((unsigned char) *(Np->inout_p + 1)))
		{
			int			x;
			/* remember the cursor, then step onto the candidate sign */
			char	   *tmp = Np->inout_p++;

#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
#endif
			if ((x = strlen(Np->L_negative_sign)) &&
				AMOUNT_TEST(x) &&
				strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
			{
				Np->inout_p += x - 1;	/* -1 .. NUM_processor() do inout_p++ */
				*Np->number = '-';
			}
			else if ((x = strlen(Np->L_positive_sign)) &&
					 AMOUNT_TEST(x) &&
					 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
			{
				Np->inout_p += x - 1;	/* -1 .. NUM_processor() do inout_p++ */
				*Np->number = '+';
			}
			if (*Np->number == ' ')
				/* no sign read: put the cursor back where it was */
				Np->inout_p = tmp;
		}

		/*
		 * Try to read a non-locale sign.  This happens only if the format is
		 * not exact and we cannot determine the sign position of MI/PL/SG,
		 * for example:
		 *
		 * FM9.999999MI			   -> 5.01-
		 *
		 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
		 * like to_number('1 -', '9S') where sign is not anchored to last
		 * number.
		 */
		else if (isread == false && IS_LSIGN(Np->Num) == false &&
				 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
		{
#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
#endif

			/*
			 * simple + -
			 */
			if (*Np->inout_p == '-' || *Np->inout_p == '+')
				/* NUM_processor() do inout_p++ */
				*Np->number = *Np->inout_p;
		}
	}
}
5362
/*
 * IS_PREDEC_SPACE(_n): true when, for the NUMProc pointed to by _n, all of
 * the following hold:
 *	- IS_ZERO() is false for the format descriptor,
 *	- number_p still points at the first character of the number string,
 *	- that first character is '0',
 *	- the format has at least one position after the decimal point
 *	  (Num->post != 0).
 * NUM_numpart_to_char() uses this for the "9.9" --> " .1" case, where a
 * space is written in place of the leading zero.
 */
#define IS_PREDEC_SPACE(_n) \
		(IS_ZERO((_n)->Num)==false && \
		 (_n)->number == (_n)->number_p && \
		 *(_n)->number == '0' && \
				 (_n)->Num->post != 0)
5368
/* ----------
 * Add digit or sign to number-string
 *
 * Handles one numeric position of the format picture while producing
 * to_char() output.  Depending on the state in Np this writes a leading
 * sign, a blank, a '0', the decimal point, or the next digit of the number
 * string at Np->inout_p, and after the last position a closing '>' or a
 * trailing locale sign.
 *
 * Np: processing state; Np->num_curr is incremented on every call that is
 *		not for a Roman-numeral format.
 * id: id of the format node being processed (NUM_9, NUM_0, NUM_D, NUM_DEC).
 *
 * Does nothing at all when the format has the Roman flag set.
 * ----------
 */
static void
NUM_numpart_to_char(NUMProc *Np, int id)
{
	int			end;

	if (IS_ROMAN(Np->Num))
		return;

	/* Note: in this elog() output not set '\0' in 'inout' */

#ifdef DEBUG_TO_FROM_CHAR

	/*
	 * Np->num_curr is number of current item in format-picture, it is not
	 * current position in inout!
	 */
	elog(DEBUG_elog_output,
		 "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
		 Np->sign_wrote,
		 Np->num_curr,
		 Np->number_p,
		 Np->inout);
#endif
	Np->num_in = false;

	/*
	 * Write the sign once real number output is about to start.  Note:
	 * IS_PREDEC_SPACE() handles "9.9" --> " .1"
	 */
	if (Np->sign_wrote == false &&
		(Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
		(IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
	{
		if (IS_LSIGN(Np->Num))
		{
			/* a locale sign is written here only when it goes in front */
			if (Np->Num->lsign == NUM_LSIGN_PRE)
			{
				if (Np->sign == '-')
					strcpy(Np->inout_p, Np->L_negative_sign);
				else
					strcpy(Np->inout_p, Np->L_positive_sign);
				Np->inout_p += strlen(Np->inout_p);
				Np->sign_wrote = true;
			}
		}
		else if (IS_BRACKET(Np->Num))
		{
			/* bracket format: '<' opens a negative number, else a blank */
			*Np->inout_p = Np->sign == '+' ? ' ' : '<';
			++Np->inout_p;
			Np->sign_wrote = true;
		}
		else if (Np->sign == '+')
		{
			/* a plus sign is shown as a blank, and not at all in fill mode */
			if (!IS_FILLMODE(Np->Num))
			{
				*Np->inout_p = ' '; /* Write + */
				++Np->inout_p;
			}
			Np->sign_wrote = true;
		}
		else if (Np->sign == '-')
		{						/* Write - */
			*Np->inout_p = '-';
			++Np->inout_p;
			Np->sign_wrote = true;
		}
	}


	/*
	 * digits / FM / Zero / Dec. point
	 */
	if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
	{
		if (Np->num_curr < Np->out_pre_spaces &&
			(Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
		{
			/*
			 * Write blank space (nothing at all in fill mode)
			 */
			if (!IS_FILLMODE(Np->Num))
			{
				*Np->inout_p = ' '; /* Write ' ' */
				++Np->inout_p;
			}
		}
		else if (IS_ZERO(Np->Num) &&
				 Np->num_curr < Np->out_pre_spaces &&
				 Np->Num->zero_start <= Np->num_curr)
		{
			/*
			 * Write ZERO
			 */
			*Np->inout_p = '0'; /* Write '0' */
			++Np->inout_p;
			Np->num_in = true;
		}
		else
		{
			/*
			 * Write Decimal point
			 */
			if (*Np->number_p == '.')
			{
				if (!Np->last_relevant || *Np->last_relevant != '.')
				{
					strcpy(Np->inout_p, Np->decimal);	/* Write DEC/D */
					Np->inout_p += strlen(Np->inout_p);
				}

				/*
				 * Ora 'n' -- FM9.9 --> 'n.'
				 */
				else if (IS_FILLMODE(Np->Num) &&
						 Np->last_relevant && *Np->last_relevant == '.')
				{
					strcpy(Np->inout_p, Np->decimal);	/* Write DEC/D */
					Np->inout_p += strlen(Np->inout_p);
				}
			}
			else
			{
				/*
				 * Write Digits
				 *
				 * Digits behind last_relevant are dropped (empty statement
				 * below) unless this position is a '0' specifier.
				 */
				if (Np->last_relevant && Np->number_p > Np->last_relevant &&
					id != NUM_0)
					;

				/*
				 * '0.1' -- 9.9 --> '  .1'
				 */
				else if (IS_PREDEC_SPACE(Np))
				{
					if (!IS_FILLMODE(Np->Num))
					{
						*Np->inout_p = ' ';
						++Np->inout_p;
					}

					/*
					 * '0' -- FM9.9 --> '0.'
					 */
					else if (Np->last_relevant && *Np->last_relevant == '.')
					{
						*Np->inout_p = '0';
						++Np->inout_p;
					}
				}
				else
				{
					*Np->inout_p = *Np->number_p;	/* Write DIGIT */
					++Np->inout_p;
					Np->num_in = true;
				}
			}
			/* do not step past the end of the number string */
			if (*Np->number_p)
				++Np->number_p;
		}

		/*
		 * Work out which value of num_curr + 1 marks the last numeric
		 * position; fill-mode truncation at last_relevant ends the number
		 * at the current position.
		 */
		end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);

		if (Np->last_relevant && Np->last_relevant == Np->number_p)
			end = Np->num_curr;

		if (Np->num_curr + 1 == end)
		{
			/* last position: close the bracket or append the trailing sign */
			if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
			{
				*Np->inout_p = Np->sign == '+' ? ' ' : '>';
				++Np->inout_p;
			}
			else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
			{
				if (Np->sign == '-')
					strcpy(Np->inout_p, Np->L_negative_sign);
				else
					strcpy(Np->inout_p, Np->L_positive_sign);
				Np->inout_p += strlen(Np->inout_p);
			}
		}
	}

	++Np->num_curr;
}
5559
5560 /*
5561 * Skip over "n" input characters, but only if they aren't numeric data
5562 */
5563 static void
NUM_eat_non_data_chars(NUMProc * Np,int n,int input_len)5564 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
5565 {
5566 while (n-- > 0)
5567 {
5568 if (OVERLOAD_TEST)
5569 break; /* end of input */
5570 if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
5571 break; /* it's a data character */
5572 Np->inout_p += pg_mblen(Np->inout_p);
5573 }
5574 }
5575
5576 static char *
NUM_processor(FormatNode * node,NUMDesc * Num,char * inout,char * number,int input_len,int to_char_out_pre_spaces,int sign,bool is_to_char,Oid collid)5577 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
5578 char *number, int input_len, int to_char_out_pre_spaces,
5579 int sign, bool is_to_char, Oid collid)
5580 {
5581 FormatNode *n;
5582 NUMProc _Np,
5583 *Np = &_Np;
5584 const char *pattern;
5585 int pattern_len;
5586
5587 MemSet(Np, 0, sizeof(NUMProc));
5588
5589 Np->Num = Num;
5590 Np->is_to_char = is_to_char;
5591 Np->number = number;
5592 Np->inout = inout;
5593 Np->last_relevant = NULL;
5594 Np->read_post = 0;
5595 Np->read_pre = 0;
5596 Np->read_dec = false;
5597
5598 if (Np->Num->zero_start)
5599 --Np->Num->zero_start;
5600
5601 if (IS_EEEE(Np->Num))
5602 {
5603 if (!Np->is_to_char)
5604 ereport(ERROR,
5605 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5606 errmsg("\"EEEE\" not supported for input")));
5607 return strcpy(inout, number);
5608 }
5609
5610 /*
5611 * Roman correction
5612 */
5613 if (IS_ROMAN(Np->Num))
5614 {
5615 if (!Np->is_to_char)
5616 ereport(ERROR,
5617 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5618 errmsg("\"RN\" not supported for input")));
5619
5620 Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
5621 Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
5622
5623 if (IS_FILLMODE(Np->Num))
5624 {
5625 Np->Num->flag = 0;
5626 Np->Num->flag |= NUM_F_FILLMODE;
5627 }
5628 else
5629 Np->Num->flag = 0;
5630 Np->Num->flag |= NUM_F_ROMAN;
5631 }
5632
5633 /*
5634 * Sign
5635 */
5636 if (is_to_char)
5637 {
5638 Np->sign = sign;
5639
5640 /* MI/PL/SG - write sign itself and not in number */
5641 if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
5642 {
5643 if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
5644 Np->sign_wrote = false; /* need sign */
5645 else
5646 Np->sign_wrote = true; /* needn't sign */
5647 }
5648 else
5649 {
5650 if (Np->sign != '-')
5651 {
5652 if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
5653 Np->Num->flag &= ~NUM_F_BRACKET;
5654 if (IS_MINUS(Np->Num))
5655 Np->Num->flag &= ~NUM_F_MINUS;
5656 }
5657 else if (Np->sign != '+' && IS_PLUS(Np->Num))
5658 Np->Num->flag &= ~NUM_F_PLUS;
5659
5660 if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
5661 Np->sign_wrote = true; /* needn't sign */
5662 else
5663 Np->sign_wrote = false; /* need sign */
5664
5665 if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
5666 Np->Num->lsign = NUM_LSIGN_POST;
5667 }
5668 }
5669 else
5670 Np->sign = false;
5671
5672 /*
5673 * Count
5674 */
5675 Np->num_count = Np->Num->post + Np->Num->pre - 1;
5676
5677 if (is_to_char)
5678 {
5679 Np->out_pre_spaces = to_char_out_pre_spaces;
5680
5681 if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
5682 {
5683 Np->last_relevant = get_last_relevant_decnum(Np->number);
5684
5685 /*
5686 * If any '0' specifiers are present, make sure we don't strip
5687 * those digits.
5688 */
5689 if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
5690 {
5691 char *last_zero;
5692
5693 last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
5694 if (Np->last_relevant < last_zero)
5695 Np->last_relevant = last_zero;
5696 }
5697 }
5698
5699 if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
5700 ++Np->num_count;
5701 }
5702 else
5703 {
5704 Np->out_pre_spaces = 0;
5705 *Np->number = ' '; /* sign space */
5706 *(Np->number + 1) = '\0';
5707 }
5708
5709 Np->num_in = 0;
5710 Np->num_curr = 0;
5711
5712 #ifdef DEBUG_TO_FROM_CHAR
5713 elog(DEBUG_elog_output,
5714 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
5715 Np->sign,
5716 Np->number,
5717 Np->Num->pre,
5718 Np->Num->post,
5719 Np->num_count,
5720 Np->out_pre_spaces,
5721 Np->sign_wrote ? "Yes" : "No",
5722 IS_ZERO(Np->Num) ? "Yes" : "No",
5723 Np->Num->zero_start,
5724 Np->Num->zero_end,
5725 Np->last_relevant ? Np->last_relevant : "<not set>",
5726 IS_BRACKET(Np->Num) ? "Yes" : "No",
5727 IS_PLUS(Np->Num) ? "Yes" : "No",
5728 IS_MINUS(Np->Num) ? "Yes" : "No",
5729 IS_FILLMODE(Np->Num) ? "Yes" : "No",
5730 IS_ROMAN(Np->Num) ? "Yes" : "No",
5731 IS_EEEE(Np->Num) ? "Yes" : "No"
5732 );
5733 #endif
5734
5735 /*
5736 * Locale
5737 */
5738 NUM_prepare_locale(Np);
5739
5740 /*
5741 * Processor direct cycle
5742 */
5743 if (Np->is_to_char)
5744 Np->number_p = Np->number;
5745 else
5746 Np->number_p = Np->number + 1; /* first char is space for sign */
5747
5748 for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
5749 {
5750 if (!Np->is_to_char)
5751 {
5752 /*
5753 * Check at least one byte remains to be scanned. (In actions
5754 * below, must use AMOUNT_TEST if we want to read more bytes than
5755 * that.)
5756 */
5757 if (OVERLOAD_TEST)
5758 break;
5759 }
5760
5761 /*
5762 * Format pictures actions
5763 */
5764 if (n->type == NODE_TYPE_ACTION)
5765 {
5766 /*
5767 * Create/read digit/zero/blank/sign/special-case
5768 *
5769 * 'NUM_S' note: The locale sign is anchored to number and we
5770 * read/write it when we work with first or last number
5771 * (NUM_0/NUM_9). This is why NUM_S is missing in switch().
5772 *
5773 * Notice the "Np->inout_p++" at the bottom of the loop. This is
5774 * why most of the actions advance inout_p one less than you might
5775 * expect. In cases where we don't want that increment to happen,
5776 * a switch case ends with "continue" not "break".
5777 */
5778 switch (n->key->id)
5779 {
5780 case NUM_9:
5781 case NUM_0:
5782 case NUM_DEC:
5783 case NUM_D:
5784 if (Np->is_to_char)
5785 {
5786 NUM_numpart_to_char(Np, n->key->id);
5787 continue; /* for() */
5788 }
5789 else
5790 {
5791 NUM_numpart_from_char(Np, n->key->id, input_len);
5792 break; /* switch() case: */
5793 }
5794
5795 case NUM_COMMA:
5796 if (Np->is_to_char)
5797 {
5798 if (!Np->num_in)
5799 {
5800 if (IS_FILLMODE(Np->Num))
5801 continue;
5802 else
5803 *Np->inout_p = ' ';
5804 }
5805 else
5806 *Np->inout_p = ',';
5807 }
5808 else
5809 {
5810 if (!Np->num_in)
5811 {
5812 if (IS_FILLMODE(Np->Num))
5813 continue;
5814 }
5815 if (*Np->inout_p != ',')
5816 continue;
5817 }
5818 break;
5819
5820 case NUM_G:
5821 pattern = Np->L_thousands_sep;
5822 pattern_len = strlen(pattern);
5823 if (Np->is_to_char)
5824 {
5825 if (!Np->num_in)
5826 {
5827 if (IS_FILLMODE(Np->Num))
5828 continue;
5829 else
5830 {
5831 /* just in case there are MB chars */
5832 pattern_len = pg_mbstrlen(pattern);
5833 memset(Np->inout_p, ' ', pattern_len);
5834 Np->inout_p += pattern_len - 1;
5835 }
5836 }
5837 else
5838 {
5839 strcpy(Np->inout_p, pattern);
5840 Np->inout_p += pattern_len - 1;
5841 }
5842 }
5843 else
5844 {
5845 if (!Np->num_in)
5846 {
5847 if (IS_FILLMODE(Np->Num))
5848 continue;
5849 }
5850
5851 /*
5852 * Because L_thousands_sep typically contains data
5853 * characters (either '.' or ','), we can't use
5854 * NUM_eat_non_data_chars here. Instead skip only if
5855 * the input matches L_thousands_sep.
5856 */
5857 if (AMOUNT_TEST(pattern_len) &&
5858 strncmp(Np->inout_p, pattern, pattern_len) == 0)
5859 Np->inout_p += pattern_len - 1;
5860 else
5861 continue;
5862 }
5863 break;
5864
5865 case NUM_L:
5866 pattern = Np->L_currency_symbol;
5867 if (Np->is_to_char)
5868 {
5869 strcpy(Np->inout_p, pattern);
5870 Np->inout_p += strlen(pattern) - 1;
5871 }
5872 else
5873 {
5874 NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
5875 continue;
5876 }
5877 break;
5878
5879 case NUM_RN:
5880 if (IS_FILLMODE(Np->Num))
5881 {
5882 strcpy(Np->inout_p, Np->number_p);
5883 Np->inout_p += strlen(Np->inout_p) - 1;
5884 }
5885 else
5886 {
5887 sprintf(Np->inout_p, "%15s", Np->number_p);
5888 Np->inout_p += strlen(Np->inout_p) - 1;
5889 }
5890 break;
5891
5892 case NUM_rn:
5893 if (IS_FILLMODE(Np->Num))
5894 {
5895 strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
5896 Np->inout_p += strlen(Np->inout_p) - 1;
5897 }
5898 else
5899 {
5900 sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
5901 Np->inout_p += strlen(Np->inout_p) - 1;
5902 }
5903 break;
5904
5905 case NUM_th:
5906 if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5907 Np->sign == '-' || IS_DECIMAL(Np->Num))
5908 continue;
5909
5910 if (Np->is_to_char)
5911 {
5912 strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
5913 Np->inout_p += 1;
5914 }
5915 else
5916 {
5917 /* All variants of 'th' occupy 2 characters */
5918 NUM_eat_non_data_chars(Np, 2, input_len);
5919 continue;
5920 }
5921 break;
5922
5923 case NUM_TH:
5924 if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5925 Np->sign == '-' || IS_DECIMAL(Np->Num))
5926 continue;
5927
5928 if (Np->is_to_char)
5929 {
5930 strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
5931 Np->inout_p += 1;
5932 }
5933 else
5934 {
5935 /* All variants of 'TH' occupy 2 characters */
5936 NUM_eat_non_data_chars(Np, 2, input_len);
5937 continue;
5938 }
5939 break;
5940
5941 case NUM_MI:
5942 if (Np->is_to_char)
5943 {
5944 if (Np->sign == '-')
5945 *Np->inout_p = '-';
5946 else if (IS_FILLMODE(Np->Num))
5947 continue;
5948 else
5949 *Np->inout_p = ' ';
5950 }
5951 else
5952 {
5953 if (*Np->inout_p == '-')
5954 *Np->number = '-';
5955 else
5956 {
5957 NUM_eat_non_data_chars(Np, 1, input_len);
5958 continue;
5959 }
5960 }
5961 break;
5962
5963 case NUM_PL:
5964 if (Np->is_to_char)
5965 {
5966 if (Np->sign == '+')
5967 *Np->inout_p = '+';
5968 else if (IS_FILLMODE(Np->Num))
5969 continue;
5970 else
5971 *Np->inout_p = ' ';
5972 }
5973 else
5974 {
5975 if (*Np->inout_p == '+')
5976 *Np->number = '+';
5977 else
5978 {
5979 NUM_eat_non_data_chars(Np, 1, input_len);
5980 continue;
5981 }
5982 }
5983 break;
5984
5985 case NUM_SG:
5986 if (Np->is_to_char)
5987 *Np->inout_p = Np->sign;
5988 else
5989 {
5990 if (*Np->inout_p == '-')
5991 *Np->number = '-';
5992 else if (*Np->inout_p == '+')
5993 *Np->number = '+';
5994 else
5995 {
5996 NUM_eat_non_data_chars(Np, 1, input_len);
5997 continue;
5998 }
5999 }
6000 break;
6001
6002 default:
6003 continue;
6004 break;
6005 }
6006 }
6007 else
6008 {
6009 /*
6010 * In TO_CHAR, non-pattern characters in the format are copied to
6011 * the output. In TO_NUMBER, we skip one input character for each
6012 * non-pattern format character, whether or not it matches the
6013 * format character.
6014 */
6015 if (Np->is_to_char)
6016 {
6017 strcpy(Np->inout_p, n->character);
6018 Np->inout_p += strlen(Np->inout_p);
6019 }
6020 else
6021 {
6022 Np->inout_p += pg_mblen(Np->inout_p);
6023 }
6024 continue;
6025 }
6026 Np->inout_p++;
6027 }
6028
6029 if (Np->is_to_char)
6030 {
6031 *Np->inout_p = '\0';
6032 return Np->inout;
6033 }
6034 else
6035 {
6036 if (*(Np->number_p - 1) == '.')
6037 *(Np->number_p - 1) = '\0';
6038 else
6039 *Np->number_p = '\0';
6040
6041 /*
6042 * Correction - precision of dec. number
6043 */
6044 Np->Num->post = Np->read_post;
6045
6046 #ifdef DEBUG_TO_FROM_CHAR
6047 elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
6048 #endif
6049 return Np->number;
6050 }
6051 }
6052
/* ----------
 * MACRO: common prologue for every NUM to_char() variant.
 *
 * Expects the enclosing function to have 'fmt', 'result', 'format', 'Num'
 * and 'shouldFree' in scope.  When the format picture is empty, or so long
 * that the output buffer size computation could overflow an int, the
 * enclosing function returns an empty text value immediately.  Otherwise a
 * zero-filled output buffer is allocated and the parsed format is fetched
 * through NUM_cache().
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int		fmt_len = VARSIZE_ANY_EXHDR(fmt); \
	\
	/* bail out unless the picture length is positive and safely bounded */ \
	if (!(fmt_len > 0 && fmt_len < (INT_MAX - VARHDRSZ) / NUM_MAX_ITEM_SIZ)) \
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	\
	/* budget NUM_MAX_ITEM_SIZ bytes per format byte, plus NUL and header */ \
	result = (text *) palloc0(VARHDRSZ + (fmt_len * NUM_MAX_ITEM_SIZ) + 1); \
	format = NUM_cache(fmt_len, &Num, fmt, &shouldFree); \
} while (0)
6066
/* ----------
 * MACRO: common epilogue for every NUM to_char() variant.
 *
 * Expects 'format', 'Num', 'result', 'numstr', 'out_pre_spaces', 'sign' and
 * 'shouldFree' in the enclosing function.  Runs NUM_processor() in to_char
 * mode, writing into the data area of 'result', releases the parsed format
 * if the caller owns it, and finally stamps the varlena length of 'result'.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int		out_len; \
	\
	NUM_processor(format, &Num, \
				  VARDATA(result), numstr, \
				  0, out_pre_spaces, sign, \
				  true, PG_GET_COLLATION()); \
	\
	if (shouldFree) \
		pfree(format); \
	\
	/* \
	 * NUM_processor left a NUL-terminated string in the data area; turn \
	 * that into a proper text datum by recording its length.  The buffer \
	 * is normally far larger than required, but shrinking it with a \
	 * realloc is not considered worthwhile. \
	 */ \
	out_len = strlen(VARDATA(result)); \
	SET_VARSIZE(result, VARHDRSZ + out_len); \
} while (0)
6088
6089 /* -------------------
6090 * NUMERIC to_number() (convert string to numeric)
6091 * -------------------
6092 */
6093 Datum
numeric_to_number(PG_FUNCTION_ARGS)6094 numeric_to_number(PG_FUNCTION_ARGS)
6095 {
6096 text *value = PG_GETARG_TEXT_PP(0);
6097 text *fmt = PG_GETARG_TEXT_PP(1);
6098 NUMDesc Num;
6099 Datum result;
6100 FormatNode *format;
6101 char *numstr;
6102 bool shouldFree;
6103 int len = 0;
6104 int scale,
6105 precision;
6106
6107 len = VARSIZE_ANY_EXHDR(fmt);
6108
6109 if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
6110 PG_RETURN_NULL();
6111
6112 format = NUM_cache(len, &Num, fmt, &shouldFree);
6113
6114 numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
6115
6116 NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
6117 VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
6118
6119 scale = Num.post;
6120 precision = Num.pre + Num.multi + scale;
6121
6122 if (shouldFree)
6123 pfree(format);
6124
6125 result = DirectFunctionCall3(numeric_in,
6126 CStringGetDatum(numstr),
6127 ObjectIdGetDatum(InvalidOid),
6128 Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
6129
6130 if (IS_MULTI(&Num))
6131 {
6132 Numeric x;
6133 Numeric a = int64_to_numeric(10);
6134 Numeric b = int64_to_numeric(-Num.multi);
6135
6136 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6137 NumericGetDatum(a),
6138 NumericGetDatum(b)));
6139 result = DirectFunctionCall2(numeric_mul,
6140 result,
6141 NumericGetDatum(x));
6142 }
6143
6144 pfree(numstr);
6145 return result;
6146 }
6147
6148 /* ------------------
6149 * NUMERIC to_char()
6150 * ------------------
6151 */
6152 Datum
numeric_to_char(PG_FUNCTION_ARGS)6153 numeric_to_char(PG_FUNCTION_ARGS)
6154 {
6155 Numeric value = PG_GETARG_NUMERIC(0);
6156 text *fmt = PG_GETARG_TEXT_PP(1);
6157 NUMDesc Num;
6158 FormatNode *format;
6159 text *result;
6160 bool shouldFree;
6161 int out_pre_spaces = 0,
6162 sign = 0;
6163 char *numstr,
6164 *orgnum,
6165 *p;
6166 Numeric x;
6167
6168 NUM_TOCHAR_prepare;
6169
6170 /*
6171 * On DateType depend part (numeric)
6172 */
6173 if (IS_ROMAN(&Num))
6174 {
6175 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6176 NumericGetDatum(value),
6177 Int32GetDatum(0)));
6178 numstr =
6179 int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
6180 NumericGetDatum(x))));
6181 }
6182 else if (IS_EEEE(&Num))
6183 {
6184 orgnum = numeric_out_sci(value, Num.post);
6185
6186 /*
6187 * numeric_out_sci() does not emit a sign for positive numbers. We
6188 * need to add a space in this case so that positive and negative
6189 * numbers are aligned. Also must check for NaN/infinity cases, which
6190 * we handle the same way as in float8_to_char.
6191 */
6192 if (strcmp(orgnum, "NaN") == 0 ||
6193 strcmp(orgnum, "Infinity") == 0 ||
6194 strcmp(orgnum, "-Infinity") == 0)
6195 {
6196 /*
6197 * Allow 6 characters for the leading sign, the decimal point,
6198 * "e", the exponent's sign and two exponent digits.
6199 */
6200 numstr = (char *) palloc(Num.pre + Num.post + 7);
6201 fill_str(numstr, '#', Num.pre + Num.post + 6);
6202 *numstr = ' ';
6203 *(numstr + Num.pre + 1) = '.';
6204 }
6205 else if (*orgnum != '-')
6206 {
6207 numstr = (char *) palloc(strlen(orgnum) + 2);
6208 *numstr = ' ';
6209 strcpy(numstr + 1, orgnum);
6210 }
6211 else
6212 {
6213 numstr = orgnum;
6214 }
6215 }
6216 else
6217 {
6218 int numstr_pre_len;
6219 Numeric val = value;
6220
6221 if (IS_MULTI(&Num))
6222 {
6223 Numeric a = int64_to_numeric(10);
6224 Numeric b = int64_to_numeric(Num.multi);
6225
6226 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6227 NumericGetDatum(a),
6228 NumericGetDatum(b)));
6229 val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
6230 NumericGetDatum(value),
6231 NumericGetDatum(x)));
6232 Num.pre += Num.multi;
6233 }
6234
6235 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6236 NumericGetDatum(val),
6237 Int32GetDatum(Num.post)));
6238 orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
6239 NumericGetDatum(x)));
6240
6241 if (*orgnum == '-')
6242 {
6243 sign = '-';
6244 numstr = orgnum + 1;
6245 }
6246 else
6247 {
6248 sign = '+';
6249 numstr = orgnum;
6250 }
6251
6252 if ((p = strchr(numstr, '.')))
6253 numstr_pre_len = p - numstr;
6254 else
6255 numstr_pre_len = strlen(numstr);
6256
6257 /* needs padding? */
6258 if (numstr_pre_len < Num.pre)
6259 out_pre_spaces = Num.pre - numstr_pre_len;
6260 /* overflowed prefix digit format? */
6261 else if (numstr_pre_len > Num.pre)
6262 {
6263 numstr = (char *) palloc(Num.pre + Num.post + 2);
6264 fill_str(numstr, '#', Num.pre + Num.post + 1);
6265 *(numstr + Num.pre) = '.';
6266 }
6267 }
6268
6269 NUM_TOCHAR_finish;
6270 PG_RETURN_TEXT_P(result);
6271 }
6272
6273 /* ---------------
6274 * INT4 to_char()
6275 * ---------------
6276 */
6277 Datum
int4_to_char(PG_FUNCTION_ARGS)6278 int4_to_char(PG_FUNCTION_ARGS)
6279 {
6280 int32 value = PG_GETARG_INT32(0);
6281 text *fmt = PG_GETARG_TEXT_PP(1);
6282 NUMDesc Num;
6283 FormatNode *format;
6284 text *result;
6285 bool shouldFree;
6286 int out_pre_spaces = 0,
6287 sign = 0;
6288 char *numstr,
6289 *orgnum;
6290
6291 NUM_TOCHAR_prepare;
6292
6293 /*
6294 * On DateType depend part (int32)
6295 */
6296 if (IS_ROMAN(&Num))
6297 numstr = int_to_roman(value);
6298 else if (IS_EEEE(&Num))
6299 {
6300 /* we can do it easily because float8 won't lose any precision */
6301 float8 val = (float8) value;
6302
6303 orgnum = (char *) psprintf("%+.*e", Num.post, val);
6304
6305 /*
6306 * Swap a leading positive sign for a space.
6307 */
6308 if (*orgnum == '+')
6309 *orgnum = ' ';
6310
6311 numstr = orgnum;
6312 }
6313 else
6314 {
6315 int numstr_pre_len;
6316
6317 if (IS_MULTI(&Num))
6318 {
6319 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6320 Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
6321 Num.pre += Num.multi;
6322 }
6323 else
6324 {
6325 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6326 Int32GetDatum(value)));
6327 }
6328
6329 if (*orgnum == '-')
6330 {
6331 sign = '-';
6332 orgnum++;
6333 }
6334 else
6335 sign = '+';
6336
6337 numstr_pre_len = strlen(orgnum);
6338
6339 /* post-decimal digits? Pad out with zeros. */
6340 if (Num.post)
6341 {
6342 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6343 strcpy(numstr, orgnum);
6344 *(numstr + numstr_pre_len) = '.';
6345 memset(numstr + numstr_pre_len + 1, '0', Num.post);
6346 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
6347 }
6348 else
6349 numstr = orgnum;
6350
6351 /* needs padding? */
6352 if (numstr_pre_len < Num.pre)
6353 out_pre_spaces = Num.pre - numstr_pre_len;
6354 /* overflowed prefix digit format? */
6355 else if (numstr_pre_len > Num.pre)
6356 {
6357 numstr = (char *) palloc(Num.pre + Num.post + 2);
6358 fill_str(numstr, '#', Num.pre + Num.post + 1);
6359 *(numstr + Num.pre) = '.';
6360 }
6361 }
6362
6363 NUM_TOCHAR_finish;
6364 PG_RETURN_TEXT_P(result);
6365 }
6366
6367 /* ---------------
6368 * INT8 to_char()
6369 * ---------------
6370 */
6371 Datum
int8_to_char(PG_FUNCTION_ARGS)6372 int8_to_char(PG_FUNCTION_ARGS)
6373 {
6374 int64 value = PG_GETARG_INT64(0);
6375 text *fmt = PG_GETARG_TEXT_PP(1);
6376 NUMDesc Num;
6377 FormatNode *format;
6378 text *result;
6379 bool shouldFree;
6380 int out_pre_spaces = 0,
6381 sign = 0;
6382 char *numstr,
6383 *orgnum;
6384
6385 NUM_TOCHAR_prepare;
6386
6387 /*
6388 * On DateType depend part (int32)
6389 */
6390 if (IS_ROMAN(&Num))
6391 {
6392 /* Currently don't support int8 conversion to roman... */
6393 numstr = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value))));
6394 }
6395 else if (IS_EEEE(&Num))
6396 {
6397 /* to avoid loss of precision, must go via numeric not float8 */
6398 orgnum = numeric_out_sci(int64_to_numeric(value),
6399 Num.post);
6400
6401 /*
6402 * numeric_out_sci() does not emit a sign for positive numbers. We
6403 * need to add a space in this case so that positive and negative
6404 * numbers are aligned. We don't have to worry about NaN/inf here.
6405 */
6406 if (*orgnum != '-')
6407 {
6408 numstr = (char *) palloc(strlen(orgnum) + 2);
6409 *numstr = ' ';
6410 strcpy(numstr + 1, orgnum);
6411 }
6412 else
6413 {
6414 numstr = orgnum;
6415 }
6416 }
6417 else
6418 {
6419 int numstr_pre_len;
6420
6421 if (IS_MULTI(&Num))
6422 {
6423 double multi = pow((double) 10, (double) Num.multi);
6424
6425 value = DatumGetInt64(DirectFunctionCall2(int8mul,
6426 Int64GetDatum(value),
6427 DirectFunctionCall1(dtoi8,
6428 Float8GetDatum(multi))));
6429 Num.pre += Num.multi;
6430 }
6431
6432 orgnum = DatumGetCString(DirectFunctionCall1(int8out,
6433 Int64GetDatum(value)));
6434
6435 if (*orgnum == '-')
6436 {
6437 sign = '-';
6438 orgnum++;
6439 }
6440 else
6441 sign = '+';
6442
6443 numstr_pre_len = strlen(orgnum);
6444
6445 /* post-decimal digits? Pad out with zeros. */
6446 if (Num.post)
6447 {
6448 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6449 strcpy(numstr, orgnum);
6450 *(numstr + numstr_pre_len) = '.';
6451 memset(numstr + numstr_pre_len + 1, '0', Num.post);
6452 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
6453 }
6454 else
6455 numstr = orgnum;
6456
6457 /* needs padding? */
6458 if (numstr_pre_len < Num.pre)
6459 out_pre_spaces = Num.pre - numstr_pre_len;
6460 /* overflowed prefix digit format? */
6461 else if (numstr_pre_len > Num.pre)
6462 {
6463 numstr = (char *) palloc(Num.pre + Num.post + 2);
6464 fill_str(numstr, '#', Num.pre + Num.post + 1);
6465 *(numstr + Num.pre) = '.';
6466 }
6467 }
6468
6469 NUM_TOCHAR_finish;
6470 PG_RETURN_TEXT_P(result);
6471 }
6472
6473 /* -----------------
6474 * FLOAT4 to_char()
6475 * -----------------
6476 */
6477 Datum
float4_to_char(PG_FUNCTION_ARGS)6478 float4_to_char(PG_FUNCTION_ARGS)
6479 {
6480 float4 value = PG_GETARG_FLOAT4(0);
6481 text *fmt = PG_GETARG_TEXT_PP(1);
6482 NUMDesc Num;
6483 FormatNode *format;
6484 text *result;
6485 bool shouldFree;
6486 int out_pre_spaces = 0,
6487 sign = 0;
6488 char *numstr,
6489 *p;
6490
6491 NUM_TOCHAR_prepare;
6492
6493 if (IS_ROMAN(&Num))
6494 numstr = int_to_roman((int) rint(value));
6495 else if (IS_EEEE(&Num))
6496 {
6497 if (isnan(value) || isinf(value))
6498 {
6499 /*
6500 * Allow 6 characters for the leading sign, the decimal point,
6501 * "e", the exponent's sign and two exponent digits.
6502 */
6503 numstr = (char *) palloc(Num.pre + Num.post + 7);
6504 fill_str(numstr, '#', Num.pre + Num.post + 6);
6505 *numstr = ' ';
6506 *(numstr + Num.pre + 1) = '.';
6507 }
6508 else
6509 {
6510 numstr = psprintf("%+.*e", Num.post, value);
6511
6512 /*
6513 * Swap a leading positive sign for a space.
6514 */
6515 if (*numstr == '+')
6516 *numstr = ' ';
6517 }
6518 }
6519 else
6520 {
6521 float4 val = value;
6522 char *orgnum;
6523 int numstr_pre_len;
6524
6525 if (IS_MULTI(&Num))
6526 {
6527 float multi = pow((double) 10, (double) Num.multi);
6528
6529 val = value * multi;
6530 Num.pre += Num.multi;
6531 }
6532
6533 orgnum = psprintf("%.0f", fabs(val));
6534 numstr_pre_len = strlen(orgnum);
6535
6536 /* adjust post digits to fit max float digits */
6537 if (numstr_pre_len >= FLT_DIG)
6538 Num.post = 0;
6539 else if (numstr_pre_len + Num.post > FLT_DIG)
6540 Num.post = FLT_DIG - numstr_pre_len;
6541 orgnum = psprintf("%.*f", Num.post, val);
6542
6543 if (*orgnum == '-')
6544 { /* < 0 */
6545 sign = '-';
6546 numstr = orgnum + 1;
6547 }
6548 else
6549 {
6550 sign = '+';
6551 numstr = orgnum;
6552 }
6553
6554 if ((p = strchr(numstr, '.')))
6555 numstr_pre_len = p - numstr;
6556 else
6557 numstr_pre_len = strlen(numstr);
6558
6559 /* needs padding? */
6560 if (numstr_pre_len < Num.pre)
6561 out_pre_spaces = Num.pre - numstr_pre_len;
6562 /* overflowed prefix digit format? */
6563 else if (numstr_pre_len > Num.pre)
6564 {
6565 numstr = (char *) palloc(Num.pre + Num.post + 2);
6566 fill_str(numstr, '#', Num.pre + Num.post + 1);
6567 *(numstr + Num.pre) = '.';
6568 }
6569 }
6570
6571 NUM_TOCHAR_finish;
6572 PG_RETURN_TEXT_P(result);
6573 }
6574
6575 /* -----------------
6576 * FLOAT8 to_char()
6577 * -----------------
6578 */
6579 Datum
float8_to_char(PG_FUNCTION_ARGS)6580 float8_to_char(PG_FUNCTION_ARGS)
6581 {
6582 float8 value = PG_GETARG_FLOAT8(0);
6583 text *fmt = PG_GETARG_TEXT_PP(1);
6584 NUMDesc Num;
6585 FormatNode *format;
6586 text *result;
6587 bool shouldFree;
6588 int out_pre_spaces = 0,
6589 sign = 0;
6590 char *numstr,
6591 *p;
6592
6593 NUM_TOCHAR_prepare;
6594
6595 if (IS_ROMAN(&Num))
6596 numstr = int_to_roman((int) rint(value));
6597 else if (IS_EEEE(&Num))
6598 {
6599 if (isnan(value) || isinf(value))
6600 {
6601 /*
6602 * Allow 6 characters for the leading sign, the decimal point,
6603 * "e", the exponent's sign and two exponent digits.
6604 */
6605 numstr = (char *) palloc(Num.pre + Num.post + 7);
6606 fill_str(numstr, '#', Num.pre + Num.post + 6);
6607 *numstr = ' ';
6608 *(numstr + Num.pre + 1) = '.';
6609 }
6610 else
6611 {
6612 numstr = psprintf("%+.*e", Num.post, value);
6613
6614 /*
6615 * Swap a leading positive sign for a space.
6616 */
6617 if (*numstr == '+')
6618 *numstr = ' ';
6619 }
6620 }
6621 else
6622 {
6623 float8 val = value;
6624 char *orgnum;
6625 int numstr_pre_len;
6626
6627 if (IS_MULTI(&Num))
6628 {
6629 double multi = pow((double) 10, (double) Num.multi);
6630
6631 val = value * multi;
6632 Num.pre += Num.multi;
6633 }
6634
6635 orgnum = psprintf("%.0f", fabs(val));
6636 numstr_pre_len = strlen(orgnum);
6637
6638 /* adjust post digits to fit max double digits */
6639 if (numstr_pre_len >= DBL_DIG)
6640 Num.post = 0;
6641 else if (numstr_pre_len + Num.post > DBL_DIG)
6642 Num.post = DBL_DIG - numstr_pre_len;
6643 orgnum = psprintf("%.*f", Num.post, val);
6644
6645 if (*orgnum == '-')
6646 { /* < 0 */
6647 sign = '-';
6648 numstr = orgnum + 1;
6649 }
6650 else
6651 {
6652 sign = '+';
6653 numstr = orgnum;
6654 }
6655
6656 if ((p = strchr(numstr, '.')))
6657 numstr_pre_len = p - numstr;
6658 else
6659 numstr_pre_len = strlen(numstr);
6660
6661 /* needs padding? */
6662 if (numstr_pre_len < Num.pre)
6663 out_pre_spaces = Num.pre - numstr_pre_len;
6664 /* overflowed prefix digit format? */
6665 else if (numstr_pre_len > Num.pre)
6666 {
6667 numstr = (char *) palloc(Num.pre + Num.post + 2);
6668 fill_str(numstr, '#', Num.pre + Num.post + 1);
6669 *(numstr + Num.pre) = '.';
6670 }
6671 }
6672
6673 NUM_TOCHAR_finish;
6674 PG_RETURN_TEXT_P(result);
6675 }
6676