1 /* -----------------------------------------------------------------------
2 * formatting.c
3 *
4 * src/backend/utils/adt/formatting.c
5 *
6 *
7 * Portions Copyright (c) 1999-2020, PostgreSQL Global Development Group
8 *
9 *
10 * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11 *
12 * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13 * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14 *
15 *
 * Cache & Memory:
 *	The routines keep their own internal cache of format pictures.
 *
 *	The cache uses a static buffer and is persistent across transactions.  If
 *	a format picture is bigger than the cache buffer, the parser is always
 *	called.
22 *
 * NOTE for Number version:
 *	Everything in this version is implemented as keywords (=> suffixes are
 *	not used), because a format picture is for *one* item (number) only.
 *	This is unlike the timestamp version, where each keyword can have
 *	a suffix.
28 *
 * NOTE for Timestamp routines:
 *	This module does *not* use the POSIX 'struct tm' type, but rather the
 *	PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
 *	year is *not* relative to 1900 but holds the full year number.
 *	The module supports AD / BC / AM / PM.
34 *
35 * Supported types for to_char():
36 *
37 * Timestamp, Numeric, int4, int8, float4, float8
38 *
39 * Supported types for reverse conversion:
40 *
41 * Timestamp - to_timestamp()
42 * Date - to_date()
43 * Numeric - to_number()
44 *
45 *
46 * Karel Zak
47 *
48 * TODO
49 * - better number building (formatting) / parsing, now it isn't
50 * ideal code
51 * - use Assert()
52 * - add support for roman number to standard number conversion
53 * - add support for number spelling
54 * - add support for string to string formatting (we must be better
55 * than Oracle :-),
56 * to_char('Hello', 'X X X X X') -> 'H e l l o'
57 *
58 * -----------------------------------------------------------------------
59 */
60
/*
 * In debug builds (DEBUG_TO_FROM_CHAR defined), DEBUG_elog_output is the
 * elog level handed to the DEBUG_TMFC / DEBUG_TM dump macros.
 */
#if defined(DEBUG_TO_FROM_CHAR)
#define DEBUG_elog_output	DEBUG3
#endif
64
65 #include "postgres.h"
66
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72
73 /*
74 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75 * declare them in <wchar.h>, so include that too.
76 */
77 #include <wchar.h>
78 #ifdef HAVE_WCTYPE_H
79 #include <wctype.h>
80 #endif
81
82 #ifdef USE_ICU
83 #include <unicode/ustring.h>
84 #endif
85
86 #include "catalog/pg_collation.h"
87 #include "catalog/pg_type.h"
88 #include "mb/pg_wchar.h"
89 #include "parser/scansup.h"
90 #include "utils/builtins.h"
91 #include "utils/date.h"
92 #include "utils/datetime.h"
93 #include "utils/float.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/memutils.h"
97 #include "utils/numeric.h"
98 #include "utils/pg_locale.h"
99
/* ----------
 * Convenience macros for error handling
 * ----------
 *
 * Both macros are meant for functions that take a 'bool *have_error'
 * argument and contain an 'on_error' label.  A non-NULL have_error asks the
 * function to suppress ereports when possible: instead it should set the
 * *have_error flag and return some default value.
 *
 * RETURN_ERROR(throw_error) is intended to wrap an ereport() call.  When
 * have_error is NULL the wrapped statement is executed as given; otherwise
 * *have_error is set to true and control jumps to on_error, where the
 * function is expected to free its resources and return a 'default' value.
 *
 * CHECK_ERROR jumps to on_error when have_error is non-NULL and *have_error
 * is set.  Use it for an immediate exit right after calling another
 * function that was passed the 'bool *have_error' argument.
 */
#define RETURN_ERROR(throw_error) \
do { \
	if (!have_error) \
	{ \
		throw_error; \
	} \
	else \
	{ \
		*have_error = true; \
		goto on_error; \
	} \
} while (0)

#define CHECK_ERROR \
do { \
	if (have_error) \
	{ \
		if (*have_error) \
			goto on_error; \
	} \
} while (0)
136
/* ----------
 * Routine flags (distinct bits)
 * ----------
 */
#define DCH_FLAG		(1 << 0)	/* DATE-TIME flag */
#define NUM_FLAG		(1 << 1)	/* NUMBER flag */
#define STD_FLAG		(1 << 2)	/* STANDARD flag */
144
/* ----------
 * KeyWord index (ASCII from position 32 (' ') to 126 (~))
 *
 * KeyWord_INDEX_SIZE is the distance from ' ' to '~'.
 * KeyWord_INDEX_FILTER(c) is 1 for characters strictly between ' ' and '~',
 * and 0 for everything else.
 * ----------
 */
#define KeyWord_INDEX_SIZE			('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	(((_c) > ' ' && (_c) < '~') ? 1 : 0)
151
/* ----------
 * Maximal length of one node
 * ----------
 */

/* date/time nodes: sized for the max localized day name */
#define DCH_MAX_ITEM_SIZ	12

/* number nodes: sized with roman numbers in mind (RN has 15 chars) */
#define NUM_MAX_ITEM_SIZ	8
158
159
/* ----------
 * Format parser structs
 * ----------
 */

/* Description of one keyword prefix/postfix modifier. */
typedef struct
{
	const char *name;			/* suffix string */
	int			len;			/* suffix length */
	int			id;				/* used in node->suffix */
	int			type;			/* prefix / postfix */
} KeySuffix;
171
/* ----------
 * FromCharDateMode
 * ----------
 *
 * Nominates one of several distinct (and mutually exclusive) date
 * conventions that a keyword can belong to.
 */
typedef enum
{
	/* Value does not affect date mode. */
	FROM_CHAR_DATE_NONE = 0,

	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_GREGORIAN = 1,

	/* ISO 8601 week date */
	FROM_CHAR_DATE_ISOWEEK = 2
} FromCharDateMode;
185
186 typedef struct
187 {
188 const char *name;
189 int len;
190 int id;
191 bool is_digit;
192 FromCharDateMode date_mode;
193 } KeyWord;
194
195 typedef struct
196 {
197 uint8 type; /* NODE_TYPE_XXX, see below */
198 char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
199 uint8 suffix; /* keyword prefix/suffix code, if any */
200 const KeyWord *key; /* if type is ACTION */
201 } FormatNode;
202
/* Values for FormatNode.type */
#define NODE_TYPE_END			1
#define NODE_TYPE_ACTION		2
#define NODE_TYPE_CHAR			3
#define NODE_TYPE_SEPARATOR		4
#define NODE_TYPE_SPACE			5

/* Values for KeySuffix.type (prefix / postfix) */
#define SUFFTYPE_PREFIX			1
#define SUFFTYPE_POSTFIX		2

/* 24-hour versus 12-hour clock */
#define CLOCK_24_HOUR			0
#define CLOCK_12_HOUR			1
214
215
/* ----------
 * Full month names (NULL-terminated list)
 * ----------
 */
static const char *const months_full[] = {
	"January", "February", "March",
	"April", "May", "June",
	"July", "August", "September",
	"October", "November", "December",
	NULL
};

/* Abbreviated day names, starting with Sunday (NULL-terminated list) */
static const char *const days_short[] = {
	"Sun",
	"Mon", "Tue", "Wed", "Thu", "Fri",
	"Sat",
	NULL
};
228
/* ----------
 * AD / BC
 * ----------
 * There is no 0 AD; years go from 1 BC to 1 AD.  For non-interval values,
 * ADJUST_YEAR therefore returns a positive year number: a year <= 0 becomes
 * 1 - year (so 0 maps to 1 and -1 maps to 2), while positive years are
 * returned as they are.  For interval years, we just return the year.
 */
#define ADJUST_YEAR(year, is_interval) \
	((!(is_interval) && (year) <= 0) ? -((year) - 1) : (year))

#define A_D_STR		"A.D."
#define a_d_STR		"a.d."
#define AD_STR		"AD"
#define ad_STR		"ad"

#define B_C_STR		"B.C."
#define b_c_STR		"b.c."
#define BC_STR		"BC"
#define bc_STR		"bc"

/*
 * AD / BC strings for seq_search.
 *
 * There are two variants: a long form with periods and a standard form
 * without.
 *
 * In both arrays the AD matches sit at even indexes and the BC matches at
 * odd indexes, so "is BC" is simply the index of the match modulo 2.
 */
static const char *const adbc_strings[] = {
	ad_STR, bc_STR,
	AD_STR, BC_STR,
	NULL
};

static const char *const adbc_strings_long[] = {
	a_d_STR, b_c_STR,
	A_D_STR, B_C_STR,
	NULL
};
260
/* ----------
 * AM / PM
 * ----------
 */
#define A_M_STR		"A.M."
#define a_m_STR		"a.m."
#define AM_STR		"AM"
#define am_STR		"am"

#define P_M_STR		"P.M."
#define p_m_STR		"p.m."
#define PM_STR		"PM"
#define pm_STR		"pm"

/*
 * AM / PM strings for seq_search.
 *
 * There are two variants: a long form with periods and a standard form
 * without.
 *
 * In both arrays the AM matches sit at even indexes and the PM matches at
 * odd indexes, so "is PM" is simply the index of the match modulo 2.
 */
static const char *const ampm_strings[] = {
	am_STR, pm_STR,
	AM_STR, PM_STR,
	NULL
};

static const char *const ampm_strings_long[] = {
	a_m_STR, p_m_STR,
	A_M_STR, P_M_STR,
	NULL
};
287
/* ----------
 * Months in roman numerals
 *
 * The lists run from December down to January.  The reverse order is
 * required by seq_search (in FROM_CHAR), because 'VIII' must have higher
 * precedence than 'V'.
 * ----------
 */
static const char *const rm_months_upper[] = {
	"XII", "XI", "X", "IX", "VIII", "VII",
	"VI", "V", "IV", "III", "II", "I",
	NULL
};

static const char *const rm_months_lower[] = {
	"xii", "xi", "x", "ix", "viii", "vii",
	"vi", "v", "iv", "iii", "ii", "i",
	NULL
};

/* ----------
 * Roman numbers: spellings of 1..9, 10..90 and 100..900
 * ----------
 */
static const char *const rm1[] = {
	"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX",
	NULL
};

static const char *const rm10[] = {
	"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC",
	NULL
};

static const char *const rm100[] = {
	"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM",
	NULL
};
307
/* ----------
 * Ordinal postfixes, in upper and lower case (NULL-terminated lists)
 * ----------
 */
static const char *const numTH[] = {
	"ST", "ND", "RD", "TH",
	NULL
};

static const char *const numth[] = {
	"st", "nd", "rd", "th",
	NULL
};

/* ----------
 * Flags & Options: case of an ordinal postfix
 * ----------
 */
#define TH_UPPER	1
#define TH_LOWER	2
321
/* ----------
 * Number description struct
 * ----------
 */
typedef struct
{
	int			pre;			/* (count) numbers before decimal */
	int			post;			/* (count) numbers after decimal */
	int			lsign;			/* want locales sign */
	int			flag;			/* number parameters */
	int			pre_lsign_num;	/* tmp value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of first zero */
	int			zero_end;		/* position of last zero */
	int			need_locale;	/* needs it locale */
} NUMDesc;
338
/* ----------
 * Flags for NUMBER version (bits of NUMDesc.flag; bit 0 is not used)
 * ----------
 */
#define NUM_F_DECIMAL		0x0002
#define NUM_F_LDECIMAL		0x0004
#define NUM_F_ZERO			0x0008
#define NUM_F_BLANK			0x0010
#define NUM_F_FILLMODE		0x0020
#define NUM_F_LSIGN			0x0040
#define NUM_F_BRACKET		0x0080
#define NUM_F_MINUS			0x0100
#define NUM_F_PLUS			0x0200
#define NUM_F_ROMAN			0x0400
#define NUM_F_MULTI			0x0800
#define NUM_F_PLUS_POST		0x1000
#define NUM_F_MINUS_POST	0x2000
#define NUM_F_EEEE			0x4000

#define NUM_LSIGN_PRE		(-1)
#define NUM_LSIGN_POST		1
#define NUM_LSIGN_NONE		0

/* ----------
 * Tests
 *
 * Each test takes a pointer to a struct with a 'flag' member and yields the
 * masked bit itself (zero or the flag value), not a normalized 0/1.
 * ----------
 */
#define IS_DECIMAL(_f)		((_f)->flag & NUM_F_DECIMAL)
#define IS_LDECIMAL(_f)		((_f)->flag & NUM_F_LDECIMAL)
#define IS_ZERO(_f)			((_f)->flag & NUM_F_ZERO)
#define IS_BLANK(_f)		((_f)->flag & NUM_F_BLANK)
#define IS_FILLMODE(_f)		((_f)->flag & NUM_F_FILLMODE)
#define IS_BRACKET(_f)		((_f)->flag & NUM_F_BRACKET)
#define IS_MINUS(_f)		((_f)->flag & NUM_F_MINUS)
#define IS_LSIGN(_f)		((_f)->flag & NUM_F_LSIGN)
#define IS_PLUS(_f)			((_f)->flag & NUM_F_PLUS)
#define IS_ROMAN(_f)		((_f)->flag & NUM_F_ROMAN)
#define IS_MULTI(_f)		((_f)->flag & NUM_F_MULTI)
#define IS_EEEE(_f)			((_f)->flag & NUM_F_EEEE)
378
/* ----------
 * Format picture cache
 *
 * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
 * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
 *
 * For simplicity, the cache entries are fixed-size, so they allow for the
 * worst case of a FormatNode for each byte in the picture string.
 *
 * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
 * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
 * we don't waste too much space by palloc'ing them individually.  Be sure
 * to adjust those macros if you add fields to those structs.
 *
 * The OVERHEAD macros account for the non-array fields of the entries:
 * DCHCacheEntry has two bools (std, valid) and an int (age); NUMCacheEntry
 * has one bool (valid), an int (age) and a NUMDesc.  Counting both bools of
 * DCHCacheEntry does not change the result for any maximum alignment that
 * is a power of two >= 2; it only keeps the macro in step with the struct.
 *
 * The max number of entries in each cache is DCH_CACHE_ENTRIES
 * resp. NUM_CACHE_ENTRIES.
 * ----------
 */
#define DCH_CACHE_OVERHEAD \
	MAXALIGN(2 * sizeof(bool) + sizeof(int))
#define NUM_CACHE_OVERHEAD \
	MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))

#define DCH_CACHE_SIZE \
	((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
#define NUM_CACHE_SIZE \
	((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)

#define DCH_CACHE_ENTRIES	20
#define NUM_CACHE_ENTRIES	20
409
410 typedef struct
411 {
412 FormatNode format[DCH_CACHE_SIZE + 1];
413 char str[DCH_CACHE_SIZE + 1];
414 bool std;
415 bool valid;
416 int age;
417 } DCHCacheEntry;
418
419 typedef struct
420 {
421 FormatNode format[NUM_CACHE_SIZE + 1];
422 char str[NUM_CACHE_SIZE + 1];
423 bool valid;
424 int age;
425 NUMDesc Num;
426 } NUMCacheEntry;
427
428 /* global cache for date/time format pictures */
429 static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES];
430 static int n_DCHCache = 0; /* current number of entries */
431 static int DCHCounter = 0; /* aging-event counter */
432
433 /* global cache for number format pictures */
434 static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES];
435 static int n_NUMCache = 0; /* current number of entries */
436 static int NUMCounter = 0; /* aging-event counter */
437
438 /* ----------
439 * For char->date/time conversion
440 * ----------
441 */
442 typedef struct
443 {
444 FromCharDateMode mode;
445 int hh,
446 pm,
447 mi,
448 ss,
449 ssss,
450 d, /* stored as 1-7, Sunday = 1, 0 means missing */
451 dd,
452 ddd,
453 mm,
454 ms,
455 year,
456 bc,
457 ww,
458 w,
459 cc,
460 j,
461 us,
462 yysz, /* is it YY or YYYY ? */
463 clock, /* 12 or 24 hour clock? */
464 tzsign, /* +1, -1 or 0 if timezone info is absent */
465 tzh,
466 tzm,
467 ff; /* fractional precision */
468 } TmFromChar;
469
470 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
471
/* ----------
 * Debug
 *
 * With DEBUG_TO_FROM_CHAR defined, DEBUG_TMFC(x) dumps all fields of the
 * TmFromChar that x points to, and DEBUG_TM(x) dumps the listed tm_* fields
 * of the struct that x points to, both through elog() at level
 * DEBUG_elog_output.  Without it, both macros expand to nothing.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
	elog(DEBUG_elog_output, \
		 "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\n" \
		 "d %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\n" \
		 "ww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d\n" \
		 "tzsign: %d\ntzh: %d\ntzm: %d\nff: %d", \
		 (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
		 (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
		 (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
		 (_X)->yysz, (_X)->clock, \
		 (_X)->tzsign, (_X)->tzh, (_X)->tzm, (_X)->ff)
#define DEBUG_TM(_X) \
	elog(DEBUG_elog_output, \
		 "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\n" \
		 "mday %d\nisdst %d\nmon %d\n", \
		 (_X)->tm_sec, (_X)->tm_year, \
		 (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday, \
		 (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
492
493 /* ----------
494 * Datetime to char conversion
495 * ----------
496 */
497 typedef struct TmToChar
498 {
499 struct pg_tm tm; /* classic 'tm' struct */
500 fsec_t fsec; /* fractional seconds */
501 const char *tzn; /* timezone */
502 } TmToChar;
503
504 #define tmtcTm(_X) (&(_X)->tm)
505 #define tmtcTzn(_X) ((_X)->tzn)
506 #define tmtcFsec(_X) ((_X)->fsec)
507
508 #define ZERO_tm(_X) \
509 do { \
510 (_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \
511 (_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \
512 (_X)->tm_mday = (_X)->tm_mon = 1; \
513 (_X)->tm_zone = NULL; \
514 } while(0)
515
516 #define ZERO_tmtc(_X) \
517 do { \
518 ZERO_tm( tmtcTm(_X) ); \
519 tmtcFsec(_X) = 0; \
520 tmtcTzn(_X) = NULL; \
521 } while(0)
522
/*
 * to_char(time) appears to to_char() as an interval, so this check
 * is really for interval and time data types.
 *
 * The macro expects a variable named 'is_interval' in scope and reports an
 * ERROR when it is set.
 */
#define INVALID_FOR_INTERVAL \
do { \
	if (is_interval) \
	{ \
		ereport(ERROR, \
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
				 errmsg("invalid format specification for an interval value"), \
				 errhint("Intervals are not tied to specific calendar dates."))); \
	} \
} while(0)
535
/*****************************************************************************
 *			KeyWord definitions
 *****************************************************************************/

/* ----------
 * Suffixes (FormatNode.suffix is an OR of these codes)
 * ----------
 */
#define DCH_S_FM	(1 << 0)
#define DCH_S_TH	(1 << 1)
#define DCH_S_th	(1 << 2)
#define DCH_S_SP	(1 << 3)
#define DCH_S_TM	(1 << 4)

/* ----------
 * Suffix tests
 *
 * Each test yields 1 or 0, except S_TH_TYPE, which yields TH_UPPER when
 * DCH_S_TH is set and TH_LOWER otherwise.
 * ----------
 */
#define S_TH(_s)		(((_s) & DCH_S_TH) != 0)
#define S_th(_s)		(((_s) & DCH_S_th) != 0)
#define S_THth(_s)		(S_TH(_s) || S_th(_s))
#define S_TH_TYPE(_s)	(S_TH(_s) ? TH_UPPER : TH_LOWER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)		(((_s) & DCH_S_FM) != 0)
#define S_SP(_s)		(((_s) & DCH_S_SP) != 0)
#define S_TM(_s)		(((_s) & DCH_S_TM) != 0)
563
564 /* ----------
565 * Suffixes definition for DATE-TIME TO/FROM CHAR
566 * ----------
567 */
568 #define TM_SUFFIX_LEN 2
569
570 static const KeySuffix DCH_suff[] = {
571 {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
572 {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
573 {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
574 {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
575 {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
576 {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
577 {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
578 /* last */
579 {NULL, 0, 0, 0}
580 };
581
582
/* ----------
 * Format-pictures (KeyWord).
 *
 * The KeyWord arrays are sorted alphabetically, *BUT* keywords that start
 * alike are sorted from complicated to easy:
 *
 *	(example: "DDD","DD","Day","D" )
 *
 * This specific order is needed by the sequential search, because the
 * searched string has no exact end.  Which keyword is in "HH12blabla" -
 * "HH" or "HH12"?  "HH12" must be tried first: "HH" is in the string too,
 * but it is not the right match.
 *
 * (!)
 *	 - The position of a keyword in the array is the same as its position
 *	   in the enum DCH_poz / NUM_poz.
 * (!)
 *
 * For fast search an 'int index[]' is used.  The index is an ASCII table
 * from position 32 (' ') to 126 (~); for each ASCII position it holds a
 * DCH_ / NUM_ enum value, or -1 if the char is not used in the KeyWord.
 * Search example for the string "MM":
 *	1) look into the index at index['M' - 32],
 *	2) take the keyword position (enum DCH_MI) from the index,
 *	3) run a sequential search in keywords[] from this position.
 *
 * ----------
 */
610
/*
 * Positions of the date/time keywords, grouped here by first letter.  The
 * enumerators are implicitly numbered from 0, and DCH_keywords[] lists its
 * entries in the same order, so the order must not change.
 */
typedef enum
{
	/* A */
	DCH_A_D, DCH_A_M, DCH_AD, DCH_AM,
	/* B */
	DCH_B_C, DCH_BC,
	/* C */
	DCH_CC,
	/* D */
	DCH_DAY, DCH_DDD, DCH_DD, DCH_DY, DCH_Day, DCH_Dy, DCH_D,
	/* F */
	DCH_FF1, DCH_FF2, DCH_FF3, DCH_FF4, DCH_FF5, DCH_FF6,
	DCH_FX,						/* global suffix */
	/* H */
	DCH_HH24, DCH_HH12, DCH_HH,
	/* I */
	DCH_IDDD, DCH_ID, DCH_IW, DCH_IYYY, DCH_IYY, DCH_IY, DCH_I,
	/* J */
	DCH_J,
	/* M */
	DCH_MI, DCH_MM, DCH_MONTH, DCH_MON, DCH_MS, DCH_Month, DCH_Mon,
	/* O */
	DCH_OF,
	/* P */
	DCH_P_M, DCH_PM,
	/* Q */
	DCH_Q,
	/* R */
	DCH_RM,
	/* S */
	DCH_SSSSS, DCH_SSSS, DCH_SS,
	/* T */
	DCH_TZH, DCH_TZM, DCH_TZ,
	/* U */
	DCH_US,
	/* W */
	DCH_WW, DCH_W,
	/* Y */
	DCH_Y_YYY, DCH_YYYY, DCH_YYY, DCH_YY, DCH_Y,

	/* a */
	DCH_a_d, DCH_a_m, DCH_ad, DCH_am,
	/* b */
	DCH_b_c, DCH_bc,
	/* c */
	DCH_cc,
	/* d */
	DCH_day, DCH_ddd, DCH_dd, DCH_dy, DCH_d,
	/* f */
	DCH_ff1, DCH_ff2, DCH_ff3, DCH_ff4, DCH_ff5, DCH_ff6,
	DCH_fx,
	/* h */
	DCH_hh24, DCH_hh12, DCH_hh,
	/* i */
	DCH_iddd, DCH_id, DCH_iw, DCH_iyyy, DCH_iyy, DCH_iy, DCH_i,
	/* j */
	DCH_j,
	/* m */
	DCH_mi, DCH_mm, DCH_month, DCH_mon, DCH_ms,
	/* p */
	DCH_p_m, DCH_pm,
	/* q */
	DCH_q,
	/* r */
	DCH_rm,
	/* s */
	DCH_sssss, DCH_ssss, DCH_ss,
	/* t */
	DCH_tz,
	/* u */
	DCH_us,
	/* w */
	DCH_ww, DCH_w,
	/* y */
	DCH_y_yyy, DCH_yyyy, DCH_yyy, DCH_yy, DCH_y,

	/* last */
	_DCH_last_
} DCH_poz;
726
/*
 * Positions of the number keywords.  The enumerators are implicitly
 * numbered from 0, and NUM_keywords[] lists its entries in the same order,
 * so the order must not change.
 */
typedef enum
{
	/* punctuation and digits */
	NUM_COMMA, NUM_DEC, NUM_0, NUM_9,

	/* upper case */
	NUM_B, NUM_C, NUM_D, NUM_E,
	NUM_FM, NUM_G, NUM_L, NUM_MI,
	NUM_PL, NUM_PR, NUM_RN,
	NUM_SG, NUM_SP, NUM_S,
	NUM_TH, NUM_V,

	/* lower case */
	NUM_b, NUM_c, NUM_d, NUM_e,
	NUM_fm, NUM_g, NUM_l, NUM_mi,
	NUM_pl, NUM_pr, NUM_rn,
	NUM_sg, NUM_sp, NUM_s,
	NUM_th, NUM_v,

	/* last */
	_NUM_last_
} NUM_poz;
769
770 /* ----------
771 * KeyWords for DATE-TIME version
772 * ----------
773 */
774 static const KeyWord DCH_keywords[] = {
775 /* name, len, id, is_digit, date_mode */
776 {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
777 {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
778 {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
779 {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
780 {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
781 {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
782 {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
783 {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
784 {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
785 {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
786 {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
787 {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
788 {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
789 {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
790 {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
791 {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
792 {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
793 {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
794 {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
795 {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
796 {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
797 {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
798 {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
799 {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
800 {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
801 {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
802 {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
803 {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
804 {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
805 {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
806 {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
807 {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
808 {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
809 {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
810 {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
811 {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
812 {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
813 {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
814 {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
815 {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
816 {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
817 {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
818 {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
819 {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
820 {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
821 {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
822 {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
823 {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
824 {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
825 {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
826 {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
827 {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
828 {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
829 {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
830 {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
831 {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
832 {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
833 {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
834 {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
835 {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
836 {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
837 {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
838 {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
839 {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
840 {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
841 {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
842 {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
843 {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
844 {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
845 {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
846 {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
847 {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
848 {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
849 {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
850 {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
851 {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
852 {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
853 {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
854 {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
855 {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
856 {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
857 {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
858 {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
859 {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
860 {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
861 {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
862 {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
863 {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
864 {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
865 {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
866 {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
867 {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
868 {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
869 {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
870 {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
871 {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
872 {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
873 {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
874 {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
875 {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
876 {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */
877 {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
878 {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
879 {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
880 {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
881 {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
882 {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
883 {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
884 {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
885
886 /* last */
887 {NULL, 0, 0, 0, 0}
888 };
889
890 /* ----------
891 * KeyWords for NUMBER version
892 *
893 * The is_digit and date_mode fields are not relevant here.
894 * ----------
895 */
896 static const KeyWord NUM_keywords[] = {
897 /* name, len, id is in Index */
898 {",", 1, NUM_COMMA}, /* , */
899 {".", 1, NUM_DEC}, /* . */
900 {"0", 1, NUM_0}, /* 0 */
901 {"9", 1, NUM_9}, /* 9 */
902 {"B", 1, NUM_B}, /* B */
903 {"C", 1, NUM_C}, /* C */
904 {"D", 1, NUM_D}, /* D */
905 {"EEEE", 4, NUM_E}, /* E */
906 {"FM", 2, NUM_FM}, /* F */
907 {"G", 1, NUM_G}, /* G */
908 {"L", 1, NUM_L}, /* L */
909 {"MI", 2, NUM_MI}, /* M */
910 {"PL", 2, NUM_PL}, /* P */
911 {"PR", 2, NUM_PR},
912 {"RN", 2, NUM_RN}, /* R */
913 {"SG", 2, NUM_SG}, /* S */
914 {"SP", 2, NUM_SP},
915 {"S", 1, NUM_S},
916 {"TH", 2, NUM_TH}, /* T */
917 {"V", 1, NUM_V}, /* V */
918 {"b", 1, NUM_B}, /* b */
919 {"c", 1, NUM_C}, /* c */
920 {"d", 1, NUM_D}, /* d */
921 {"eeee", 4, NUM_E}, /* e */
922 {"fm", 2, NUM_FM}, /* f */
923 {"g", 1, NUM_G}, /* g */
924 {"l", 1, NUM_L}, /* l */
925 {"mi", 2, NUM_MI}, /* m */
926 {"pl", 2, NUM_PL}, /* p */
927 {"pr", 2, NUM_PR},
928 {"rn", 2, NUM_rn}, /* r */
929 {"sg", 2, NUM_SG}, /* s */
930 {"sp", 2, NUM_SP},
931 {"s", 1, NUM_S},
932 {"th", 2, NUM_th}, /* t */
933 {"v", 1, NUM_V}, /* v */
934
935 /* last */
936 {NULL, 0, 0}
937 };
938
939
940 /* ----------
941 * KeyWords index for DATE-TIME version
942 * ----------
943 */
944 static const int DCH_index[KeyWord_INDEX_SIZE] = {
945 /*
946 0 1 2 3 4 5 6 7 8 9
947 */
948 /*---- first 0..31 chars are skipped ----*/
949
950 -1, -1, -1, -1, -1, -1, -1, -1,
951 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
952 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
953 -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
954 DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
955 DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,
956 -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
957 DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
958 -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww,
959 -1, DCH_y_yyy, -1, -1, -1, -1
960
961 /*---- chars over 126 are skipped ----*/
962 };
963
964 /* ----------
965 * KeyWords index for NUMBER version
966 * ----------
967 */
968 static const int NUM_index[KeyWord_INDEX_SIZE] = {
969 /*
970 0 1 2 3 4 5 6 7 8 9
971 */
972 /*---- first 0..31 chars are skipped ----*/
973
974 -1, -1, -1, -1, -1, -1, -1, -1,
975 -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
976 -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
977 -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
978 NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
979 NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
980 -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
981 NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
982 -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
983 -1, -1, -1, -1, -1, -1
984
985 /*---- chars over 126 are skipped ----*/
986 };
987
988 /* ----------
989 * Number processor struct
990 * ----------
991 */
992 typedef struct NUMProc
993 {
994 bool is_to_char;
995 NUMDesc *Num; /* number description */
996
997 int sign, /* '-' or '+' */
998 sign_wrote, /* was sign write */
999 num_count, /* number of write digits */
1000 num_in, /* is inside number */
1001 num_curr, /* current position in number */
1002 out_pre_spaces, /* spaces before first digit */
1003
1004 read_dec, /* to_number - was read dec. point */
1005 read_post, /* to_number - number of dec. digit */
1006 read_pre; /* to_number - number non-dec. digit */
1007
1008 char *number, /* string with number */
1009 *number_p, /* pointer to current number position */
1010 *inout, /* in / out buffer */
1011 *inout_p, /* pointer to current inout position */
1012 *last_relevant, /* last relevant number after decimal point */
1013
1014 *L_negative_sign, /* Locale */
1015 *L_positive_sign,
1016 *decimal,
1017 *L_thousands_sep,
1018 *L_currency_symbol;
1019 } NUMProc;
1020
/* Return flags for DCH_from_char() (distinct bits) */
#define DCH_DATED	(1 << 0)
#define DCH_TIMED	(1 << 1)
#define DCH_ZONED	(1 << 2)
1025
1026 /* ----------
1027 * Functions
1028 * ----------
1029 */
1030 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1031 const int *index);
1032 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1033 static bool is_separator_char(const char *str);
1034 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
1035 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1036 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1037
1038 static void DCH_to_char(FormatNode *node, bool is_interval,
1039 TmToChar *in, char *out, Oid collid);
1040 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
1041 Oid collid, bool std, bool *have_error);
1042
1043 #ifdef DEBUG_TO_FROM_CHAR
1044 static void dump_index(const KeyWord *k, const int *index);
1045 static void dump_node(FormatNode *node, int max);
1046 #endif
1047
1048 static const char *get_th(char *num, int type);
1049 static char *str_numth(char *dest, char *num, int type);
1050 static int adjust_partial_year_to_2020(int year);
1051 static int strspace_len(const char *str);
1052 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1053 bool *have_error);
1054 static void from_char_set_int(int *dest, const int value, const FormatNode *node,
1055 bool *have_error);
1056 static int from_char_parse_int_len(int *dest, const char **src, const int len,
1057 FormatNode *node, bool *have_error);
1058 static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
1059 bool *have_error);
1060 static int seq_search_ascii(const char *name, const char *const *array, int *len);
1061 static int seq_search_localized(const char *name, char **array, int *len,
1062 Oid collid);
1063 static int from_char_seq_search(int *dest, const char **src,
1064 const char *const *array,
1065 char **localized_array, Oid collid,
1066 FormatNode *node, bool *have_error);
1067 static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
1068 struct pg_tm *tm, fsec_t *fsec, int *fprec,
1069 uint32 *flags, bool *have_error);
1070 static char *fill_str(char *str, int c, int max);
1071 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1072 static char *int_to_roman(int number);
1073 static void NUM_prepare_locale(NUMProc *Np);
1074 static char *get_last_relevant_decnum(char *num);
1075 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1076 static void NUM_numpart_to_char(NUMProc *Np, int id);
1077 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1078 char *number, int input_len, int to_char_out_pre_spaces,
1079 int sign, bool is_to_char, Oid collid);
1080 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1081 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1082 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1083 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1084 static NUMCacheEntry *NUM_cache_search(const char *str);
1085 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1086
1087
1088 /* ----------
1089 * Fast sequential search, use index for data selection which
1090 * go to seq. cycle (it is very fast for unwanted strings)
1091 * (can't be used binary search in format parsing)
1092 * ----------
1093 */
1094 static const KeyWord *
index_seq_search(const char * str,const KeyWord * kw,const int * index)1095 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1096 {
1097 int poz;
1098
1099 if (!KeyWord_INDEX_FILTER(*str))
1100 return NULL;
1101
1102 if ((poz = *(index + (*str - ' '))) > -1)
1103 {
1104 const KeyWord *k = kw + poz;
1105
1106 do
1107 {
1108 if (strncmp(str, k->name, k->len) == 0)
1109 return k;
1110 k++;
1111 if (!k->name)
1112 return NULL;
1113 } while (*str == *k->name);
1114 }
1115 return NULL;
1116 }
1117
1118 static const KeySuffix *
suff_search(const char * str,const KeySuffix * suf,int type)1119 suff_search(const char *str, const KeySuffix *suf, int type)
1120 {
1121 const KeySuffix *s;
1122
1123 for (s = suf; s->name != NULL; s++)
1124 {
1125 if (s->type != type)
1126 continue;
1127
1128 if (strncmp(str, s->name, s->len) == 0)
1129 return s;
1130 }
1131 return NULL;
1132 }
1133
/*
 * Does "str" begin with an ASCII printable character (0x21..0x7E) that is
 * neither a letter nor a digit?  Only the first byte is examined.
 */
static bool
is_separator_char(const char *str)
{
	const char	c = *str;

	/* outside the printable range; space (0x20) itself is excluded */
	if (c <= 0x20 || c >= 0x7F)
		return false;

	if (c >= 'A' && c <= 'Z')
		return false;
	if (c >= 'a' && c <= 'z')
		return false;
	if (c >= '0' && c <= '9')
		return false;

	return true;
}
1143
1144 /* ----------
1145 * Prepare NUMDesc (number description struct) via FormatNode struct
1146 * ----------
1147 */
1148 static void
NUMDesc_prepare(NUMDesc * num,FormatNode * n)1149 NUMDesc_prepare(NUMDesc *num, FormatNode *n)
1150 {
1151 if (n->type != NODE_TYPE_ACTION)
1152 return;
1153
1154 if (IS_EEEE(num) && n->key->id != NUM_E)
1155 ereport(ERROR,
1156 (errcode(ERRCODE_SYNTAX_ERROR),
1157 errmsg("\"EEEE\" must be the last pattern used")));
1158
1159 switch (n->key->id)
1160 {
1161 case NUM_9:
1162 if (IS_BRACKET(num))
1163 ereport(ERROR,
1164 (errcode(ERRCODE_SYNTAX_ERROR),
1165 errmsg("\"9\" must be ahead of \"PR\"")));
1166 if (IS_MULTI(num))
1167 {
1168 ++num->multi;
1169 break;
1170 }
1171 if (IS_DECIMAL(num))
1172 ++num->post;
1173 else
1174 ++num->pre;
1175 break;
1176
1177 case NUM_0:
1178 if (IS_BRACKET(num))
1179 ereport(ERROR,
1180 (errcode(ERRCODE_SYNTAX_ERROR),
1181 errmsg("\"0\" must be ahead of \"PR\"")));
1182 if (!IS_ZERO(num) && !IS_DECIMAL(num))
1183 {
1184 num->flag |= NUM_F_ZERO;
1185 num->zero_start = num->pre + 1;
1186 }
1187 if (!IS_DECIMAL(num))
1188 ++num->pre;
1189 else
1190 ++num->post;
1191
1192 num->zero_end = num->pre + num->post;
1193 break;
1194
1195 case NUM_B:
1196 if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1197 num->flag |= NUM_F_BLANK;
1198 break;
1199
1200 case NUM_D:
1201 num->flag |= NUM_F_LDECIMAL;
1202 num->need_locale = true;
1203 /* FALLTHROUGH */
1204 case NUM_DEC:
1205 if (IS_DECIMAL(num))
1206 ereport(ERROR,
1207 (errcode(ERRCODE_SYNTAX_ERROR),
1208 errmsg("multiple decimal points")));
1209 if (IS_MULTI(num))
1210 ereport(ERROR,
1211 (errcode(ERRCODE_SYNTAX_ERROR),
1212 errmsg("cannot use \"V\" and decimal point together")));
1213 num->flag |= NUM_F_DECIMAL;
1214 break;
1215
1216 case NUM_FM:
1217 num->flag |= NUM_F_FILLMODE;
1218 break;
1219
1220 case NUM_S:
1221 if (IS_LSIGN(num))
1222 ereport(ERROR,
1223 (errcode(ERRCODE_SYNTAX_ERROR),
1224 errmsg("cannot use \"S\" twice")));
1225 if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1226 ereport(ERROR,
1227 (errcode(ERRCODE_SYNTAX_ERROR),
1228 errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1229 if (!IS_DECIMAL(num))
1230 {
1231 num->lsign = NUM_LSIGN_PRE;
1232 num->pre_lsign_num = num->pre;
1233 num->need_locale = true;
1234 num->flag |= NUM_F_LSIGN;
1235 }
1236 else if (num->lsign == NUM_LSIGN_NONE)
1237 {
1238 num->lsign = NUM_LSIGN_POST;
1239 num->need_locale = true;
1240 num->flag |= NUM_F_LSIGN;
1241 }
1242 break;
1243
1244 case NUM_MI:
1245 if (IS_LSIGN(num))
1246 ereport(ERROR,
1247 (errcode(ERRCODE_SYNTAX_ERROR),
1248 errmsg("cannot use \"S\" and \"MI\" together")));
1249 num->flag |= NUM_F_MINUS;
1250 if (IS_DECIMAL(num))
1251 num->flag |= NUM_F_MINUS_POST;
1252 break;
1253
1254 case NUM_PL:
1255 if (IS_LSIGN(num))
1256 ereport(ERROR,
1257 (errcode(ERRCODE_SYNTAX_ERROR),
1258 errmsg("cannot use \"S\" and \"PL\" together")));
1259 num->flag |= NUM_F_PLUS;
1260 if (IS_DECIMAL(num))
1261 num->flag |= NUM_F_PLUS_POST;
1262 break;
1263
1264 case NUM_SG:
1265 if (IS_LSIGN(num))
1266 ereport(ERROR,
1267 (errcode(ERRCODE_SYNTAX_ERROR),
1268 errmsg("cannot use \"S\" and \"SG\" together")));
1269 num->flag |= NUM_F_MINUS;
1270 num->flag |= NUM_F_PLUS;
1271 break;
1272
1273 case NUM_PR:
1274 if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1275 ereport(ERROR,
1276 (errcode(ERRCODE_SYNTAX_ERROR),
1277 errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1278 num->flag |= NUM_F_BRACKET;
1279 break;
1280
1281 case NUM_rn:
1282 case NUM_RN:
1283 num->flag |= NUM_F_ROMAN;
1284 break;
1285
1286 case NUM_L:
1287 case NUM_G:
1288 num->need_locale = true;
1289 break;
1290
1291 case NUM_V:
1292 if (IS_DECIMAL(num))
1293 ereport(ERROR,
1294 (errcode(ERRCODE_SYNTAX_ERROR),
1295 errmsg("cannot use \"V\" and decimal point together")));
1296 num->flag |= NUM_F_MULTI;
1297 break;
1298
1299 case NUM_E:
1300 if (IS_EEEE(num))
1301 ereport(ERROR,
1302 (errcode(ERRCODE_SYNTAX_ERROR),
1303 errmsg("cannot use \"EEEE\" twice")));
1304 if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1305 IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1306 IS_ROMAN(num) || IS_MULTI(num))
1307 ereport(ERROR,
1308 (errcode(ERRCODE_SYNTAX_ERROR),
1309 errmsg("\"EEEE\" is incompatible with other formats"),
1310 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1311 num->flag |= NUM_F_EEEE;
1312 break;
1313 }
1314 }
1315
1316 /* ----------
1317 * Format parser, search small keywords and keyword's suffixes, and make
1318 * format-node tree.
1319 *
1320 * for DATE-TIME & NUMBER version
1321 * ----------
1322 */
1323 static void
parse_format(FormatNode * node,const char * str,const KeyWord * kw,const KeySuffix * suf,const int * index,uint32 flags,NUMDesc * Num)1324 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1325 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1326 {
1327 FormatNode *n;
1328
1329 #ifdef DEBUG_TO_FROM_CHAR
1330 elog(DEBUG_elog_output, "to_char/number(): run parser");
1331 #endif
1332
1333 n = node;
1334
1335 while (*str)
1336 {
1337 int suffix = 0;
1338 const KeySuffix *s;
1339
1340 /*
1341 * Prefix
1342 */
1343 if ((flags & DCH_FLAG) &&
1344 (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1345 {
1346 suffix |= s->id;
1347 if (s->len)
1348 str += s->len;
1349 }
1350
1351 /*
1352 * Keyword
1353 */
1354 if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1355 {
1356 n->type = NODE_TYPE_ACTION;
1357 n->suffix = suffix;
1358 if (n->key->len)
1359 str += n->key->len;
1360
1361 /*
1362 * NUM version: Prepare global NUMDesc struct
1363 */
1364 if (flags & NUM_FLAG)
1365 NUMDesc_prepare(Num, n);
1366
1367 /*
1368 * Postfix
1369 */
1370 if ((flags & DCH_FLAG) && *str &&
1371 (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1372 {
1373 n->suffix |= s->id;
1374 if (s->len)
1375 str += s->len;
1376 }
1377
1378 n++;
1379 }
1380 else if (*str)
1381 {
1382 int chlen;
1383
1384 if ((flags & STD_FLAG) && *str != '"')
1385 {
1386 /*
1387 * Standard mode, allow only following separators: "-./,':; ".
1388 * However, we support double quotes even in standard mode
1389 * (see below). This is our extension of standard mode.
1390 */
1391 if (strchr("-./,':; ", *str) == NULL)
1392 ereport(ERROR,
1393 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1394 errmsg("invalid datetime format separator: \"%s\"",
1395 pnstrdup(str, pg_mblen(str)))));
1396
1397 if (*str == ' ')
1398 n->type = NODE_TYPE_SPACE;
1399 else
1400 n->type = NODE_TYPE_SEPARATOR;
1401
1402 n->character[0] = *str;
1403 n->character[1] = '\0';
1404 n->key = NULL;
1405 n->suffix = 0;
1406 n++;
1407 str++;
1408 }
1409 else if (*str == '"')
1410 {
1411 /*
1412 * Process double-quoted literal string, if any
1413 */
1414 str++;
1415 while (*str)
1416 {
1417 if (*str == '"')
1418 {
1419 str++;
1420 break;
1421 }
1422 /* backslash quotes the next character, if any */
1423 if (*str == '\\' && *(str + 1))
1424 str++;
1425 chlen = pg_mblen(str);
1426 n->type = NODE_TYPE_CHAR;
1427 memcpy(n->character, str, chlen);
1428 n->character[chlen] = '\0';
1429 n->key = NULL;
1430 n->suffix = 0;
1431 n++;
1432 str += chlen;
1433 }
1434 }
1435 else
1436 {
1437 /*
1438 * Outside double-quoted strings, backslash is only special if
1439 * it immediately precedes a double quote.
1440 */
1441 if (*str == '\\' && *(str + 1) == '"')
1442 str++;
1443 chlen = pg_mblen(str);
1444
1445 if ((flags & DCH_FLAG) && is_separator_char(str))
1446 n->type = NODE_TYPE_SEPARATOR;
1447 else if (isspace((unsigned char) *str))
1448 n->type = NODE_TYPE_SPACE;
1449 else
1450 n->type = NODE_TYPE_CHAR;
1451
1452 memcpy(n->character, str, chlen);
1453 n->character[chlen] = '\0';
1454 n->key = NULL;
1455 n->suffix = 0;
1456 n++;
1457 str += chlen;
1458 }
1459 }
1460 }
1461
1462 n->type = NODE_TYPE_END;
1463 n->suffix = 0;
1464 }
1465
/* ----------
 * DEBUG: Dump the FormatNode array
 *
 * Emits one DEBUG_elog_output line per node, starting at "node" and stopping
 * at the NODE_TYPE_END node or after max + 1 nodes, whichever comes first.
 * Separator and space nodes, which parse_format() produces alongside
 * character nodes, are reported with their own type name rather than as
 * unknown nodes.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	FormatNode *n;
	int			a;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (a = 0, n = node; a <= max; n++, a++)
	{
		if (n->type == NODE_TYPE_ACTION)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
		else if (n->type == NODE_TYPE_CHAR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_SEPARATOR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SEPARATOR '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_SPACE)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SPACE '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
			return;
		}
		else
			elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
	}
}
#endif							/* DEBUG */
1501
1502 /*****************************************************************************
1503 * Private utils
1504 *****************************************************************************/
1505
1506 /* ----------
1507 * Return ST/ND/RD/TH for simple (1..9) numbers
1508 * type --> 0 upper, 1 lower
1509 * ----------
1510 */
1511 static const char *
get_th(char * num,int type)1512 get_th(char *num, int type)
1513 {
1514 int len = strlen(num),
1515 last,
1516 seclast;
1517
1518 last = *(num + (len - 1));
1519 if (!isdigit((unsigned char) last))
1520 ereport(ERROR,
1521 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1522 errmsg("\"%s\" is not a number", num)));
1523
1524 /*
1525 * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1526 * 'ST/st', 'ND/nd', 'RD/rd', respectively
1527 */
1528 if ((len > 1) && ((seclast = num[len - 2]) == '1'))
1529 last = 0;
1530
1531 switch (last)
1532 {
1533 case '1':
1534 if (type == TH_UPPER)
1535 return numTH[0];
1536 return numth[0];
1537 case '2':
1538 if (type == TH_UPPER)
1539 return numTH[1];
1540 return numth[1];
1541 case '3':
1542 if (type == TH_UPPER)
1543 return numTH[2];
1544 return numth[2];
1545 default:
1546 if (type == TH_UPPER)
1547 return numTH[3];
1548 return numth[3];
1549 }
1550 }
1551
/* ----------
 * Turn the number string "num" into an ordinal by appending the suffix that
 * get_th(num, type) selects.
 *
 * The result is built in "dest", which may be the same buffer as "num", and
 * "dest" is returned.  No bounds checking is done here, so "dest" must have
 * room for the number plus the suffix.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	if (dest != num)
		strcpy(dest, num);

	/* pick the suffix before anything is appended (dest may alias num) */
	suffix = get_th(num, type);
	strcat(dest, suffix);

	return dest;
}
1565
1566 /*****************************************************************************
1567 * upper/lower/initcap functions
1568 *****************************************************************************/
1569
1570 #ifdef USE_ICU
1571
1572 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1573 const UChar *src, int32_t srcLength,
1574 const char *locale,
1575 UErrorCode *pErrorCode);
1576
1577 static int32_t
icu_convert_case(ICU_Convert_Func func,pg_locale_t mylocale,UChar ** buff_dest,UChar * buff_source,int32_t len_source)1578 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1579 UChar **buff_dest, UChar *buff_source, int32_t len_source)
1580 {
1581 UErrorCode status;
1582 int32_t len_dest;
1583
1584 len_dest = len_source; /* try first with same length */
1585 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1586 status = U_ZERO_ERROR;
1587 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1588 mylocale->info.icu.locale, &status);
1589 if (status == U_BUFFER_OVERFLOW_ERROR)
1590 {
1591 /* try again with adjusted length */
1592 pfree(*buff_dest);
1593 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1594 status = U_ZERO_ERROR;
1595 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1596 mylocale->info.icu.locale, &status);
1597 }
1598 if (U_FAILURE(status))
1599 ereport(ERROR,
1600 (errmsg("case conversion failed: %s", u_errorName(status))));
1601 return len_dest;
1602 }
1603
1604 static int32_t
u_strToTitle_default_BI(UChar * dest,int32_t destCapacity,const UChar * src,int32_t srcLength,const char * locale,UErrorCode * pErrorCode)1605 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1606 const UChar *src, int32_t srcLength,
1607 const char *locale,
1608 UErrorCode *pErrorCode)
1609 {
1610 return u_strToTitle(dest, destCapacity, src, srcLength,
1611 NULL, locale, pErrorCode);
1612 }
1613
1614 #endif /* USE_ICU */
1615
1616 /*
1617 * If the system provides the needed functions for wide-character manipulation
1618 * (which are all standardized by C99), then we implement upper/lower/initcap
1619 * using wide-character functions, if necessary. Otherwise we use the
1620 * traditional <ctype.h> functions, which of course will not work as desired
1621 * in multibyte character sets. Note that in either case we are effectively
1622 * assuming that the database character encoding matches the encoding implied
1623 * by LC_CTYPE.
1624 *
1625 * If the system provides locale_t and associated functions (which are
1626 * standardized by Open Group's XBD), we can support collations that are
1627 * neither default nor C. The code is written to handle both combinations
1628 * of have-wide-characters and have-locale_t, though it's rather unlikely
1629 * a platform would have the latter without the former.
1630 */
1631
1632 /*
1633 * collation-aware, wide-character-aware lower function
1634 *
1635 * We pass the number of bytes so we can pass varlena and char*
1636 * to this function. The result is a palloc'd, null-terminated string.
1637 */
1638 char *
str_tolower(const char * buff,size_t nbytes,Oid collid)1639 str_tolower(const char *buff, size_t nbytes, Oid collid)
1640 {
1641 char *result;
1642
1643 if (!buff)
1644 return NULL;
1645
1646 /* C/POSIX collations use this path regardless of database encoding */
1647 if (lc_ctype_is_c(collid))
1648 {
1649 result = asc_tolower(buff, nbytes);
1650 }
1651 else
1652 {
1653 pg_locale_t mylocale = 0;
1654
1655 if (collid != DEFAULT_COLLATION_OID)
1656 {
1657 if (!OidIsValid(collid))
1658 {
1659 /*
1660 * This typically means that the parser could not resolve a
1661 * conflict of implicit collations, so report it that way.
1662 */
1663 ereport(ERROR,
1664 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1665 errmsg("could not determine which collation to use for %s function",
1666 "lower()"),
1667 errhint("Use the COLLATE clause to set the collation explicitly.")));
1668 }
1669 mylocale = pg_newlocale_from_collation(collid);
1670 }
1671
1672 #ifdef USE_ICU
1673 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1674 {
1675 int32_t len_uchar;
1676 int32_t len_conv;
1677 UChar *buff_uchar;
1678 UChar *buff_conv;
1679
1680 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1681 len_conv = icu_convert_case(u_strToLower, mylocale,
1682 &buff_conv, buff_uchar, len_uchar);
1683 icu_from_uchar(&result, buff_conv, len_conv);
1684 pfree(buff_uchar);
1685 pfree(buff_conv);
1686 }
1687 else
1688 #endif
1689 {
1690 if (pg_database_encoding_max_length() > 1)
1691 {
1692 wchar_t *workspace;
1693 size_t curr_char;
1694 size_t result_size;
1695
1696 /* Overflow paranoia */
1697 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1698 ereport(ERROR,
1699 (errcode(ERRCODE_OUT_OF_MEMORY),
1700 errmsg("out of memory")));
1701
1702 /* Output workspace cannot have more codes than input bytes */
1703 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1704
1705 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1706
1707 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1708 {
1709 #ifdef HAVE_LOCALE_T
1710 if (mylocale)
1711 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1712 else
1713 #endif
1714 workspace[curr_char] = towlower(workspace[curr_char]);
1715 }
1716
1717 /*
1718 * Make result large enough; case change might change number
1719 * of bytes
1720 */
1721 result_size = curr_char * pg_database_encoding_max_length() + 1;
1722 result = palloc(result_size);
1723
1724 wchar2char(result, workspace, result_size, mylocale);
1725 pfree(workspace);
1726 }
1727 else
1728 {
1729 char *p;
1730
1731 result = pnstrdup(buff, nbytes);
1732
1733 /*
1734 * Note: we assume that tolower_l() will not be so broken as
1735 * to need an isupper_l() guard test. When using the default
1736 * collation, we apply the traditional Postgres behavior that
1737 * forces ASCII-style treatment of I/i, but in non-default
1738 * collations you get exactly what the collation says.
1739 */
1740 for (p = result; *p; p++)
1741 {
1742 #ifdef HAVE_LOCALE_T
1743 if (mylocale)
1744 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1745 else
1746 #endif
1747 *p = pg_tolower((unsigned char) *p);
1748 }
1749 }
1750 }
1751 }
1752
1753 return result;
1754 }
1755
1756 /*
1757 * collation-aware, wide-character-aware upper function
1758 *
1759 * We pass the number of bytes so we can pass varlena and char*
1760 * to this function. The result is a palloc'd, null-terminated string.
1761 */
1762 char *
str_toupper(const char * buff,size_t nbytes,Oid collid)1763 str_toupper(const char *buff, size_t nbytes, Oid collid)
1764 {
1765 char *result;
1766
1767 if (!buff)
1768 return NULL;
1769
1770 /* C/POSIX collations use this path regardless of database encoding */
1771 if (lc_ctype_is_c(collid))
1772 {
1773 result = asc_toupper(buff, nbytes);
1774 }
1775 else
1776 {
1777 pg_locale_t mylocale = 0;
1778
1779 if (collid != DEFAULT_COLLATION_OID)
1780 {
1781 if (!OidIsValid(collid))
1782 {
1783 /*
1784 * This typically means that the parser could not resolve a
1785 * conflict of implicit collations, so report it that way.
1786 */
1787 ereport(ERROR,
1788 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1789 errmsg("could not determine which collation to use for %s function",
1790 "upper()"),
1791 errhint("Use the COLLATE clause to set the collation explicitly.")));
1792 }
1793 mylocale = pg_newlocale_from_collation(collid);
1794 }
1795
1796 #ifdef USE_ICU
1797 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1798 {
1799 int32_t len_uchar,
1800 len_conv;
1801 UChar *buff_uchar;
1802 UChar *buff_conv;
1803
1804 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1805 len_conv = icu_convert_case(u_strToUpper, mylocale,
1806 &buff_conv, buff_uchar, len_uchar);
1807 icu_from_uchar(&result, buff_conv, len_conv);
1808 pfree(buff_uchar);
1809 pfree(buff_conv);
1810 }
1811 else
1812 #endif
1813 {
1814 if (pg_database_encoding_max_length() > 1)
1815 {
1816 wchar_t *workspace;
1817 size_t curr_char;
1818 size_t result_size;
1819
1820 /* Overflow paranoia */
1821 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1822 ereport(ERROR,
1823 (errcode(ERRCODE_OUT_OF_MEMORY),
1824 errmsg("out of memory")));
1825
1826 /* Output workspace cannot have more codes than input bytes */
1827 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1828
1829 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1830
1831 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1832 {
1833 #ifdef HAVE_LOCALE_T
1834 if (mylocale)
1835 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1836 else
1837 #endif
1838 workspace[curr_char] = towupper(workspace[curr_char]);
1839 }
1840
1841 /*
1842 * Make result large enough; case change might change number
1843 * of bytes
1844 */
1845 result_size = curr_char * pg_database_encoding_max_length() + 1;
1846 result = palloc(result_size);
1847
1848 wchar2char(result, workspace, result_size, mylocale);
1849 pfree(workspace);
1850 }
1851 else
1852 {
1853 char *p;
1854
1855 result = pnstrdup(buff, nbytes);
1856
1857 /*
1858 * Note: we assume that toupper_l() will not be so broken as
1859 * to need an islower_l() guard test. When using the default
1860 * collation, we apply the traditional Postgres behavior that
1861 * forces ASCII-style treatment of I/i, but in non-default
1862 * collations you get exactly what the collation says.
1863 */
1864 for (p = result; *p; p++)
1865 {
1866 #ifdef HAVE_LOCALE_T
1867 if (mylocale)
1868 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1869 else
1870 #endif
1871 *p = pg_toupper((unsigned char) *p);
1872 }
1873 }
1874 }
1875 }
1876
1877 return result;
1878 }
1879
1880 /*
1881 * collation-aware, wide-character-aware initcap function
1882 *
1883 * We pass the number of bytes so we can pass varlena and char*
1884 * to this function. The result is a palloc'd, null-terminated string.
1885 */
1886 char *
str_initcap(const char * buff,size_t nbytes,Oid collid)1887 str_initcap(const char *buff, size_t nbytes, Oid collid)
1888 {
1889 char *result;
1890 int wasalnum = false;
1891
1892 if (!buff)
1893 return NULL;
1894
1895 /* C/POSIX collations use this path regardless of database encoding */
1896 if (lc_ctype_is_c(collid))
1897 {
1898 result = asc_initcap(buff, nbytes);
1899 }
1900 else
1901 {
1902 pg_locale_t mylocale = 0;
1903
1904 if (collid != DEFAULT_COLLATION_OID)
1905 {
1906 if (!OidIsValid(collid))
1907 {
1908 /*
1909 * This typically means that the parser could not resolve a
1910 * conflict of implicit collations, so report it that way.
1911 */
1912 ereport(ERROR,
1913 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1914 errmsg("could not determine which collation to use for %s function",
1915 "initcap()"),
1916 errhint("Use the COLLATE clause to set the collation explicitly.")));
1917 }
1918 mylocale = pg_newlocale_from_collation(collid);
1919 }
1920
1921 #ifdef USE_ICU
1922 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1923 {
1924 int32_t len_uchar,
1925 len_conv;
1926 UChar *buff_uchar;
1927 UChar *buff_conv;
1928
1929 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1930 len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1931 &buff_conv, buff_uchar, len_uchar);
1932 icu_from_uchar(&result, buff_conv, len_conv);
1933 pfree(buff_uchar);
1934 pfree(buff_conv);
1935 }
1936 else
1937 #endif
1938 {
1939 if (pg_database_encoding_max_length() > 1)
1940 {
1941 wchar_t *workspace;
1942 size_t curr_char;
1943 size_t result_size;
1944
1945 /* Overflow paranoia */
1946 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1947 ereport(ERROR,
1948 (errcode(ERRCODE_OUT_OF_MEMORY),
1949 errmsg("out of memory")));
1950
1951 /* Output workspace cannot have more codes than input bytes */
1952 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1953
1954 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1955
1956 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1957 {
1958 #ifdef HAVE_LOCALE_T
1959 if (mylocale)
1960 {
1961 if (wasalnum)
1962 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1963 else
1964 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1965 wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1966 }
1967 else
1968 #endif
1969 {
1970 if (wasalnum)
1971 workspace[curr_char] = towlower(workspace[curr_char]);
1972 else
1973 workspace[curr_char] = towupper(workspace[curr_char]);
1974 wasalnum = iswalnum(workspace[curr_char]);
1975 }
1976 }
1977
1978 /*
1979 * Make result large enough; case change might change number
1980 * of bytes
1981 */
1982 result_size = curr_char * pg_database_encoding_max_length() + 1;
1983 result = palloc(result_size);
1984
1985 wchar2char(result, workspace, result_size, mylocale);
1986 pfree(workspace);
1987 }
1988 else
1989 {
1990 char *p;
1991
1992 result = pnstrdup(buff, nbytes);
1993
1994 /*
1995 * Note: we assume that toupper_l()/tolower_l() will not be so
1996 * broken as to need guard tests. When using the default
1997 * collation, we apply the traditional Postgres behavior that
1998 * forces ASCII-style treatment of I/i, but in non-default
1999 * collations you get exactly what the collation says.
2000 */
2001 for (p = result; *p; p++)
2002 {
2003 #ifdef HAVE_LOCALE_T
2004 if (mylocale)
2005 {
2006 if (wasalnum)
2007 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2008 else
2009 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2010 wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2011 }
2012 else
2013 #endif
2014 {
2015 if (wasalnum)
2016 *p = pg_tolower((unsigned char) *p);
2017 else
2018 *p = pg_toupper((unsigned char) *p);
2019 wasalnum = isalnum((unsigned char) *p);
2020 }
2021 }
2022 }
2023 }
2024 }
2025
2026 return result;
2027 }
2028
/*
 * asc_tolower: ASCII-only lower-casing
 *
 * Copies at most "nbytes" bytes of "buff" with pnstrdup() and passes every
 * byte of the copy through pg_ascii_tolower().  The byte count lets callers
 * pass varlena data as well as char * strings.  The result is a palloc'd,
 * null-terminated string; a NULL "buff" yields NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_tolower((unsigned char) result[i]);

	return result;
}
2051
/*
 * asc_toupper: ASCII-only upper-casing
 *
 * Copies at most "nbytes" bytes of "buff" with pnstrdup() and passes every
 * byte of the copy through pg_ascii_toupper().  The byte count lets callers
 * pass varlena data as well as char * strings.  The result is a palloc'd,
 * null-terminated string; a NULL "buff" yields NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_toupper((unsigned char) result[i]);

	return result;
}
2074
/*
 * asc_initcap: ASCII-only initcap
 *
 * Works on a pnstrdup() copy of at most "nbytes" bytes of "buff".  A byte is
 * upper-cased when the byte before it (after conversion) was not an ASCII
 * letter or digit, and lower-cased otherwise; the first byte is upper-cased.
 * The result is a palloc'd, null-terminated string; a NULL "buff" yields
 * NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;
	int			wasalnum = false;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
	{
		unsigned char in = (unsigned char) result[i];
		char		c;

		c = wasalnum ? pg_ascii_tolower(in) : pg_ascii_toupper(in);
		result[i] = c;

		/* we don't trust isalnum() here */
		wasalnum = ((c >= 'A' && c <= 'Z') ||
					(c >= 'a' && c <= 'z') ||
					(c >= '0' && c <= '9'));
	}

	return result;
}
2109
2110 /* convenience routines for when the input is null-terminated */
2111
2112 static char *
str_tolower_z(const char * buff,Oid collid)2113 str_tolower_z(const char *buff, Oid collid)
2114 {
2115 return str_tolower(buff, strlen(buff), collid);
2116 }
2117
2118 static char *
str_toupper_z(const char * buff,Oid collid)2119 str_toupper_z(const char *buff, Oid collid)
2120 {
2121 return str_toupper(buff, strlen(buff), collid);
2122 }
2123
2124 static char *
str_initcap_z(const char * buff,Oid collid)2125 str_initcap_z(const char *buff, Oid collid)
2126 {
2127 return str_initcap(buff, strlen(buff), collid);
2128 }
2129
/*
 * asc_tolower() for a null-terminated string.  "buff" must not be NULL,
 * because strlen() is applied to it before asc_tolower() is called.
 */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2135
/*
 * asc_toupper() for a null-terminated string.  "buff" must not be NULL,
 * because strlen() is applied to it before asc_toupper() is called.
 */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2141
2142 /* asc_initcap_z is not currently needed */
2143
2144
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for the suffix, advance "ptr" over up to two characters
 * (each measured with pg_mblen), never stepping past the terminating NUL.
 * "ptr" is evaluated more than once, so it must be a plain lvalue without
 * side effects.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (!(S_THth(_suf))) \
			break; \
		if (*(ptr)) \
			(ptr) += pg_mblen(ptr); \
		if (*(ptr)) \
			(ptr) += pg_mblen(ptr); \
	} while (0)
2159
2160
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: Print the keyword index for inspection.
 *
 * For each of the KeyWord_INDEX_SIZE slots, show the character it stands
 * for (slot number + 32) and either the keyword the slot points to or, for
 * a slot holding -1, the raw slot contents.  A final line reports how many
 * slots are used and how many are free.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			slot;
	int			used = 0;
	int			unused = 0;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (slot = 0; slot < KeyWord_INDEX_SIZE; slot++)
	{
		if (index[slot] == -1)
		{
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", slot, slot + 32, index[slot]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", slot + 32, k[index[slot]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2193
2194 /* ----------
2195 * Return true if next format picture is not digit value
2196 * ----------
2197 */
2198 static bool
is_next_separator(FormatNode * n)2199 is_next_separator(FormatNode *n)
2200 {
2201 if (n->type == NODE_TYPE_END)
2202 return false;
2203
2204 if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2205 return true;
2206
2207 /*
2208 * Next node
2209 */
2210 n++;
2211
2212 /* end of format string is treated like a non-digit separator */
2213 if (n->type == NODE_TYPE_END)
2214 return true;
2215
2216 if (n->type == NODE_TYPE_ACTION)
2217 {
2218 if (n->key->is_digit)
2219 return false;
2220
2221 return true;
2222 }
2223 else if (n->character[1] == '\0' &&
2224 isdigit((unsigned char) n->character[0]))
2225 return false;
2226
2227 return true; /* some non-digit input (separator) */
2228 }
2229
2230
/*
 * Expand an abbreviated year so that it lands close to 2020; this is
 * effectively what happens when we assume '70' is 1970 and '69' is 2069.
 *
 *	below 70	-> year + 2000
 *	70 .. 99	-> year + 1900
 *	100 .. 519	-> year + 2000
 *	520 .. 999	-> year + 1000
 *	1000 and up	-> returned unchanged
 */
static int
adjust_partial_year_to_2020(int year)
{
	int			century_base;

	if (year >= 1000)
		century_base = 0;
	else if (year >= 520)
		century_base = 1000;
	else if (year >= 100)
		century_base = 2000;
	else if (year >= 70)
		century_base = 1900;
	else
		century_base = 2000;

	return year + century_base;
}
2253
2254
/*
 * Count the whitespace characters (per isspace()) at the start of 'str'.
 * The count stops at the first non-space character or at the end of the
 * string, whichever comes first.
 */
static int
strspace_len(const char *str)
{
	const char *p = str;

	while (*p != '\0' && isspace((unsigned char) *p))
		p++;

	return (int) (p - str);
}
2267
2268 /*
2269 * Set the date mode of a from-char conversion.
2270 *
2271 * Puke if the date mode has already been set, and the caller attempts to set
2272 * it to a conflicting mode.
2273 *
2274 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2275 */
2276 static void
from_char_set_mode(TmFromChar * tmfc,const FromCharDateMode mode,bool * have_error)2277 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
2278 {
2279 if (mode != FROM_CHAR_DATE_NONE)
2280 {
2281 if (tmfc->mode == FROM_CHAR_DATE_NONE)
2282 tmfc->mode = mode;
2283 else if (tmfc->mode != mode)
2284 RETURN_ERROR(ereport(ERROR,
2285 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2286 errmsg("invalid combination of date conventions"),
2287 errhint("Do not mix Gregorian and ISO week date "
2288 "conventions in a formatting template."))));
2289 }
2290
2291 on_error:
2292 return;
2293 }
2294
2295 /*
2296 * Set the integer pointed to by 'dest' to the given value.
2297 *
2298 * Puke if the destination integer has previously been set to some other
2299 * non-zero value.
2300 *
2301 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2302 */
2303 static void
from_char_set_int(int * dest,const int value,const FormatNode * node,bool * have_error)2304 from_char_set_int(int *dest, const int value, const FormatNode *node,
2305 bool *have_error)
2306 {
2307 if (*dest != 0 && *dest != value)
2308 RETURN_ERROR(ereport(ERROR,
2309 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2310 errmsg("conflicting values for \"%s\" field in "
2311 "formatting string",
2312 node->key->name),
2313 errdetail("This value contradicts a previous setting "
2314 "for the same field type."))));
2315 *dest = value;
2316
2317 on_error:
2318 return;
2319 }
2320
2321 /*
2322 * Read a single integer from the source string, into the int pointed to by
2323 * 'dest'. If 'dest' is NULL, the result is discarded.
2324 *
2325 * In fixed-width mode (the node does not have the FM suffix), consume at most
2326 * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2327 *
2328 * We use strtol() to recover the integer value from the source string, in
2329 * accordance with the given FormatNode.
2330 *
2331 * If the conversion completes successfully, src will have been advanced to
2332 * point at the character immediately following the last character used in the
2333 * conversion.
2334 *
2335 * Return the number of characters consumed.
2336 *
2337 * Note that from_char_parse_int() provides a more convenient wrapper where
2338 * the length of the field is the same as the length of the format keyword (as
2339 * with DD and MI).
2340 *
2341 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
2342 * and -1 is returned.
2343 */
2344 static int
from_char_parse_int_len(int * dest,const char ** src,const int len,FormatNode * node,bool * have_error)2345 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2346 bool *have_error)
2347 {
2348 long result;
2349 char copy[DCH_MAX_ITEM_SIZ + 1];
2350 const char *init = *src;
2351 int used;
2352
2353 /*
2354 * Skip any whitespace before parsing the integer.
2355 */
2356 *src += strspace_len(*src);
2357
2358 Assert(len <= DCH_MAX_ITEM_SIZ);
2359 used = (int) strlcpy(copy, *src, len + 1);
2360
2361 if (S_FM(node->suffix) || is_next_separator(node))
2362 {
2363 /*
2364 * This node is in Fill Mode, or the next node is known to be a
2365 * non-digit value, so we just slurp as many characters as we can get.
2366 */
2367 char *endptr;
2368
2369 errno = 0;
2370 result = strtol(init, &endptr, 10);
2371 *src = endptr;
2372 }
2373 else
2374 {
2375 /*
2376 * We need to pull exactly the number of characters given in 'len' out
2377 * of the string, and convert those.
2378 */
2379 char *last;
2380
2381 if (used < len)
2382 RETURN_ERROR(ereport(ERROR,
2383 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2384 errmsg("source string too short for \"%s\" "
2385 "formatting field",
2386 node->key->name),
2387 errdetail("Field requires %d characters, "
2388 "but only %d remain.",
2389 len, used),
2390 errhint("If your source string is not fixed-width, "
2391 "try using the \"FM\" modifier."))));
2392
2393 errno = 0;
2394 result = strtol(copy, &last, 10);
2395 used = last - copy;
2396
2397 if (used > 0 && used < len)
2398 RETURN_ERROR(ereport(ERROR,
2399 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2400 errmsg("invalid value \"%s\" for \"%s\"",
2401 copy, node->key->name),
2402 errdetail("Field requires %d characters, "
2403 "but only %d could be parsed.",
2404 len, used),
2405 errhint("If your source string is not fixed-width, "
2406 "try using the \"FM\" modifier."))));
2407
2408 *src += used;
2409 }
2410
2411 if (*src == init)
2412 RETURN_ERROR(ereport(ERROR,
2413 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2414 errmsg("invalid value \"%s\" for \"%s\"",
2415 copy, node->key->name),
2416 errdetail("Value must be an integer."))));
2417
2418 if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2419 RETURN_ERROR(ereport(ERROR,
2420 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2421 errmsg("value for \"%s\" in source string is out of range",
2422 node->key->name),
2423 errdetail("Value must be in the range %d to %d.",
2424 INT_MIN, INT_MAX))));
2425
2426 if (dest != NULL)
2427 {
2428 from_char_set_int(dest, (int) result, node, have_error);
2429 CHECK_ERROR;
2430 }
2431
2432 return *src - init;
2433
2434 on_error:
2435 return -1;
2436 }
2437
2438 /*
2439 * Call from_char_parse_int_len(), using the length of the format keyword as
2440 * the expected length of the field.
2441 *
2442 * Don't call this function if the field differs in length from the format
2443 * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2444 * In such cases, call from_char_parse_int_len() instead to specify the
2445 * required length explicitly.
2446 */
2447 static int
from_char_parse_int(int * dest,const char ** src,FormatNode * node,bool * have_error)2448 from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
2449 {
2450 return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
2451 }
2452
/*
 * Look through the NULL-terminated 'array' for the first entry that is a
 * case-insensitive prefix of 'name'.
 *
 * Returns the array index of that entry, or -1 if there is none.
 *
 * *len receives the length of the matching entry, or 0 if nothing matched.
 *
 * Case-insensitivity is defined per pg_ascii_tolower, so this is only
 * suitable for comparisons to ASCII strings.
 */
static int
seq_search_ascii(const char *name, const char *const *array, int *len)
{
	unsigned char lead;
	int			idx;

	*len = 0;

	/* empty string can't match anything */
	if (*name == '\0')
		return -1;

	/* fold the first character once; it rejects most entries cheaply */
	lead = pg_ascii_tolower((unsigned char) *name);

	for (idx = 0; array[idx] != NULL; idx++)
	{
		const char *entry = array[idx];
		int			pos;

		if (pg_ascii_tolower((unsigned char) entry[0]) != lead)
			continue;

		/* walk both strings in step while they keep agreeing */
		pos = 1;
		while (entry[pos] != '\0' &&
			   name[pos] != '\0' &&
			   pg_ascii_tolower((unsigned char) entry[pos]) ==
			   pg_ascii_tolower((unsigned char) name[pos]))
			pos++;

		/* success only if the whole array entry was consumed */
		if (entry[pos] == '\0')
		{
			*len = pos;
			return idx;
		}
	}

	return -1;
}
2509
2510 /*
2511 * Sequentially search an array of possibly non-English words for
2512 * a case-insensitive match to the initial character(s) of "name".
2513 *
2514 * This has the same API as seq_search_ascii(), but we use a more general
2515 * case-folding transformation to achieve case-insensitivity. Case folding
2516 * is done per the rules of the collation identified by "collid".
2517 *
2518 * The array is treated as const, but we don't declare it that way because
2519 * the arrays exported by pg_locale.c aren't const.
2520 */
2521 static int
seq_search_localized(const char * name,char ** array,int * len,Oid collid)2522 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2523 {
2524 char **a;
2525 char *upper_name;
2526 char *lower_name;
2527
2528 *len = 0;
2529
2530 /* empty string can't match anything */
2531 if (!*name)
2532 return -1;
2533
2534 /*
2535 * The case-folding processing done below is fairly expensive, so before
2536 * doing that, make a quick pass to see if there is an exact match.
2537 */
2538 for (a = array; *a != NULL; a++)
2539 {
2540 int element_len = strlen(*a);
2541
2542 if (strncmp(name, *a, element_len) == 0)
2543 {
2544 *len = element_len;
2545 return a - array;
2546 }
2547 }
2548
2549 /*
2550 * Fold to upper case, then to lower case, so that we can match reliably
2551 * even in languages in which case conversions are not injective.
2552 */
2553 upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
2554 lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2555 pfree(upper_name);
2556
2557 for (a = array; *a != NULL; a++)
2558 {
2559 char *upper_element;
2560 char *lower_element;
2561 int element_len;
2562
2563 /* Likewise upper/lower-case array element */
2564 upper_element = str_toupper(*a, strlen(*a), collid);
2565 lower_element = str_tolower(upper_element, strlen(upper_element),
2566 collid);
2567 pfree(upper_element);
2568 element_len = strlen(lower_element);
2569
2570 /* Match? */
2571 if (strncmp(lower_name, lower_element, element_len) == 0)
2572 {
2573 *len = element_len;
2574 pfree(lower_element);
2575 pfree(lower_name);
2576 return a - array;
2577 }
2578 pfree(lower_element);
2579 }
2580
2581 pfree(lower_name);
2582 return -1;
2583 }
2584
2585 /*
2586 * Perform a sequential search in 'array' (or 'localized_array', if that's
2587 * not NULL) for an entry matching the first character(s) of the 'src'
2588 * string case-insensitively.
2589 *
2590 * The 'array' is presumed to be English words (all-ASCII), but
2591 * if 'localized_array' is supplied, that might be non-English
2592 * so we need a more expensive case-folding transformation
2593 * (which will follow the rules of the collation 'collid').
2594 *
2595 * If a match is found, copy the array index of the match into the integer
2596 * pointed to by 'dest', advance 'src' to the end of the part of the string
2597 * which matched, and return the number of characters consumed.
2598 *
2599 * If the string doesn't match, throw an error if 'have_error' is NULL,
2600 * otherwise set '*have_error' and return -1.
2601 *
2602 * 'node' is used only for error reports: node->key->name identifies the
2603 * field type we were searching for.
2604 */
2605 static int
from_char_seq_search(int * dest,const char ** src,const char * const * array,char ** localized_array,Oid collid,FormatNode * node,bool * have_error)2606 from_char_seq_search(int *dest, const char **src, const char *const *array,
2607 char **localized_array, Oid collid,
2608 FormatNode *node, bool *have_error)
2609 {
2610 int len;
2611
2612 if (localized_array == NULL)
2613 *dest = seq_search_ascii(*src, array, &len);
2614 else
2615 *dest = seq_search_localized(*src, localized_array, &len, collid);
2616
2617 if (len <= 0)
2618 {
2619 /*
2620 * In the error report, truncate the string at the next whitespace (if
2621 * any) to avoid including irrelevant data.
2622 */
2623 char *copy = pstrdup(*src);
2624 char *c;
2625
2626 for (c = copy; *c; c++)
2627 {
2628 if (scanner_isspace(*c))
2629 {
2630 *c = '\0';
2631 break;
2632 }
2633 }
2634
2635 RETURN_ERROR(ereport(ERROR,
2636 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2637 errmsg("invalid value \"%s\" for \"%s\"",
2638 copy, node->key->name),
2639 errdetail("The given value did not match any of "
2640 "the allowed values for this field."))));
2641 }
2642 *src += len;
2643 return len;
2644
2645 on_error:
2646 return -1;
2647 }
2648
2649 /* ----------
2650 * Process a TmToChar struct as denoted by a list of FormatNodes.
2651 * The formatted data is written to the string pointed to by 'out'.
2652 * ----------
2653 */
2654 static void
DCH_to_char(FormatNode * node,bool is_interval,TmToChar * in,char * out,Oid collid)2655 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2656 {
2657 FormatNode *n;
2658 char *s;
2659 struct pg_tm *tm = &in->tm;
2660 int i;
2661
2662 /* cache localized days and months */
2663 cache_locale_time();
2664
2665 s = out;
2666 for (n = node; n->type != NODE_TYPE_END; n++)
2667 {
2668 if (n->type != NODE_TYPE_ACTION)
2669 {
2670 strcpy(s, n->character);
2671 s += strlen(s);
2672 continue;
2673 }
2674
2675 switch (n->key->id)
2676 {
2677 case DCH_A_M:
2678 case DCH_P_M:
2679 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2680 ? P_M_STR : A_M_STR);
2681 s += strlen(s);
2682 break;
2683 case DCH_AM:
2684 case DCH_PM:
2685 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2686 ? PM_STR : AM_STR);
2687 s += strlen(s);
2688 break;
2689 case DCH_a_m:
2690 case DCH_p_m:
2691 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2692 ? p_m_STR : a_m_STR);
2693 s += strlen(s);
2694 break;
2695 case DCH_am:
2696 case DCH_pm:
2697 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2698 ? pm_STR : am_STR);
2699 s += strlen(s);
2700 break;
2701 case DCH_HH:
2702 case DCH_HH12:
2703
2704 /*
2705 * display time as shown on a 12-hour clock, even for
2706 * intervals
2707 */
2708 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2709 tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2710 tm->tm_hour % (HOURS_PER_DAY / 2));
2711 if (S_THth(n->suffix))
2712 str_numth(s, s, S_TH_TYPE(n->suffix));
2713 s += strlen(s);
2714 break;
2715 case DCH_HH24:
2716 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2717 tm->tm_hour);
2718 if (S_THth(n->suffix))
2719 str_numth(s, s, S_TH_TYPE(n->suffix));
2720 s += strlen(s);
2721 break;
2722 case DCH_MI:
2723 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2724 tm->tm_min);
2725 if (S_THth(n->suffix))
2726 str_numth(s, s, S_TH_TYPE(n->suffix));
2727 s += strlen(s);
2728 break;
2729 case DCH_SS:
2730 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2731 tm->tm_sec);
2732 if (S_THth(n->suffix))
2733 str_numth(s, s, S_TH_TYPE(n->suffix));
2734 s += strlen(s);
2735 break;
2736
2737 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2738 sprintf(s, frac_fmt, (int) (frac_val)); \
2739 if (S_THth(n->suffix)) \
2740 str_numth(s, s, S_TH_TYPE(n->suffix)); \
2741 s += strlen(s)
2742
2743 case DCH_FF1: /* tenth of second */
2744 DCH_to_char_fsec("%01d", in->fsec / 100000);
2745 break;
2746 case DCH_FF2: /* hundredth of second */
2747 DCH_to_char_fsec("%02d", in->fsec / 10000);
2748 break;
2749 case DCH_FF3:
2750 case DCH_MS: /* millisecond */
2751 DCH_to_char_fsec("%03d", in->fsec / 1000);
2752 break;
2753 case DCH_FF4: /* tenth of a millisecond */
2754 DCH_to_char_fsec("%04d", in->fsec / 100);
2755 break;
2756 case DCH_FF5: /* hundredth of a millisecond */
2757 DCH_to_char_fsec("%05d", in->fsec / 10);
2758 break;
2759 case DCH_FF6:
2760 case DCH_US: /* microsecond */
2761 DCH_to_char_fsec("%06d", in->fsec);
2762 break;
2763 #undef DCH_to_char_fsec
2764 case DCH_SSSS:
2765 sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2766 tm->tm_min * SECS_PER_MINUTE +
2767 tm->tm_sec);
2768 if (S_THth(n->suffix))
2769 str_numth(s, s, S_TH_TYPE(n->suffix));
2770 s += strlen(s);
2771 break;
2772 case DCH_tz:
2773 INVALID_FOR_INTERVAL;
2774 if (tmtcTzn(in))
2775 {
2776 /* We assume here that timezone names aren't localized */
2777 char *p = asc_tolower_z(tmtcTzn(in));
2778
2779 strcpy(s, p);
2780 pfree(p);
2781 s += strlen(s);
2782 }
2783 break;
2784 case DCH_TZ:
2785 INVALID_FOR_INTERVAL;
2786 if (tmtcTzn(in))
2787 {
2788 strcpy(s, tmtcTzn(in));
2789 s += strlen(s);
2790 }
2791 break;
2792 case DCH_TZH:
2793 INVALID_FOR_INTERVAL;
2794 sprintf(s, "%c%02d",
2795 (tm->tm_gmtoff >= 0) ? '+' : '-',
2796 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2797 s += strlen(s);
2798 break;
2799 case DCH_TZM:
2800 INVALID_FOR_INTERVAL;
2801 sprintf(s, "%02d",
2802 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2803 s += strlen(s);
2804 break;
2805 case DCH_OF:
2806 INVALID_FOR_INTERVAL;
2807 sprintf(s, "%c%0*d",
2808 (tm->tm_gmtoff >= 0) ? '+' : '-',
2809 S_FM(n->suffix) ? 0 : 2,
2810 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2811 s += strlen(s);
2812 if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2813 {
2814 sprintf(s, ":%02d",
2815 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2816 s += strlen(s);
2817 }
2818 break;
2819 case DCH_A_D:
2820 case DCH_B_C:
2821 INVALID_FOR_INTERVAL;
2822 strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2823 s += strlen(s);
2824 break;
2825 case DCH_AD:
2826 case DCH_BC:
2827 INVALID_FOR_INTERVAL;
2828 strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2829 s += strlen(s);
2830 break;
2831 case DCH_a_d:
2832 case DCH_b_c:
2833 INVALID_FOR_INTERVAL;
2834 strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2835 s += strlen(s);
2836 break;
2837 case DCH_ad:
2838 case DCH_bc:
2839 INVALID_FOR_INTERVAL;
2840 strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2841 s += strlen(s);
2842 break;
2843 case DCH_MONTH:
2844 INVALID_FOR_INTERVAL;
2845 if (!tm->tm_mon)
2846 break;
2847 if (S_TM(n->suffix))
2848 {
2849 char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2850
2851 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2852 strcpy(s, str);
2853 else
2854 ereport(ERROR,
2855 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2856 errmsg("localized string format value too long")));
2857 }
2858 else
2859 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2860 asc_toupper_z(months_full[tm->tm_mon - 1]));
2861 s += strlen(s);
2862 break;
2863 case DCH_Month:
2864 INVALID_FOR_INTERVAL;
2865 if (!tm->tm_mon)
2866 break;
2867 if (S_TM(n->suffix))
2868 {
2869 char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2870
2871 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2872 strcpy(s, str);
2873 else
2874 ereport(ERROR,
2875 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2876 errmsg("localized string format value too long")));
2877 }
2878 else
2879 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2880 months_full[tm->tm_mon - 1]);
2881 s += strlen(s);
2882 break;
2883 case DCH_month:
2884 INVALID_FOR_INTERVAL;
2885 if (!tm->tm_mon)
2886 break;
2887 if (S_TM(n->suffix))
2888 {
2889 char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2890
2891 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2892 strcpy(s, str);
2893 else
2894 ereport(ERROR,
2895 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2896 errmsg("localized string format value too long")));
2897 }
2898 else
2899 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2900 asc_tolower_z(months_full[tm->tm_mon - 1]));
2901 s += strlen(s);
2902 break;
2903 case DCH_MON:
2904 INVALID_FOR_INTERVAL;
2905 if (!tm->tm_mon)
2906 break;
2907 if (S_TM(n->suffix))
2908 {
2909 char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2910
2911 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2912 strcpy(s, str);
2913 else
2914 ereport(ERROR,
2915 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2916 errmsg("localized string format value too long")));
2917 }
2918 else
2919 strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2920 s += strlen(s);
2921 break;
2922 case DCH_Mon:
2923 INVALID_FOR_INTERVAL;
2924 if (!tm->tm_mon)
2925 break;
2926 if (S_TM(n->suffix))
2927 {
2928 char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2929
2930 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2931 strcpy(s, str);
2932 else
2933 ereport(ERROR,
2934 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2935 errmsg("localized string format value too long")));
2936 }
2937 else
2938 strcpy(s, months[tm->tm_mon - 1]);
2939 s += strlen(s);
2940 break;
2941 case DCH_mon:
2942 INVALID_FOR_INTERVAL;
2943 if (!tm->tm_mon)
2944 break;
2945 if (S_TM(n->suffix))
2946 {
2947 char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2948
2949 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2950 strcpy(s, str);
2951 else
2952 ereport(ERROR,
2953 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2954 errmsg("localized string format value too long")));
2955 }
2956 else
2957 strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2958 s += strlen(s);
2959 break;
2960 case DCH_MM:
2961 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2962 tm->tm_mon);
2963 if (S_THth(n->suffix))
2964 str_numth(s, s, S_TH_TYPE(n->suffix));
2965 s += strlen(s);
2966 break;
2967 case DCH_DAY:
2968 INVALID_FOR_INTERVAL;
2969 if (S_TM(n->suffix))
2970 {
2971 char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2972
2973 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2974 strcpy(s, str);
2975 else
2976 ereport(ERROR,
2977 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2978 errmsg("localized string format value too long")));
2979 }
2980 else
2981 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2982 asc_toupper_z(days[tm->tm_wday]));
2983 s += strlen(s);
2984 break;
2985 case DCH_Day:
2986 INVALID_FOR_INTERVAL;
2987 if (S_TM(n->suffix))
2988 {
2989 char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2990
2991 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2992 strcpy(s, str);
2993 else
2994 ereport(ERROR,
2995 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2996 errmsg("localized string format value too long")));
2997 }
2998 else
2999 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3000 days[tm->tm_wday]);
3001 s += strlen(s);
3002 break;
3003 case DCH_day:
3004 INVALID_FOR_INTERVAL;
3005 if (S_TM(n->suffix))
3006 {
3007 char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
3008
3009 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3010 strcpy(s, str);
3011 else
3012 ereport(ERROR,
3013 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3014 errmsg("localized string format value too long")));
3015 }
3016 else
3017 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3018 asc_tolower_z(days[tm->tm_wday]));
3019 s += strlen(s);
3020 break;
3021 case DCH_DY:
3022 INVALID_FOR_INTERVAL;
3023 if (S_TM(n->suffix))
3024 {
3025 char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
3026
3027 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3028 strcpy(s, str);
3029 else
3030 ereport(ERROR,
3031 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3032 errmsg("localized string format value too long")));
3033 }
3034 else
3035 strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3036 s += strlen(s);
3037 break;
3038 case DCH_Dy:
3039 INVALID_FOR_INTERVAL;
3040 if (S_TM(n->suffix))
3041 {
3042 char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
3043
3044 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3045 strcpy(s, str);
3046 else
3047 ereport(ERROR,
3048 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3049 errmsg("localized string format value too long")));
3050 }
3051 else
3052 strcpy(s, days_short[tm->tm_wday]);
3053 s += strlen(s);
3054 break;
3055 case DCH_dy:
3056 INVALID_FOR_INTERVAL;
3057 if (S_TM(n->suffix))
3058 {
3059 char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
3060
3061 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3062 strcpy(s, str);
3063 else
3064 ereport(ERROR,
3065 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3066 errmsg("localized string format value too long")));
3067 }
3068 else
3069 strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3070 s += strlen(s);
3071 break;
3072 case DCH_DDD:
3073 case DCH_IDDD:
3074 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3075 (n->key->id == DCH_DDD) ?
3076 tm->tm_yday :
3077 date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
3078 if (S_THth(n->suffix))
3079 str_numth(s, s, S_TH_TYPE(n->suffix));
3080 s += strlen(s);
3081 break;
3082 case DCH_DD:
3083 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3084 if (S_THth(n->suffix))
3085 str_numth(s, s, S_TH_TYPE(n->suffix));
3086 s += strlen(s);
3087 break;
3088 case DCH_D:
3089 INVALID_FOR_INTERVAL;
3090 sprintf(s, "%d", tm->tm_wday + 1);
3091 if (S_THth(n->suffix))
3092 str_numth(s, s, S_TH_TYPE(n->suffix));
3093 s += strlen(s);
3094 break;
3095 case DCH_ID:
3096 INVALID_FOR_INTERVAL;
3097 sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3098 if (S_THth(n->suffix))
3099 str_numth(s, s, S_TH_TYPE(n->suffix));
3100 s += strlen(s);
3101 break;
3102 case DCH_WW:
3103 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3104 (tm->tm_yday - 1) / 7 + 1);
3105 if (S_THth(n->suffix))
3106 str_numth(s, s, S_TH_TYPE(n->suffix));
3107 s += strlen(s);
3108 break;
3109 case DCH_IW:
3110 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3111 date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3112 if (S_THth(n->suffix))
3113 str_numth(s, s, S_TH_TYPE(n->suffix));
3114 s += strlen(s);
3115 break;
3116 case DCH_Q:
3117 if (!tm->tm_mon)
3118 break;
3119 sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3120 if (S_THth(n->suffix))
3121 str_numth(s, s, S_TH_TYPE(n->suffix));
3122 s += strlen(s);
3123 break;
3124 case DCH_CC:
3125 if (is_interval) /* straight calculation */
3126 i = tm->tm_year / 100;
3127 else
3128 {
3129 if (tm->tm_year > 0)
3130 /* Century 20 == 1901 - 2000 */
3131 i = (tm->tm_year - 1) / 100 + 1;
3132 else
3133 /* Century 6BC == 600BC - 501BC */
3134 i = tm->tm_year / 100 - 1;
3135 }
3136 if (i <= 99 && i >= -99)
3137 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3138 else
3139 sprintf(s, "%d", i);
3140 if (S_THth(n->suffix))
3141 str_numth(s, s, S_TH_TYPE(n->suffix));
3142 s += strlen(s);
3143 break;
3144 case DCH_Y_YYY:
3145 i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3146 sprintf(s, "%d,%03d", i,
3147 ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3148 if (S_THth(n->suffix))
3149 str_numth(s, s, S_TH_TYPE(n->suffix));
3150 s += strlen(s);
3151 break;
3152 case DCH_YYYY:
3153 case DCH_IYYY:
3154 sprintf(s, "%0*d",
3155 S_FM(n->suffix) ? 0 :
3156 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3157 (n->key->id == DCH_YYYY ?
3158 ADJUST_YEAR(tm->tm_year, is_interval) :
3159 ADJUST_YEAR(date2isoyear(tm->tm_year,
3160 tm->tm_mon,
3161 tm->tm_mday),
3162 is_interval)));
3163 if (S_THth(n->suffix))
3164 str_numth(s, s, S_TH_TYPE(n->suffix));
3165 s += strlen(s);
3166 break;
3167 case DCH_YYY:
3168 case DCH_IYY:
3169 sprintf(s, "%0*d",
3170 S_FM(n->suffix) ? 0 :
3171 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3172 (n->key->id == DCH_YYY ?
3173 ADJUST_YEAR(tm->tm_year, is_interval) :
3174 ADJUST_YEAR(date2isoyear(tm->tm_year,
3175 tm->tm_mon,
3176 tm->tm_mday),
3177 is_interval)) % 1000);
3178 if (S_THth(n->suffix))
3179 str_numth(s, s, S_TH_TYPE(n->suffix));
3180 s += strlen(s);
3181 break;
3182 case DCH_YY:
3183 case DCH_IY:
3184 sprintf(s, "%0*d",
3185 S_FM(n->suffix) ? 0 :
3186 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3187 (n->key->id == DCH_YY ?
3188 ADJUST_YEAR(tm->tm_year, is_interval) :
3189 ADJUST_YEAR(date2isoyear(tm->tm_year,
3190 tm->tm_mon,
3191 tm->tm_mday),
3192 is_interval)) % 100);
3193 if (S_THth(n->suffix))
3194 str_numth(s, s, S_TH_TYPE(n->suffix));
3195 s += strlen(s);
3196 break;
3197 case DCH_Y:
3198 case DCH_I:
3199 sprintf(s, "%1d",
3200 (n->key->id == DCH_Y ?
3201 ADJUST_YEAR(tm->tm_year, is_interval) :
3202 ADJUST_YEAR(date2isoyear(tm->tm_year,
3203 tm->tm_mon,
3204 tm->tm_mday),
3205 is_interval)) % 10);
3206 if (S_THth(n->suffix))
3207 str_numth(s, s, S_TH_TYPE(n->suffix));
3208 s += strlen(s);
3209 break;
3210 case DCH_RM:
3211 /* FALLTHROUGH */
3212 case DCH_rm:
3213
3214 /*
3215 * For intervals, values like '12 month' will be reduced to 0
3216 * month and some years. These should be processed.
3217 */
3218 if (!tm->tm_mon && !tm->tm_year)
3219 break;
3220 else
3221 {
3222 int mon = 0;
3223 const char *const *months;
3224
3225 if (n->key->id == DCH_RM)
3226 months = rm_months_upper;
3227 else
3228 months = rm_months_lower;
3229
3230 /*
3231 * Compute the position in the roman-numeral array. Note
3232 * that the contents of the array are reversed, December
3233 * being first and January last.
3234 */
3235 if (tm->tm_mon == 0)
3236 {
3237 /*
3238 * This case is special, and tracks the case of full
3239 * interval years.
3240 */
3241 mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3242 }
3243 else if (tm->tm_mon < 0)
3244 {
3245 /*
3246 * Negative case. In this case, the calculation is
3247 * reversed, where -1 means December, -2 November,
3248 * etc.
3249 */
3250 mon = -1 * (tm->tm_mon + 1);
3251 }
3252 else
3253 {
3254 /*
3255 * Common case, with a strictly positive value. The
3256 * position in the array matches with the value of
3257 * tm_mon.
3258 */
3259 mon = MONTHS_PER_YEAR - tm->tm_mon;
3260 }
3261
3262 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3263 months[mon]);
3264 s += strlen(s);
3265 }
3266 break;
3267 case DCH_W:
3268 sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3269 if (S_THth(n->suffix))
3270 str_numth(s, s, S_TH_TYPE(n->suffix));
3271 s += strlen(s);
3272 break;
3273 case DCH_J:
3274 sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3275 if (S_THth(n->suffix))
3276 str_numth(s, s, S_TH_TYPE(n->suffix));
3277 s += strlen(s);
3278 break;
3279 }
3280 }
3281
3282 *s = '\0';
3283 }
3284
3285 /*
3286 * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3287 * The TmFromChar struct pointed to by 'out' is populated with the results.
3288 *
3289 * 'collid' identifies the collation to use, if needed.
3290 * 'std' specifies standard parsing mode.
3291 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3292 *
3293 * Note: we currently don't have any to_interval() function, so there
3294 * is no need here for INVALID_FOR_INTERVAL checks.
3295 */
3296 static void
DCH_from_char(FormatNode * node,const char * in,TmFromChar * out,Oid collid,bool std,bool * have_error)3297 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3298 Oid collid, bool std, bool *have_error)
3299 {
3300 FormatNode *n;
3301 const char *s;
3302 int len,
3303 value;
3304 bool fx_mode = std;
3305
3306 /* number of extra skipped characters (more than given in format string) */
3307 int extra_skip = 0;
3308
3309 /* cache localized days and months */
3310 cache_locale_time();
3311
3312 for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3313 {
3314 /*
3315 * Ignore spaces at the beginning of the string and before fields when
3316 * not in FX (fixed width) mode.
3317 */
3318 if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3319 (n->type == NODE_TYPE_ACTION || n == node))
3320 {
3321 while (*s != '\0' && isspace((unsigned char) *s))
3322 {
3323 s++;
3324 extra_skip++;
3325 }
3326 }
3327
3328 if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3329 {
3330 if (std)
3331 {
3332 /*
3333 * Standard mode requires strict matching between format
3334 * string separators/spaces and input string.
3335 */
3336 Assert(n->character[0] && !n->character[1]);
3337
3338 if (*s == n->character[0])
3339 s++;
3340 else
3341 RETURN_ERROR(ereport(ERROR,
3342 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3343 errmsg("unmatched format separator \"%c\"",
3344 n->character[0]))));
3345 }
3346 else if (!fx_mode)
3347 {
3348 /*
3349 * In non FX (fixed format) mode one format string space or
3350 * separator match to one space or separator in input string.
3351 * Or match nothing if there is no space or separator in the
3352 * current position of input string.
3353 */
3354 extra_skip--;
3355 if (isspace((unsigned char) *s) || is_separator_char(s))
3356 {
3357 s++;
3358 extra_skip++;
3359 }
3360 }
3361 else
3362 {
3363 /*
3364 * In FX mode, on format string space or separator we consume
3365 * exactly one character from input string. Notice we don't
3366 * insist that the consumed character match the format's
3367 * character.
3368 */
3369 s += pg_mblen(s);
3370 }
3371 continue;
3372 }
3373 else if (n->type != NODE_TYPE_ACTION)
3374 {
3375 /*
3376 * Text character, so consume one character from input string.
3377 * Notice we don't insist that the consumed character match the
3378 * format's character.
3379 */
3380 if (!fx_mode)
3381 {
3382 /*
3383 * In non FX mode we might have skipped some extra characters
3384 * (more than specified in format string) before. In this
3385 * case we don't skip input string character, because it might
3386 * be part of field.
3387 */
3388 if (extra_skip > 0)
3389 extra_skip--;
3390 else
3391 s += pg_mblen(s);
3392 }
3393 else
3394 {
3395 int chlen = pg_mblen(s);
3396
3397 /*
3398 * Standard mode requires strict match of format characters.
3399 */
3400 if (std && n->type == NODE_TYPE_CHAR &&
3401 strncmp(s, n->character, chlen) != 0)
3402 RETURN_ERROR(ereport(ERROR,
3403 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3404 errmsg("unmatched format character \"%s\"",
3405 n->character))));
3406
3407 s += chlen;
3408 }
3409 continue;
3410 }
3411
3412 from_char_set_mode(out, n->key->date_mode, have_error);
3413 CHECK_ERROR;
3414
3415 switch (n->key->id)
3416 {
3417 case DCH_FX:
3418 fx_mode = true;
3419 break;
3420 case DCH_A_M:
3421 case DCH_P_M:
3422 case DCH_a_m:
3423 case DCH_p_m:
3424 from_char_seq_search(&value, &s, ampm_strings_long,
3425 NULL, InvalidOid,
3426 n, have_error);
3427 CHECK_ERROR;
3428 from_char_set_int(&out->pm, value % 2, n, have_error);
3429 CHECK_ERROR;
3430 out->clock = CLOCK_12_HOUR;
3431 break;
3432 case DCH_AM:
3433 case DCH_PM:
3434 case DCH_am:
3435 case DCH_pm:
3436 from_char_seq_search(&value, &s, ampm_strings,
3437 NULL, InvalidOid,
3438 n, have_error);
3439 CHECK_ERROR;
3440 from_char_set_int(&out->pm, value % 2, n, have_error);
3441 CHECK_ERROR;
3442 out->clock = CLOCK_12_HOUR;
3443 break;
3444 case DCH_HH:
3445 case DCH_HH12:
3446 from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3447 CHECK_ERROR;
3448 out->clock = CLOCK_12_HOUR;
3449 SKIP_THth(s, n->suffix);
3450 break;
3451 case DCH_HH24:
3452 from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3453 CHECK_ERROR;
3454 SKIP_THth(s, n->suffix);
3455 break;
3456 case DCH_MI:
3457 from_char_parse_int(&out->mi, &s, n, have_error);
3458 CHECK_ERROR;
3459 SKIP_THth(s, n->suffix);
3460 break;
3461 case DCH_SS:
3462 from_char_parse_int(&out->ss, &s, n, have_error);
3463 CHECK_ERROR;
3464 SKIP_THth(s, n->suffix);
3465 break;
3466 case DCH_MS: /* millisecond */
3467 len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
3468 CHECK_ERROR;
3469
3470 /*
3471 * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3472 */
3473 out->ms *= len == 1 ? 100 :
3474 len == 2 ? 10 : 1;
3475
3476 SKIP_THth(s, n->suffix);
3477 break;
3478 case DCH_FF1:
3479 case DCH_FF2:
3480 case DCH_FF3:
3481 case DCH_FF4:
3482 case DCH_FF5:
3483 case DCH_FF6:
3484 out->ff = n->key->id - DCH_FF1 + 1;
3485 /* fall through */
3486 case DCH_US: /* microsecond */
3487 len = from_char_parse_int_len(&out->us, &s,
3488 n->key->id == DCH_US ? 6 :
3489 out->ff, n, have_error);
3490 CHECK_ERROR;
3491
3492 out->us *= len == 1 ? 100000 :
3493 len == 2 ? 10000 :
3494 len == 3 ? 1000 :
3495 len == 4 ? 100 :
3496 len == 5 ? 10 : 1;
3497
3498 SKIP_THth(s, n->suffix);
3499 break;
3500 case DCH_SSSS:
3501 from_char_parse_int(&out->ssss, &s, n, have_error);
3502 CHECK_ERROR;
3503 SKIP_THth(s, n->suffix);
3504 break;
3505 case DCH_tz:
3506 case DCH_TZ:
3507 case DCH_OF:
3508 RETURN_ERROR(ereport(ERROR,
3509 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3510 errmsg("formatting field \"%s\" is only supported in to_char",
3511 n->key->name))));
3512 CHECK_ERROR;
3513 break;
3514 case DCH_TZH:
3515
3516 /*
3517 * Value of TZH might be negative. And the issue is that we
3518 * might swallow minus sign as the separator. So, if we have
3519 * skipped more characters than specified in the format
3520 * string, then we consider prepending last skipped minus to
3521 * TZH.
3522 */
3523 if (*s == '+' || *s == '-' || *s == ' ')
3524 {
3525 out->tzsign = *s == '-' ? -1 : +1;
3526 s++;
3527 }
3528 else
3529 {
3530 if (extra_skip > 0 && *(s - 1) == '-')
3531 out->tzsign = -1;
3532 else
3533 out->tzsign = +1;
3534 }
3535
3536 from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
3537 CHECK_ERROR;
3538 break;
3539 case DCH_TZM:
3540 /* assign positive timezone sign if TZH was not seen before */
3541 if (!out->tzsign)
3542 out->tzsign = +1;
3543 from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
3544 CHECK_ERROR;
3545 break;
3546 case DCH_A_D:
3547 case DCH_B_C:
3548 case DCH_a_d:
3549 case DCH_b_c:
3550 from_char_seq_search(&value, &s, adbc_strings_long,
3551 NULL, InvalidOid,
3552 n, have_error);
3553 CHECK_ERROR;
3554 from_char_set_int(&out->bc, value % 2, n, have_error);
3555 CHECK_ERROR;
3556 break;
3557 case DCH_AD:
3558 case DCH_BC:
3559 case DCH_ad:
3560 case DCH_bc:
3561 from_char_seq_search(&value, &s, adbc_strings,
3562 NULL, InvalidOid,
3563 n, have_error);
3564 CHECK_ERROR;
3565 from_char_set_int(&out->bc, value % 2, n, have_error);
3566 CHECK_ERROR;
3567 break;
3568 case DCH_MONTH:
3569 case DCH_Month:
3570 case DCH_month:
3571 from_char_seq_search(&value, &s, months_full,
3572 S_TM(n->suffix) ? localized_full_months : NULL,
3573 collid,
3574 n, have_error);
3575 CHECK_ERROR;
3576 from_char_set_int(&out->mm, value + 1, n, have_error);
3577 CHECK_ERROR;
3578 break;
3579 case DCH_MON:
3580 case DCH_Mon:
3581 case DCH_mon:
3582 from_char_seq_search(&value, &s, months,
3583 S_TM(n->suffix) ? localized_abbrev_months : NULL,
3584 collid,
3585 n, have_error);
3586 CHECK_ERROR;
3587 from_char_set_int(&out->mm, value + 1, n, have_error);
3588 CHECK_ERROR;
3589 break;
3590 case DCH_MM:
3591 from_char_parse_int(&out->mm, &s, n, have_error);
3592 CHECK_ERROR;
3593 SKIP_THth(s, n->suffix);
3594 break;
3595 case DCH_DAY:
3596 case DCH_Day:
3597 case DCH_day:
3598 from_char_seq_search(&value, &s, days,
3599 S_TM(n->suffix) ? localized_full_days : NULL,
3600 collid,
3601 n, have_error);
3602 CHECK_ERROR;
3603 from_char_set_int(&out->d, value, n, have_error);
3604 CHECK_ERROR;
3605 out->d++;
3606 break;
3607 case DCH_DY:
3608 case DCH_Dy:
3609 case DCH_dy:
3610 from_char_seq_search(&value, &s, days_short,
3611 S_TM(n->suffix) ? localized_abbrev_days : NULL,
3612 collid,
3613 n, have_error);
3614 CHECK_ERROR;
3615 from_char_set_int(&out->d, value, n, have_error);
3616 CHECK_ERROR;
3617 out->d++;
3618 break;
3619 case DCH_DDD:
3620 from_char_parse_int(&out->ddd, &s, n, have_error);
3621 CHECK_ERROR;
3622 SKIP_THth(s, n->suffix);
3623 break;
3624 case DCH_IDDD:
3625 from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
3626 CHECK_ERROR;
3627 SKIP_THth(s, n->suffix);
3628 break;
3629 case DCH_DD:
3630 from_char_parse_int(&out->dd, &s, n, have_error);
3631 CHECK_ERROR;
3632 SKIP_THth(s, n->suffix);
3633 break;
3634 case DCH_D:
3635 from_char_parse_int(&out->d, &s, n, have_error);
3636 CHECK_ERROR;
3637 SKIP_THth(s, n->suffix);
3638 break;
3639 case DCH_ID:
3640 from_char_parse_int_len(&out->d, &s, 1, n, have_error);
3641 CHECK_ERROR;
3642 /* Shift numbering to match Gregorian where Sunday = 1 */
3643 if (++out->d > 7)
3644 out->d = 1;
3645 SKIP_THth(s, n->suffix);
3646 break;
3647 case DCH_WW:
3648 case DCH_IW:
3649 from_char_parse_int(&out->ww, &s, n, have_error);
3650 CHECK_ERROR;
3651 SKIP_THth(s, n->suffix);
3652 break;
3653 case DCH_Q:
3654
3655 /*
3656 * We ignore 'Q' when converting to date because it is unclear
3657 * which date in the quarter to use, and some people specify
3658 * both quarter and month, so if it was honored it might
3659 * conflict with the supplied month. That is also why we don't
3660 * throw an error.
3661 *
3662 * We still parse the source string for an integer, but it
3663 * isn't stored anywhere in 'out'.
3664 */
3665 from_char_parse_int((int *) NULL, &s, n, have_error);
3666 CHECK_ERROR;
3667 SKIP_THth(s, n->suffix);
3668 break;
3669 case DCH_CC:
3670 from_char_parse_int(&out->cc, &s, n, have_error);
3671 CHECK_ERROR;
3672 SKIP_THth(s, n->suffix);
3673 break;
3674 case DCH_Y_YYY:
3675 {
3676 int matched,
3677 years,
3678 millennia,
3679 nch;
3680
3681 matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3682 if (matched < 2)
3683 RETURN_ERROR(ereport(ERROR,
3684 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3685 errmsg("invalid input string for \"Y,YYY\""))));
3686 years += (millennia * 1000);
3687 from_char_set_int(&out->year, years, n, have_error);
3688 CHECK_ERROR;
3689 out->yysz = 4;
3690 s += nch;
3691 SKIP_THth(s, n->suffix);
3692 }
3693 break;
3694 case DCH_YYYY:
3695 case DCH_IYYY:
3696 from_char_parse_int(&out->year, &s, n, have_error);
3697 CHECK_ERROR;
3698 out->yysz = 4;
3699 SKIP_THth(s, n->suffix);
3700 break;
3701 case DCH_YYY:
3702 case DCH_IYY:
3703 len = from_char_parse_int(&out->year, &s, n, have_error);
3704 CHECK_ERROR;
3705 if (len < 4)
3706 out->year = adjust_partial_year_to_2020(out->year);
3707 out->yysz = 3;
3708 SKIP_THth(s, n->suffix);
3709 break;
3710 case DCH_YY:
3711 case DCH_IY:
3712 len = from_char_parse_int(&out->year, &s, n, have_error);
3713 CHECK_ERROR;
3714 if (len < 4)
3715 out->year = adjust_partial_year_to_2020(out->year);
3716 out->yysz = 2;
3717 SKIP_THth(s, n->suffix);
3718 break;
3719 case DCH_Y:
3720 case DCH_I:
3721 len = from_char_parse_int(&out->year, &s, n, have_error);
3722 CHECK_ERROR;
3723 if (len < 4)
3724 out->year = adjust_partial_year_to_2020(out->year);
3725 out->yysz = 1;
3726 SKIP_THth(s, n->suffix);
3727 break;
3728 case DCH_RM:
3729 case DCH_rm:
3730 from_char_seq_search(&value, &s, rm_months_lower,
3731 NULL, InvalidOid,
3732 n, have_error);
3733 CHECK_ERROR;
3734 from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
3735 n, have_error);
3736 CHECK_ERROR;
3737 break;
3738 case DCH_W:
3739 from_char_parse_int(&out->w, &s, n, have_error);
3740 CHECK_ERROR;
3741 SKIP_THth(s, n->suffix);
3742 break;
3743 case DCH_J:
3744 from_char_parse_int(&out->j, &s, n, have_error);
3745 CHECK_ERROR;
3746 SKIP_THth(s, n->suffix);
3747 break;
3748 }
3749
3750 /* Ignore all spaces after fields */
3751 if (!fx_mode)
3752 {
3753 extra_skip = 0;
3754 while (*s != '\0' && isspace((unsigned char) *s))
3755 {
3756 s++;
3757 extra_skip++;
3758 }
3759 }
3760 }
3761
3762 /*
3763 * Standard parsing mode doesn't allow unmatched format patterns or
3764 * trailing characters in the input string.
3765 */
3766 if (std)
3767 {
3768 if (n->type != NODE_TYPE_END)
3769 RETURN_ERROR(ereport(ERROR,
3770 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3771 errmsg("input string is too short for datetime format"))));
3772
3773 while (*s != '\0' && isspace((unsigned char) *s))
3774 s++;
3775
3776 if (*s != '\0')
3777 RETURN_ERROR(ereport(ERROR,
3778 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3779 errmsg("trailing characters remain in input string "
3780 "after datetime format"))));
3781 }
3782
3783 on_error:
3784 return;
3785 }
3786
3787 /*
3788 * The invariant for DCH cache entry management is that DCHCounter is equal
3789 * to the maximum age value among the existing entries, and we increment it
3790 * whenever an access occurs. If we approach overflow, deal with that by
3791 * halving all the age values, so that we retain a fairly accurate idea of
3792 * which entries are oldest.
3793 */
3794 static inline void
DCH_prevent_counter_overflow(void)3795 DCH_prevent_counter_overflow(void)
3796 {
3797 if (DCHCounter >= (INT_MAX - 1))
3798 {
3799 for (int i = 0; i < n_DCHCache; i++)
3800 DCHCache[i]->age >>= 1;
3801 DCHCounter >>= 1;
3802 }
3803 }
3804
3805 /*
3806 * Get mask of date/time/zone components present in format nodes.
3807 *
3808 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3809 */
3810 static int
DCH_datetime_type(FormatNode * node,bool * have_error)3811 DCH_datetime_type(FormatNode *node, bool *have_error)
3812 {
3813 FormatNode *n;
3814 int flags = 0;
3815
3816 for (n = node; n->type != NODE_TYPE_END; n++)
3817 {
3818 if (n->type != NODE_TYPE_ACTION)
3819 continue;
3820
3821 switch (n->key->id)
3822 {
3823 case DCH_FX:
3824 break;
3825 case DCH_A_M:
3826 case DCH_P_M:
3827 case DCH_a_m:
3828 case DCH_p_m:
3829 case DCH_AM:
3830 case DCH_PM:
3831 case DCH_am:
3832 case DCH_pm:
3833 case DCH_HH:
3834 case DCH_HH12:
3835 case DCH_HH24:
3836 case DCH_MI:
3837 case DCH_SS:
3838 case DCH_MS: /* millisecond */
3839 case DCH_US: /* microsecond */
3840 case DCH_FF1:
3841 case DCH_FF2:
3842 case DCH_FF3:
3843 case DCH_FF4:
3844 case DCH_FF5:
3845 case DCH_FF6:
3846 case DCH_SSSS:
3847 flags |= DCH_TIMED;
3848 break;
3849 case DCH_tz:
3850 case DCH_TZ:
3851 case DCH_OF:
3852 RETURN_ERROR(ereport(ERROR,
3853 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3854 errmsg("formatting field \"%s\" is only supported in to_char",
3855 n->key->name))));
3856 flags |= DCH_ZONED;
3857 break;
3858 case DCH_TZH:
3859 case DCH_TZM:
3860 flags |= DCH_ZONED;
3861 break;
3862 case DCH_A_D:
3863 case DCH_B_C:
3864 case DCH_a_d:
3865 case DCH_b_c:
3866 case DCH_AD:
3867 case DCH_BC:
3868 case DCH_ad:
3869 case DCH_bc:
3870 case DCH_MONTH:
3871 case DCH_Month:
3872 case DCH_month:
3873 case DCH_MON:
3874 case DCH_Mon:
3875 case DCH_mon:
3876 case DCH_MM:
3877 case DCH_DAY:
3878 case DCH_Day:
3879 case DCH_day:
3880 case DCH_DY:
3881 case DCH_Dy:
3882 case DCH_dy:
3883 case DCH_DDD:
3884 case DCH_IDDD:
3885 case DCH_DD:
3886 case DCH_D:
3887 case DCH_ID:
3888 case DCH_WW:
3889 case DCH_Q:
3890 case DCH_CC:
3891 case DCH_Y_YYY:
3892 case DCH_YYYY:
3893 case DCH_IYYY:
3894 case DCH_YYY:
3895 case DCH_IYY:
3896 case DCH_YY:
3897 case DCH_IY:
3898 case DCH_Y:
3899 case DCH_I:
3900 case DCH_RM:
3901 case DCH_rm:
3902 case DCH_W:
3903 case DCH_J:
3904 flags |= DCH_DATED;
3905 break;
3906 }
3907 }
3908
3909 on_error:
3910 return flags;
3911 }
3912
3913 /* select a DCHCacheEntry to hold the given format picture */
3914 static DCHCacheEntry *
DCH_cache_getnew(const char * str,bool std)3915 DCH_cache_getnew(const char *str, bool std)
3916 {
3917 DCHCacheEntry *ent;
3918
3919 /* Ensure we can advance DCHCounter below */
3920 DCH_prevent_counter_overflow();
3921
3922 /*
3923 * If cache is full, remove oldest entry (or recycle first not-valid one)
3924 */
3925 if (n_DCHCache >= DCH_CACHE_ENTRIES)
3926 {
3927 DCHCacheEntry *old = DCHCache[0];
3928
3929 #ifdef DEBUG_TO_FROM_CHAR
3930 elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3931 #endif
3932 if (old->valid)
3933 {
3934 for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3935 {
3936 ent = DCHCache[i];
3937 if (!ent->valid)
3938 {
3939 old = ent;
3940 break;
3941 }
3942 if (ent->age < old->age)
3943 old = ent;
3944 }
3945 }
3946 #ifdef DEBUG_TO_FROM_CHAR
3947 elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3948 #endif
3949 old->valid = false;
3950 StrNCpy(old->str, str, DCH_CACHE_SIZE + 1);
3951 old->age = (++DCHCounter);
3952 /* caller is expected to fill format, then set valid */
3953 return old;
3954 }
3955 else
3956 {
3957 #ifdef DEBUG_TO_FROM_CHAR
3958 elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3959 #endif
3960 Assert(DCHCache[n_DCHCache] == NULL);
3961 DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3962 MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
3963 ent->valid = false;
3964 StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
3965 ent->std = std;
3966 ent->age = (++DCHCounter);
3967 /* caller is expected to fill format, then set valid */
3968 ++n_DCHCache;
3969 return ent;
3970 }
3971 }
3972
3973 /* look for an existing DCHCacheEntry matching the given format picture */
3974 static DCHCacheEntry *
DCH_cache_search(const char * str,bool std)3975 DCH_cache_search(const char *str, bool std)
3976 {
3977 /* Ensure we can advance DCHCounter below */
3978 DCH_prevent_counter_overflow();
3979
3980 for (int i = 0; i < n_DCHCache; i++)
3981 {
3982 DCHCacheEntry *ent = DCHCache[i];
3983
3984 if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
3985 {
3986 ent->age = (++DCHCounter);
3987 return ent;
3988 }
3989 }
3990
3991 return NULL;
3992 }
3993
3994 /* Find or create a DCHCacheEntry for the given format picture */
3995 static DCHCacheEntry *
DCH_cache_fetch(const char * str,bool std)3996 DCH_cache_fetch(const char *str, bool std)
3997 {
3998 DCHCacheEntry *ent;
3999
4000 if ((ent = DCH_cache_search(str, std)) == NULL)
4001 {
4002 /*
4003 * Not in the cache, must run parser and save a new format-picture to
4004 * the cache. Do not mark the cache entry valid until parsing
4005 * succeeds.
4006 */
4007 ent = DCH_cache_getnew(str, std);
4008
4009 parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
4010 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4011
4012 ent->valid = true;
4013 }
4014 return ent;
4015 }
4016
4017 /*
4018 * Format a date/time or interval into a string according to fmt.
4019 * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
4020 * for formatting.
4021 */
4022 static text *
datetime_to_char_body(TmToChar * tmtc,text * fmt,bool is_interval,Oid collid)4023 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4024 {
4025 FormatNode *format;
4026 char *fmt_str,
4027 *result;
4028 bool incache;
4029 int fmt_len;
4030 text *res;
4031
4032 /*
4033 * Convert fmt to C string
4034 */
4035 fmt_str = text_to_cstring(fmt);
4036 fmt_len = strlen(fmt_str);
4037
4038 /*
4039 * Allocate workspace for result as C string
4040 */
4041 result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4042 *result = '\0';
4043
4044 if (fmt_len > DCH_CACHE_SIZE)
4045 {
4046 /*
4047 * Allocate new memory if format picture is bigger than static cache
4048 * and do not use cache (call parser always)
4049 */
4050 incache = false;
4051
4052 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4053
4054 parse_format(format, fmt_str, DCH_keywords,
4055 DCH_suff, DCH_index, DCH_FLAG, NULL);
4056 }
4057 else
4058 {
4059 /*
4060 * Use cache buffers
4061 */
4062 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4063
4064 incache = true;
4065 format = ent->format;
4066 }
4067
4068 /* The real work is here */
4069 DCH_to_char(format, is_interval, tmtc, result, collid);
4070
4071 if (!incache)
4072 pfree(format);
4073
4074 pfree(fmt_str);
4075
4076 /* convert C-string result to TEXT format */
4077 res = cstring_to_text(result);
4078
4079 pfree(result);
4080 return res;
4081 }
4082
4083 /****************************************************************************
4084 * Public routines
4085 ***************************************************************************/
4086
4087 /* -------------------
4088 * TIMESTAMP to_char()
4089 * -------------------
4090 */
4091 Datum
timestamp_to_char(PG_FUNCTION_ARGS)4092 timestamp_to_char(PG_FUNCTION_ARGS)
4093 {
4094 Timestamp dt = PG_GETARG_TIMESTAMP(0);
4095 text *fmt = PG_GETARG_TEXT_PP(1),
4096 *res;
4097 TmToChar tmtc;
4098 struct pg_tm *tm;
4099 int thisdate;
4100
4101 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4102 PG_RETURN_NULL();
4103
4104 ZERO_tmtc(&tmtc);
4105 tm = tmtcTm(&tmtc);
4106
4107 if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4108 ereport(ERROR,
4109 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4110 errmsg("timestamp out of range")));
4111
4112 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4113 tm->tm_wday = (thisdate + 1) % 7;
4114 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4115
4116 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4117 PG_RETURN_NULL();
4118
4119 PG_RETURN_TEXT_P(res);
4120 }
4121
4122 Datum
timestamptz_to_char(PG_FUNCTION_ARGS)4123 timestamptz_to_char(PG_FUNCTION_ARGS)
4124 {
4125 TimestampTz dt = PG_GETARG_TIMESTAMP(0);
4126 text *fmt = PG_GETARG_TEXT_PP(1),
4127 *res;
4128 TmToChar tmtc;
4129 int tz;
4130 struct pg_tm *tm;
4131 int thisdate;
4132
4133 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4134 PG_RETURN_NULL();
4135
4136 ZERO_tmtc(&tmtc);
4137 tm = tmtcTm(&tmtc);
4138
4139 if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4140 ereport(ERROR,
4141 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4142 errmsg("timestamp out of range")));
4143
4144 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4145 tm->tm_wday = (thisdate + 1) % 7;
4146 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4147
4148 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4149 PG_RETURN_NULL();
4150
4151 PG_RETURN_TEXT_P(res);
4152 }
4153
4154
4155 /* -------------------
4156 * INTERVAL to_char()
4157 * -------------------
4158 */
4159 Datum
interval_to_char(PG_FUNCTION_ARGS)4160 interval_to_char(PG_FUNCTION_ARGS)
4161 {
4162 Interval *it = PG_GETARG_INTERVAL_P(0);
4163 text *fmt = PG_GETARG_TEXT_PP(1),
4164 *res;
4165 TmToChar tmtc;
4166 struct pg_tm *tm;
4167
4168 if (VARSIZE_ANY_EXHDR(fmt) <= 0)
4169 PG_RETURN_NULL();
4170
4171 ZERO_tmtc(&tmtc);
4172 tm = tmtcTm(&tmtc);
4173
4174 if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
4175 PG_RETURN_NULL();
4176
4177 /* wday is meaningless, yday approximates the total span in days */
4178 tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4179
4180 if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4181 PG_RETURN_NULL();
4182
4183 PG_RETURN_TEXT_P(res);
4184 }
4185
4186 /* ---------------------
4187 * TO_TIMESTAMP()
4188 *
4189 * Make Timestamp from date_str which is formatted at argument 'fmt'
4190 * ( to_timestamp is reverse to_char() )
4191 * ---------------------
4192 */
4193 Datum
to_timestamp(PG_FUNCTION_ARGS)4194 to_timestamp(PG_FUNCTION_ARGS)
4195 {
4196 text *date_txt = PG_GETARG_TEXT_PP(0);
4197 text *fmt = PG_GETARG_TEXT_PP(1);
4198 Oid collid = PG_GET_COLLATION();
4199 Timestamp result;
4200 int tz;
4201 struct pg_tm tm;
4202 fsec_t fsec;
4203 int fprec;
4204
4205 do_to_timestamp(date_txt, fmt, collid, false,
4206 &tm, &fsec, &fprec, NULL, NULL);
4207
4208 /* Use the specified time zone, if any. */
4209 if (tm.tm_zone)
4210 {
4211 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
4212
4213 if (dterr)
4214 DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4215 }
4216 else
4217 tz = DetermineTimeZoneOffset(&tm, session_timezone);
4218
4219 if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4220 ereport(ERROR,
4221 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4222 errmsg("timestamp out of range")));
4223
4224 /* Use the specified fractional precision, if any. */
4225 if (fprec)
4226 AdjustTimestampForTypmod(&result, fprec);
4227
4228 PG_RETURN_TIMESTAMP(result);
4229 }
4230
4231 /* ----------
4232 * TO_DATE
4233 * Make Date from date_str which is formatted at argument 'fmt'
4234 * ----------
4235 */
4236 Datum
to_date(PG_FUNCTION_ARGS)4237 to_date(PG_FUNCTION_ARGS)
4238 {
4239 text *date_txt = PG_GETARG_TEXT_PP(0);
4240 text *fmt = PG_GETARG_TEXT_PP(1);
4241 Oid collid = PG_GET_COLLATION();
4242 DateADT result;
4243 struct pg_tm tm;
4244 fsec_t fsec;
4245
4246 do_to_timestamp(date_txt, fmt, collid, false,
4247 &tm, &fsec, NULL, NULL, NULL);
4248
4249 /* Prevent overflow in Julian-day routines */
4250 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4251 ereport(ERROR,
4252 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4253 errmsg("date out of range: \"%s\"",
4254 text_to_cstring(date_txt))));
4255
4256 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4257
4258 /* Now check for just-out-of-range dates */
4259 if (!IS_VALID_DATE(result))
4260 ereport(ERROR,
4261 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4262 errmsg("date out of range: \"%s\"",
4263 text_to_cstring(date_txt))));
4264
4265 PG_RETURN_DATEADT(result);
4266 }
4267
4268 /*
4269 * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4270 * as a format string. The collation 'collid' may be used for case-folding
4271 * rules in some cases. 'strict' specifies standard parsing mode.
4272 *
4273 * The actual data type (returned in 'typid', 'typmod') is determined by
4274 * the presence of date/time/zone components in the format string.
4275 *
4276 * When timezone component is present, the corresponding offset is
4277 * returned in '*tz'.
4278 *
4279 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
4280 * and zero value is returned.
4281 */
4282 Datum
parse_datetime(text * date_txt,text * fmt,Oid collid,bool strict,Oid * typid,int32 * typmod,int * tz,bool * have_error)4283 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4284 Oid *typid, int32 *typmod, int *tz,
4285 bool *have_error)
4286 {
4287 struct pg_tm tm;
4288 fsec_t fsec;
4289 int fprec;
4290 uint32 flags;
4291
4292 do_to_timestamp(date_txt, fmt, collid, strict,
4293 &tm, &fsec, &fprec, &flags, have_error);
4294 CHECK_ERROR;
4295
4296 *typmod = fprec ? fprec : -1; /* fractional part precision */
4297
4298 if (flags & DCH_DATED)
4299 {
4300 if (flags & DCH_TIMED)
4301 {
4302 if (flags & DCH_ZONED)
4303 {
4304 TimestampTz result;
4305
4306 if (tm.tm_zone)
4307 {
4308 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4309
4310 if (dterr)
4311 DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4312 }
4313 else
4314 {
4315 /*
4316 * Time zone is present in format string, but not in input
4317 * string. Assuming do_to_timestamp() triggers no error
4318 * this should be possible only in non-strict case.
4319 */
4320 Assert(!strict);
4321
4322 RETURN_ERROR(ereport(ERROR,
4323 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4324 errmsg("missing time zone in input string for type timestamptz"))));
4325 }
4326
4327 if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4328 RETURN_ERROR(ereport(ERROR,
4329 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4330 errmsg("timestamptz out of range"))));
4331
4332 AdjustTimestampForTypmod(&result, *typmod);
4333
4334 *typid = TIMESTAMPTZOID;
4335 return TimestampTzGetDatum(result);
4336 }
4337 else
4338 {
4339 Timestamp result;
4340
4341 if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4342 RETURN_ERROR(ereport(ERROR,
4343 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4344 errmsg("timestamp out of range"))));
4345
4346 AdjustTimestampForTypmod(&result, *typmod);
4347
4348 *typid = TIMESTAMPOID;
4349 return TimestampGetDatum(result);
4350 }
4351 }
4352 else
4353 {
4354 if (flags & DCH_ZONED)
4355 {
4356 RETURN_ERROR(ereport(ERROR,
4357 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4358 errmsg("datetime format is zoned but not timed"))));
4359 }
4360 else
4361 {
4362 DateADT result;
4363
4364 /* Prevent overflow in Julian-day routines */
4365 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4366 RETURN_ERROR(ereport(ERROR,
4367 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4368 errmsg("date out of range: \"%s\"",
4369 text_to_cstring(date_txt)))));
4370
4371 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4372 POSTGRES_EPOCH_JDATE;
4373
4374 /* Now check for just-out-of-range dates */
4375 if (!IS_VALID_DATE(result))
4376 RETURN_ERROR(ereport(ERROR,
4377 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4378 errmsg("date out of range: \"%s\"",
4379 text_to_cstring(date_txt)))));
4380
4381 *typid = DATEOID;
4382 return DateADTGetDatum(result);
4383 }
4384 }
4385 }
4386 else if (flags & DCH_TIMED)
4387 {
4388 if (flags & DCH_ZONED)
4389 {
4390 TimeTzADT *result = palloc(sizeof(TimeTzADT));
4391
4392 if (tm.tm_zone)
4393 {
4394 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4395
4396 if (dterr)
4397 RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
4398 }
4399 else
4400 {
4401 /*
4402 * Time zone is present in format string, but not in input
4403 * string. Assuming do_to_timestamp() triggers no error this
4404 * should be possible only in non-strict case.
4405 */
4406 Assert(!strict);
4407
4408 RETURN_ERROR(ereport(ERROR,
4409 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4410 errmsg("missing time zone in input string for type timetz"))));
4411 }
4412
4413 if (tm2timetz(&tm, fsec, *tz, result) != 0)
4414 RETURN_ERROR(ereport(ERROR,
4415 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4416 errmsg("timetz out of range"))));
4417
4418 AdjustTimeForTypmod(&result->time, *typmod);
4419
4420 *typid = TIMETZOID;
4421 return TimeTzADTPGetDatum(result);
4422 }
4423 else
4424 {
4425 TimeADT result;
4426
4427 if (tm2time(&tm, fsec, &result) != 0)
4428 RETURN_ERROR(ereport(ERROR,
4429 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4430 errmsg("time out of range"))));
4431
4432 AdjustTimeForTypmod(&result, *typmod);
4433
4434 *typid = TIMEOID;
4435 return TimeADTGetDatum(result);
4436 }
4437 }
4438 else
4439 {
4440 RETURN_ERROR(ereport(ERROR,
4441 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4442 errmsg("datetime format is not dated and not timed"))));
4443 }
4444
4445 on_error:
4446 return (Datum) 0;
4447 }
4448
4449 /*
4450 * do_to_timestamp: shared code for to_timestamp and to_date
4451 *
4452 * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4453 * fractional seconds, and fractional precision.
4454 *
4455 * 'collid' identifies the collation to use, if needed.
4456 * 'std' specifies standard parsing mode.
4457 * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4458 * if that is not NULL.
4459 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
4460 *
4461 * We parse 'fmt' into a list of FormatNodes, which is then passed to
4462 * DCH_from_char to populate a TmFromChar with the parsed contents of
4463 * 'date_txt'.
4464 *
4465 * The TmFromChar is then analysed and converted into the final results in
4466 * struct 'tm', 'fsec', and 'fprec'.
4467 */
4468 static void
do_to_timestamp(text * date_txt,text * fmt,Oid collid,bool std,struct pg_tm * tm,fsec_t * fsec,int * fprec,uint32 * flags,bool * have_error)4469 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4470 struct pg_tm *tm, fsec_t *fsec, int *fprec,
4471 uint32 *flags, bool *have_error)
4472 {
4473 FormatNode *format = NULL;
4474 TmFromChar tmfc;
4475 int fmt_len;
4476 char *date_str;
4477 int fmask;
4478 bool incache = false;
4479
4480 Assert(tm != NULL);
4481 Assert(fsec != NULL);
4482
4483 date_str = text_to_cstring(date_txt);
4484
4485 ZERO_tmfc(&tmfc);
4486 ZERO_tm(tm);
4487 *fsec = 0;
4488 if (fprec)
4489 *fprec = 0;
4490 if (flags)
4491 *flags = 0;
4492 fmask = 0; /* bit mask for ValidateDate() */
4493
4494 fmt_len = VARSIZE_ANY_EXHDR(fmt);
4495
4496 if (fmt_len)
4497 {
4498 char *fmt_str;
4499
4500 fmt_str = text_to_cstring(fmt);
4501
4502 if (fmt_len > DCH_CACHE_SIZE)
4503 {
4504 /*
4505 * Allocate new memory if format picture is bigger than static
4506 * cache and do not use cache (call parser always)
4507 */
4508 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4509
4510 parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
4511 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4512 }
4513 else
4514 {
4515 /*
4516 * Use cache buffers
4517 */
4518 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4519
4520 incache = true;
4521 format = ent->format;
4522 }
4523
4524 #ifdef DEBUG_TO_FROM_CHAR
4525 /* dump_node(format, fmt_len); */
4526 /* dump_index(DCH_keywords, DCH_index); */
4527 #endif
4528
4529 DCH_from_char(format, date_str, &tmfc, collid, std, have_error);
4530 CHECK_ERROR;
4531
4532 pfree(fmt_str);
4533
4534 if (flags)
4535 *flags = DCH_datetime_type(format, have_error);
4536
4537 if (!incache)
4538 {
4539 pfree(format);
4540 format = NULL;
4541 }
4542
4543 CHECK_ERROR;
4544 }
4545
4546 DEBUG_TMFC(&tmfc);
4547
4548 /*
4549 * Convert to_date/to_timestamp input fields to standard 'tm'
4550 */
4551 if (tmfc.ssss)
4552 {
4553 int x = tmfc.ssss;
4554
4555 tm->tm_hour = x / SECS_PER_HOUR;
4556 x %= SECS_PER_HOUR;
4557 tm->tm_min = x / SECS_PER_MINUTE;
4558 x %= SECS_PER_MINUTE;
4559 tm->tm_sec = x;
4560 }
4561
4562 if (tmfc.ss)
4563 tm->tm_sec = tmfc.ss;
4564 if (tmfc.mi)
4565 tm->tm_min = tmfc.mi;
4566 if (tmfc.hh)
4567 tm->tm_hour = tmfc.hh;
4568
4569 if (tmfc.clock == CLOCK_12_HOUR)
4570 {
4571 if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4572 {
4573 RETURN_ERROR(ereport(ERROR,
4574 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4575 errmsg("hour \"%d\" is invalid for the 12-hour clock",
4576 tm->tm_hour),
4577 errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
4578 }
4579
4580 if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4581 tm->tm_hour += HOURS_PER_DAY / 2;
4582 else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4583 tm->tm_hour = 0;
4584 }
4585
4586 if (tmfc.year)
4587 {
4588 /*
4589 * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4590 * the year in the given century. Keep in mind that the 21st century
4591 * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4592 * 600BC to 501BC.
4593 */
4594 if (tmfc.cc && tmfc.yysz <= 2)
4595 {
4596 if (tmfc.bc)
4597 tmfc.cc = -tmfc.cc;
4598 tm->tm_year = tmfc.year % 100;
4599 if (tm->tm_year)
4600 {
4601 if (tmfc.cc >= 0)
4602 tm->tm_year += (tmfc.cc - 1) * 100;
4603 else
4604 tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4605 }
4606 else
4607 {
4608 /* find century year for dates ending in "00" */
4609 tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4610 }
4611 }
4612 else
4613 {
4614 /* If a 4-digit year is provided, we use that and ignore CC. */
4615 tm->tm_year = tmfc.year;
4616 if (tmfc.bc)
4617 tm->tm_year = -tm->tm_year;
4618 /* correct for our representation of BC years */
4619 if (tm->tm_year < 0)
4620 tm->tm_year++;
4621 }
4622 fmask |= DTK_M(YEAR);
4623 }
4624 else if (tmfc.cc)
4625 {
4626 /* use first year of century */
4627 if (tmfc.bc)
4628 tmfc.cc = -tmfc.cc;
4629 if (tmfc.cc >= 0)
4630 /* +1 because 21st century started in 2001 */
4631 tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4632 else
4633 /* +1 because year == 599 is 600 BC */
4634 tm->tm_year = tmfc.cc * 100 + 1;
4635 fmask |= DTK_M(YEAR);
4636 }
4637
4638 if (tmfc.j)
4639 {
4640 j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4641 fmask |= DTK_DATE_M;
4642 }
4643
4644 if (tmfc.ww)
4645 {
4646 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4647 {
4648 /*
4649 * If tmfc.d is not set, then the date is left at the beginning of
4650 * the ISO week (Monday).
4651 */
4652 if (tmfc.d)
4653 isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4654 else
4655 isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4656 fmask |= DTK_DATE_M;
4657 }
4658 else
4659 tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4660 }
4661
4662 if (tmfc.w)
4663 tmfc.dd = (tmfc.w - 1) * 7 + 1;
4664 if (tmfc.dd)
4665 {
4666 tm->tm_mday = tmfc.dd;
4667 fmask |= DTK_M(DAY);
4668 }
4669 if (tmfc.mm)
4670 {
4671 tm->tm_mon = tmfc.mm;
4672 fmask |= DTK_M(MONTH);
4673 }
4674
4675 if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4676 {
4677 /*
4678 * The month and day field have not been set, so we use the
4679 * day-of-year field to populate them. Depending on the date mode,
4680 * this field may be interpreted as a Gregorian day-of-year, or an ISO
4681 * week date day-of-year.
4682 */
4683
4684 if (!tm->tm_year && !tmfc.bc)
4685 {
4686 RETURN_ERROR(ereport(ERROR,
4687 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4688 errmsg("cannot calculate day of year without year information"))));
4689 }
4690
4691 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4692 {
4693 int j0; /* zeroth day of the ISO year, in Julian */
4694
4695 j0 = isoweek2j(tm->tm_year, 1) - 1;
4696
4697 j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4698 fmask |= DTK_DATE_M;
4699 }
4700 else
4701 {
4702 const int *y;
4703 int i;
4704
4705 static const int ysum[2][13] = {
4706 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4707 {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4708
4709 y = ysum[isleap(tm->tm_year)];
4710
4711 for (i = 1; i <= MONTHS_PER_YEAR; i++)
4712 {
4713 if (tmfc.ddd <= y[i])
4714 break;
4715 }
4716 if (tm->tm_mon <= 1)
4717 tm->tm_mon = i;
4718
4719 if (tm->tm_mday <= 1)
4720 tm->tm_mday = tmfc.ddd - y[i - 1];
4721
4722 fmask |= DTK_M(MONTH) | DTK_M(DAY);
4723 }
4724 }
4725
4726 if (tmfc.ms)
4727 *fsec += tmfc.ms * 1000;
4728 if (tmfc.us)
4729 *fsec += tmfc.us;
4730 if (fprec)
4731 *fprec = tmfc.ff; /* fractional precision, if specified */
4732
4733 /* Range-check date fields according to bit mask computed above */
4734 if (fmask != 0)
4735 {
4736 /* We already dealt with AD/BC, so pass isjulian = true */
4737 int dterr = ValidateDate(fmask, true, false, false, tm);
4738
4739 if (dterr != 0)
4740 {
4741 /*
4742 * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4743 * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4744 * irrelevant hint about datestyle.
4745 */
4746 RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4747 }
4748 }
4749
4750 /* Range-check time fields too */
4751 if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4752 tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4753 tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4754 *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4755 {
4756 RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4757 }
4758
4759 /* Save parsed time-zone into tm->tm_zone if it was specified */
4760 if (tmfc.tzsign)
4761 {
4762 char *tz;
4763
4764 if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4765 tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4766 {
4767 RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
4768 }
4769
4770 tz = psprintf("%c%02d:%02d",
4771 tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
4772
4773 tm->tm_zone = tz;
4774 }
4775
4776 DEBUG_TM(tm);
4777
4778 on_error:
4779
4780 if (format && !incache)
4781 pfree(format);
4782
4783 pfree(date_str);
4784 }
4785
4786
4787 /**********************************************************************
4788 * the NUMBER version part
4789 *********************************************************************/
4790
4791
/*
 * Set the first 'max' bytes of 'str' to the character 'c', terminate the
 * string right after them, and return 'str'.  The caller must supply a
 * buffer of at least max + 1 bytes.
 */
static char *
fill_str(char *str, int c, int max)
{
	memset(str, c, max);
	str[max] = '\0';

	return str;
}
4799
/* Reset every field of the number description pointed to by _n to zero. */
#define zeroize_NUM(_n) \
do { \
	(_n)->flag = \
		(_n)->lsign = \
		(_n)->pre = \
		(_n)->post = \
		(_n)->pre_lsign_num = \
		(_n)->need_locale = \
		(_n)->multi = \
		(_n)->zero_start = \
		(_n)->zero_end = 0; \
} while(0)
4812
4813 /* This works the same as DCH_prevent_counter_overflow */
4814 static inline void
NUM_prevent_counter_overflow(void)4815 NUM_prevent_counter_overflow(void)
4816 {
4817 if (NUMCounter >= (INT_MAX - 1))
4818 {
4819 for (int i = 0; i < n_NUMCache; i++)
4820 NUMCache[i]->age >>= 1;
4821 NUMCounter >>= 1;
4822 }
4823 }
4824
4825 /* select a NUMCacheEntry to hold the given format picture */
4826 static NUMCacheEntry *
NUM_cache_getnew(const char * str)4827 NUM_cache_getnew(const char *str)
4828 {
4829 NUMCacheEntry *ent;
4830
4831 /* Ensure we can advance NUMCounter below */
4832 NUM_prevent_counter_overflow();
4833
4834 /*
4835 * If cache is full, remove oldest entry (or recycle first not-valid one)
4836 */
4837 if (n_NUMCache >= NUM_CACHE_ENTRIES)
4838 {
4839 NUMCacheEntry *old = NUMCache[0];
4840
4841 #ifdef DEBUG_TO_FROM_CHAR
4842 elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4843 #endif
4844 if (old->valid)
4845 {
4846 for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4847 {
4848 ent = NUMCache[i];
4849 if (!ent->valid)
4850 {
4851 old = ent;
4852 break;
4853 }
4854 if (ent->age < old->age)
4855 old = ent;
4856 }
4857 }
4858 #ifdef DEBUG_TO_FROM_CHAR
4859 elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4860 #endif
4861 old->valid = false;
4862 StrNCpy(old->str, str, NUM_CACHE_SIZE + 1);
4863 old->age = (++NUMCounter);
4864 /* caller is expected to fill format and Num, then set valid */
4865 return old;
4866 }
4867 else
4868 {
4869 #ifdef DEBUG_TO_FROM_CHAR
4870 elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4871 #endif
4872 Assert(NUMCache[n_NUMCache] == NULL);
4873 NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4874 MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry));
4875 ent->valid = false;
4876 StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1);
4877 ent->age = (++NUMCounter);
4878 /* caller is expected to fill format and Num, then set valid */
4879 ++n_NUMCache;
4880 return ent;
4881 }
4882 }
4883
4884 /* look for an existing NUMCacheEntry matching the given format picture */
4885 static NUMCacheEntry *
NUM_cache_search(const char * str)4886 NUM_cache_search(const char *str)
4887 {
4888 /* Ensure we can advance NUMCounter below */
4889 NUM_prevent_counter_overflow();
4890
4891 for (int i = 0; i < n_NUMCache; i++)
4892 {
4893 NUMCacheEntry *ent = NUMCache[i];
4894
4895 if (ent->valid && strcmp(ent->str, str) == 0)
4896 {
4897 ent->age = (++NUMCounter);
4898 return ent;
4899 }
4900 }
4901
4902 return NULL;
4903 }
4904
4905 /* Find or create a NUMCacheEntry for the given format picture */
4906 static NUMCacheEntry *
NUM_cache_fetch(const char * str)4907 NUM_cache_fetch(const char *str)
4908 {
4909 NUMCacheEntry *ent;
4910
4911 if ((ent = NUM_cache_search(str)) == NULL)
4912 {
4913 /*
4914 * Not in the cache, must run parser and save a new format-picture to
4915 * the cache. Do not mark the cache entry valid until parsing
4916 * succeeds.
4917 */
4918 ent = NUM_cache_getnew(str);
4919
4920 zeroize_NUM(&ent->Num);
4921
4922 parse_format(ent->format, str, NUM_keywords,
4923 NULL, NUM_index, NUM_FLAG, &ent->Num);
4924
4925 ent->valid = true;
4926 }
4927 return ent;
4928 }
4929
4930 /* ----------
4931 * Cache routine for NUM to_char version
4932 * ----------
4933 */
4934 static FormatNode *
NUM_cache(int len,NUMDesc * Num,text * pars_str,bool * shouldFree)4935 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4936 {
4937 FormatNode *format = NULL;
4938 char *str;
4939
4940 str = text_to_cstring(pars_str);
4941
4942 if (len > NUM_CACHE_SIZE)
4943 {
4944 /*
4945 * Allocate new memory if format picture is bigger than static cache
4946 * and do not use cache (call parser always)
4947 */
4948 format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4949
4950 *shouldFree = true;
4951
4952 zeroize_NUM(Num);
4953
4954 parse_format(format, str, NUM_keywords,
4955 NULL, NUM_index, NUM_FLAG, Num);
4956 }
4957 else
4958 {
4959 /*
4960 * Use cache buffers
4961 */
4962 NUMCacheEntry *ent = NUM_cache_fetch(str);
4963
4964 *shouldFree = false;
4965
4966 format = ent->format;
4967
4968 /*
4969 * Copy cache to used struct
4970 */
4971 Num->flag = ent->Num.flag;
4972 Num->lsign = ent->Num.lsign;
4973 Num->pre = ent->Num.pre;
4974 Num->post = ent->Num.post;
4975 Num->pre_lsign_num = ent->Num.pre_lsign_num;
4976 Num->need_locale = ent->Num.need_locale;
4977 Num->multi = ent->Num.multi;
4978 Num->zero_start = ent->Num.zero_start;
4979 Num->zero_end = ent->Num.zero_end;
4980 }
4981
4982 #ifdef DEBUG_TO_FROM_CHAR
4983 /* dump_node(format, len); */
4984 dump_index(NUM_keywords, NUM_index);
4985 #endif
4986
4987 pfree(str);
4988 return format;
4989 }
4990
4991
4992 static char *
int_to_roman(int number)4993 int_to_roman(int number)
4994 {
4995 int len = 0,
4996 num = 0;
4997 char *p = NULL,
4998 *result,
4999 numstr[12];
5000
5001 result = (char *) palloc(16);
5002 *result = '\0';
5003
5004 if (number > 3999 || number < 1)
5005 {
5006 fill_str(result, '#', 15);
5007 return result;
5008 }
5009 len = snprintf(numstr, sizeof(numstr), "%d", number);
5010
5011 for (p = numstr; *p != '\0'; p++, --len)
5012 {
5013 num = *p - 49; /* 48 ascii + 1 */
5014 if (num < 0)
5015 continue;
5016
5017 if (len > 3)
5018 {
5019 while (num-- != -1)
5020 strcat(result, "M");
5021 }
5022 else
5023 {
5024 if (len == 3)
5025 strcat(result, rm100[num]);
5026 else if (len == 2)
5027 strcat(result, rm10[num]);
5028 else if (len == 1)
5029 strcat(result, rm1[num]);
5030 }
5031 }
5032 return result;
5033 }
5034
5035
5036
5037 /* ----------
5038 * Locale
5039 * ----------
5040 */
5041 static void
NUM_prepare_locale(NUMProc * Np)5042 NUM_prepare_locale(NUMProc *Np)
5043 {
5044 if (Np->Num->need_locale)
5045 {
5046 struct lconv *lconv;
5047
5048 /*
5049 * Get locales
5050 */
5051 lconv = PGLC_localeconv();
5052
5053 /*
5054 * Positive / Negative number sign
5055 */
5056 if (lconv->negative_sign && *lconv->negative_sign)
5057 Np->L_negative_sign = lconv->negative_sign;
5058 else
5059 Np->L_negative_sign = "-";
5060
5061 if (lconv->positive_sign && *lconv->positive_sign)
5062 Np->L_positive_sign = lconv->positive_sign;
5063 else
5064 Np->L_positive_sign = "+";
5065
5066 /*
5067 * Number decimal point
5068 */
5069 if (lconv->decimal_point && *lconv->decimal_point)
5070 Np->decimal = lconv->decimal_point;
5071
5072 else
5073 Np->decimal = ".";
5074
5075 if (!IS_LDECIMAL(Np->Num))
5076 Np->decimal = ".";
5077
5078 /*
5079 * Number thousands separator
5080 *
5081 * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
5082 * but "" for thousands_sep, so we set the thousands_sep too.
5083 * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
5084 */
5085 if (lconv->thousands_sep && *lconv->thousands_sep)
5086 Np->L_thousands_sep = lconv->thousands_sep;
5087 /* Make sure thousands separator doesn't match decimal point symbol. */
5088 else if (strcmp(Np->decimal, ",") != 0)
5089 Np->L_thousands_sep = ",";
5090 else
5091 Np->L_thousands_sep = ".";
5092
5093 /*
5094 * Currency symbol
5095 */
5096 if (lconv->currency_symbol && *lconv->currency_symbol)
5097 Np->L_currency_symbol = lconv->currency_symbol;
5098 else
5099 Np->L_currency_symbol = " ";
5100 }
5101 else
5102 {
5103 /*
5104 * Default values
5105 */
5106 Np->L_negative_sign = "-";
5107 Np->L_positive_sign = "+";
5108 Np->decimal = ".";
5109
5110 Np->L_thousands_sep = ",";
5111 Np->L_currency_symbol = " ";
5112 }
5113 }
5114
/* ----------
 * Return a pointer to the last relevant character at or after the decimal
 * point, i.e. the last character that is not a '0':
 *		12.0500 --> last relevant is '5'
 *		12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *dot = strchr(num, '.');
	char	   *last;
	char	   *scan;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (dot == NULL)
		return NULL;

	/* The point itself is the answer unless a later non-zero char shows up */
	last = dot;
	for (scan = dot + 1; *scan != '\0'; scan++)
	{
		if (*scan != '0')
			last = scan;
	}

	return last;
}
5146
5147 /*
5148 * These macros are used in NUM_processor() and its subsidiary routines.
5149 * OVERLOAD_TEST: true if we've reached end of input string
5150 * AMOUNT_TEST(s): true if at least s bytes remain in string
5151 */
5152 #define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len)
5153 #define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s)))
5154
5155 /* ----------
5156 * Number extraction for TO_NUMBER()
5157 * ----------
5158 */
5159 static void
NUM_numpart_from_char(NUMProc * Np,int id,int input_len)5160 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
5161 {
5162 bool isread = false;
5163
5164 #ifdef DEBUG_TO_FROM_CHAR
5165 elog(DEBUG_elog_output, " --- scan start --- id=%s",
5166 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
5167 #endif
5168
5169 if (OVERLOAD_TEST)
5170 return;
5171
5172 if (*Np->inout_p == ' ')
5173 Np->inout_p++;
5174
5175 if (OVERLOAD_TEST)
5176 return;
5177
5178 /*
5179 * read sign before number
5180 */
5181 if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
5182 (Np->read_pre + Np->read_post) == 0)
5183 {
5184 #ifdef DEBUG_TO_FROM_CHAR
5185 elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
5186 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
5187 #endif
5188
5189 /*
5190 * locale sign
5191 */
5192 if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
5193 {
5194 int x = 0;
5195
5196 #ifdef DEBUG_TO_FROM_CHAR
5197 elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
5198 #endif
5199 if ((x = strlen(Np->L_negative_sign)) &&
5200 AMOUNT_TEST(x) &&
5201 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5202 {
5203 Np->inout_p += x;
5204 *Np->number = '-';
5205 }
5206 else if ((x = strlen(Np->L_positive_sign)) &&
5207 AMOUNT_TEST(x) &&
5208 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5209 {
5210 Np->inout_p += x;
5211 *Np->number = '+';
5212 }
5213 }
5214 else
5215 {
5216 #ifdef DEBUG_TO_FROM_CHAR
5217 elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
5218 #endif
5219
5220 /*
5221 * simple + - < >
5222 */
5223 if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
5224 *Np->inout_p == '<'))
5225 {
5226 *Np->number = '-'; /* set - */
5227 Np->inout_p++;
5228 }
5229 else if (*Np->inout_p == '+')
5230 {
5231 *Np->number = '+'; /* set + */
5232 Np->inout_p++;
5233 }
5234 }
5235 }
5236
5237 if (OVERLOAD_TEST)
5238 return;
5239
5240 #ifdef DEBUG_TO_FROM_CHAR
5241 elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
5242 #endif
5243
5244 /*
5245 * read digit or decimal point
5246 */
5247 if (isdigit((unsigned char) *Np->inout_p))
5248 {
5249 if (Np->read_dec && Np->read_post == Np->Num->post)
5250 return;
5251
5252 *Np->number_p = *Np->inout_p;
5253 Np->number_p++;
5254
5255 if (Np->read_dec)
5256 Np->read_post++;
5257 else
5258 Np->read_pre++;
5259
5260 isread = true;
5261
5262 #ifdef DEBUG_TO_FROM_CHAR
5263 elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
5264 #endif
5265 }
5266 else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
5267 {
5268 /*
5269 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
5270 * Np->decimal is always just "." if we don't have a D format token.
5271 * So we just unconditionally match to Np->decimal.
5272 */
5273 int x = strlen(Np->decimal);
5274
5275 #ifdef DEBUG_TO_FROM_CHAR
5276 elog(DEBUG_elog_output, "Try read decimal point (%c)",
5277 *Np->inout_p);
5278 #endif
5279 if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
5280 {
5281 Np->inout_p += x - 1;
5282 *Np->number_p = '.';
5283 Np->number_p++;
5284 Np->read_dec = true;
5285 isread = true;
5286 }
5287 }
5288
5289 if (OVERLOAD_TEST)
5290 return;
5291
5292 /*
5293 * Read sign behind "last" number
5294 *
5295 * We need sign detection because determine exact position of post-sign is
5296 * difficult:
5297 *
5298 * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
5299 * 5.01-
5300 */
5301 if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
5302 {
5303 /*
5304 * locale sign (NUM_S) is always anchored behind a last number, if: -
5305 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
5306 * next char is not digit
5307 */
5308 if (IS_LSIGN(Np->Num) && isread &&
5309 (Np->inout_p + 1) < Np->inout + input_len &&
5310 !isdigit((unsigned char) *(Np->inout_p + 1)))
5311 {
5312 int x;
5313 char *tmp = Np->inout_p++;
5314
5315 #ifdef DEBUG_TO_FROM_CHAR
5316 elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
5317 #endif
5318 if ((x = strlen(Np->L_negative_sign)) &&
5319 AMOUNT_TEST(x) &&
5320 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5321 {
5322 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
5323 *Np->number = '-';
5324 }
5325 else if ((x = strlen(Np->L_positive_sign)) &&
5326 AMOUNT_TEST(x) &&
5327 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5328 {
5329 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
5330 *Np->number = '+';
5331 }
5332 if (*Np->number == ' ')
5333 /* no sign read */
5334 Np->inout_p = tmp;
5335 }
5336
5337 /*
5338 * try read non-locale sign, it's happen only if format is not exact
5339 * and we cannot determine sign position of MI/PL/SG, an example:
5340 *
5341 * FM9.999999MI -> 5.01-
5342 *
5343 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
5344 * like to_number('1 -', '9S') where sign is not anchored to last
5345 * number.
5346 */
5347 else if (isread == false && IS_LSIGN(Np->Num) == false &&
5348 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
5349 {
5350 #ifdef DEBUG_TO_FROM_CHAR
5351 elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
5352 #endif
5353
5354 /*
5355 * simple + -
5356 */
5357 if (*Np->inout_p == '-' || *Np->inout_p == '+')
5358 /* NUM_processor() do inout_p++ */
5359 *Np->number = *Np->inout_p;
5360 }
5361 }
5362 }
5363
5364 #define IS_PREDEC_SPACE(_n) \
5365 (IS_ZERO((_n)->Num)==false && \
5366 (_n)->number == (_n)->number_p && \
5367 *(_n)->number == '0' && \
5368 (_n)->Num->post != 0)
5369
5370 /* ----------
5371 * Add digit or sign to number-string
5372 * ----------
5373 */
5374 static void
NUM_numpart_to_char(NUMProc * Np,int id)5375 NUM_numpart_to_char(NUMProc *Np, int id)
5376 {
5377 int end;
5378
5379 if (IS_ROMAN(Np->Num))
5380 return;
5381
5382 /* Note: in this elog() output not set '\0' in 'inout' */
5383
5384 #ifdef DEBUG_TO_FROM_CHAR
5385
5386 /*
5387 * Np->num_curr is number of current item in format-picture, it is not
5388 * current position in inout!
5389 */
5390 elog(DEBUG_elog_output,
5391 "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
5392 Np->sign_wrote,
5393 Np->num_curr,
5394 Np->number_p,
5395 Np->inout);
5396 #endif
5397 Np->num_in = false;
5398
5399 /*
5400 * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
5401 * handle "9.9" --> " .1"
5402 */
5403 if (Np->sign_wrote == false &&
5404 (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
5405 (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
5406 {
5407 if (IS_LSIGN(Np->Num))
5408 {
5409 if (Np->Num->lsign == NUM_LSIGN_PRE)
5410 {
5411 if (Np->sign == '-')
5412 strcpy(Np->inout_p, Np->L_negative_sign);
5413 else
5414 strcpy(Np->inout_p, Np->L_positive_sign);
5415 Np->inout_p += strlen(Np->inout_p);
5416 Np->sign_wrote = true;
5417 }
5418 }
5419 else if (IS_BRACKET(Np->Num))
5420 {
5421 *Np->inout_p = Np->sign == '+' ? ' ' : '<';
5422 ++Np->inout_p;
5423 Np->sign_wrote = true;
5424 }
5425 else if (Np->sign == '+')
5426 {
5427 if (!IS_FILLMODE(Np->Num))
5428 {
5429 *Np->inout_p = ' '; /* Write + */
5430 ++Np->inout_p;
5431 }
5432 Np->sign_wrote = true;
5433 }
5434 else if (Np->sign == '-')
5435 { /* Write - */
5436 *Np->inout_p = '-';
5437 ++Np->inout_p;
5438 Np->sign_wrote = true;
5439 }
5440 }
5441
5442
5443 /*
5444 * digits / FM / Zero / Dec. point
5445 */
5446 if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
5447 {
5448 if (Np->num_curr < Np->out_pre_spaces &&
5449 (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
5450 {
5451 /*
5452 * Write blank space
5453 */
5454 if (!IS_FILLMODE(Np->Num))
5455 {
5456 *Np->inout_p = ' '; /* Write ' ' */
5457 ++Np->inout_p;
5458 }
5459 }
5460 else if (IS_ZERO(Np->Num) &&
5461 Np->num_curr < Np->out_pre_spaces &&
5462 Np->Num->zero_start <= Np->num_curr)
5463 {
5464 /*
5465 * Write ZERO
5466 */
5467 *Np->inout_p = '0'; /* Write '0' */
5468 ++Np->inout_p;
5469 Np->num_in = true;
5470 }
5471 else
5472 {
5473 /*
5474 * Write Decimal point
5475 */
5476 if (*Np->number_p == '.')
5477 {
5478 if (!Np->last_relevant || *Np->last_relevant != '.')
5479 {
5480 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
5481 Np->inout_p += strlen(Np->inout_p);
5482 }
5483
5484 /*
5485 * Ora 'n' -- FM9.9 --> 'n.'
5486 */
5487 else if (IS_FILLMODE(Np->Num) &&
5488 Np->last_relevant && *Np->last_relevant == '.')
5489 {
5490 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
5491 Np->inout_p += strlen(Np->inout_p);
5492 }
5493 }
5494 else
5495 {
5496 /*
5497 * Write Digits
5498 */
5499 if (Np->last_relevant && Np->number_p > Np->last_relevant &&
5500 id != NUM_0)
5501 ;
5502
5503 /*
5504 * '0.1' -- 9.9 --> ' .1'
5505 */
5506 else if (IS_PREDEC_SPACE(Np))
5507 {
5508 if (!IS_FILLMODE(Np->Num))
5509 {
5510 *Np->inout_p = ' ';
5511 ++Np->inout_p;
5512 }
5513
5514 /*
5515 * '0' -- FM9.9 --> '0.'
5516 */
5517 else if (Np->last_relevant && *Np->last_relevant == '.')
5518 {
5519 *Np->inout_p = '0';
5520 ++Np->inout_p;
5521 }
5522 }
5523 else
5524 {
5525 *Np->inout_p = *Np->number_p; /* Write DIGIT */
5526 ++Np->inout_p;
5527 Np->num_in = true;
5528 }
5529 }
5530 /* do no exceed string length */
5531 if (*Np->number_p)
5532 ++Np->number_p;
5533 }
5534
5535 end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
5536
5537 if (Np->last_relevant && Np->last_relevant == Np->number_p)
5538 end = Np->num_curr;
5539
5540 if (Np->num_curr + 1 == end)
5541 {
5542 if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
5543 {
5544 *Np->inout_p = Np->sign == '+' ? ' ' : '>';
5545 ++Np->inout_p;
5546 }
5547 else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
5548 {
5549 if (Np->sign == '-')
5550 strcpy(Np->inout_p, Np->L_negative_sign);
5551 else
5552 strcpy(Np->inout_p, Np->L_positive_sign);
5553 Np->inout_p += strlen(Np->inout_p);
5554 }
5555 }
5556 }
5557
5558 ++Np->num_curr;
5559 }
5560
5561 /*
5562 * Skip over "n" input characters, but only if they aren't numeric data
5563 */
5564 static void
NUM_eat_non_data_chars(NUMProc * Np,int n,int input_len)5565 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
5566 {
5567 while (n-- > 0)
5568 {
5569 if (OVERLOAD_TEST)
5570 break; /* end of input */
5571 if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
5572 break; /* it's a data character */
5573 Np->inout_p += pg_mblen(Np->inout_p);
5574 }
5575 }
5576
5577 static char *
NUM_processor(FormatNode * node,NUMDesc * Num,char * inout,char * number,int input_len,int to_char_out_pre_spaces,int sign,bool is_to_char,Oid collid)5578 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
5579 char *number, int input_len, int to_char_out_pre_spaces,
5580 int sign, bool is_to_char, Oid collid)
5581 {
5582 FormatNode *n;
5583 NUMProc _Np,
5584 *Np = &_Np;
5585 const char *pattern;
5586 int pattern_len;
5587
5588 MemSet(Np, 0, sizeof(NUMProc));
5589
5590 Np->Num = Num;
5591 Np->is_to_char = is_to_char;
5592 Np->number = number;
5593 Np->inout = inout;
5594 Np->last_relevant = NULL;
5595 Np->read_post = 0;
5596 Np->read_pre = 0;
5597 Np->read_dec = false;
5598
5599 if (Np->Num->zero_start)
5600 --Np->Num->zero_start;
5601
5602 if (IS_EEEE(Np->Num))
5603 {
5604 if (!Np->is_to_char)
5605 ereport(ERROR,
5606 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5607 errmsg("\"EEEE\" not supported for input")));
5608 return strcpy(inout, number);
5609 }
5610
5611 /*
5612 * Roman correction
5613 */
5614 if (IS_ROMAN(Np->Num))
5615 {
5616 if (!Np->is_to_char)
5617 ereport(ERROR,
5618 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5619 errmsg("\"RN\" not supported for input")));
5620
5621 Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
5622 Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
5623
5624 if (IS_FILLMODE(Np->Num))
5625 {
5626 Np->Num->flag = 0;
5627 Np->Num->flag |= NUM_F_FILLMODE;
5628 }
5629 else
5630 Np->Num->flag = 0;
5631 Np->Num->flag |= NUM_F_ROMAN;
5632 }
5633
5634 /*
5635 * Sign
5636 */
5637 if (is_to_char)
5638 {
5639 Np->sign = sign;
5640
5641 /* MI/PL/SG - write sign itself and not in number */
5642 if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
5643 {
5644 if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
5645 Np->sign_wrote = false; /* need sign */
5646 else
5647 Np->sign_wrote = true; /* needn't sign */
5648 }
5649 else
5650 {
5651 if (Np->sign != '-')
5652 {
5653 if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
5654 Np->Num->flag &= ~NUM_F_BRACKET;
5655 if (IS_MINUS(Np->Num))
5656 Np->Num->flag &= ~NUM_F_MINUS;
5657 }
5658 else if (Np->sign != '+' && IS_PLUS(Np->Num))
5659 Np->Num->flag &= ~NUM_F_PLUS;
5660
5661 if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
5662 Np->sign_wrote = true; /* needn't sign */
5663 else
5664 Np->sign_wrote = false; /* need sign */
5665
5666 if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
5667 Np->Num->lsign = NUM_LSIGN_POST;
5668 }
5669 }
5670 else
5671 Np->sign = false;
5672
5673 /*
5674 * Count
5675 */
5676 Np->num_count = Np->Num->post + Np->Num->pre - 1;
5677
5678 if (is_to_char)
5679 {
5680 Np->out_pre_spaces = to_char_out_pre_spaces;
5681
5682 if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
5683 {
5684 Np->last_relevant = get_last_relevant_decnum(Np->number);
5685
5686 /*
5687 * If any '0' specifiers are present, make sure we don't strip
5688 * those digits.
5689 */
5690 if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
5691 {
5692 char *last_zero;
5693
5694 last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
5695 if (Np->last_relevant < last_zero)
5696 Np->last_relevant = last_zero;
5697 }
5698 }
5699
5700 if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
5701 ++Np->num_count;
5702 }
5703 else
5704 {
5705 Np->out_pre_spaces = 0;
5706 *Np->number = ' '; /* sign space */
5707 *(Np->number + 1) = '\0';
5708 }
5709
5710 Np->num_in = 0;
5711 Np->num_curr = 0;
5712
5713 #ifdef DEBUG_TO_FROM_CHAR
5714 elog(DEBUG_elog_output,
5715 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
5716 Np->sign,
5717 Np->number,
5718 Np->Num->pre,
5719 Np->Num->post,
5720 Np->num_count,
5721 Np->out_pre_spaces,
5722 Np->sign_wrote ? "Yes" : "No",
5723 IS_ZERO(Np->Num) ? "Yes" : "No",
5724 Np->Num->zero_start,
5725 Np->Num->zero_end,
5726 Np->last_relevant ? Np->last_relevant : "<not set>",
5727 IS_BRACKET(Np->Num) ? "Yes" : "No",
5728 IS_PLUS(Np->Num) ? "Yes" : "No",
5729 IS_MINUS(Np->Num) ? "Yes" : "No",
5730 IS_FILLMODE(Np->Num) ? "Yes" : "No",
5731 IS_ROMAN(Np->Num) ? "Yes" : "No",
5732 IS_EEEE(Np->Num) ? "Yes" : "No"
5733 );
5734 #endif
5735
5736 /*
5737 * Locale
5738 */
5739 NUM_prepare_locale(Np);
5740
5741 /*
5742 * Processor direct cycle
5743 */
5744 if (Np->is_to_char)
5745 Np->number_p = Np->number;
5746 else
5747 Np->number_p = Np->number + 1; /* first char is space for sign */
5748
5749 for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
5750 {
5751 if (!Np->is_to_char)
5752 {
5753 /*
5754 * Check at least one byte remains to be scanned. (In actions
5755 * below, must use AMOUNT_TEST if we want to read more bytes than
5756 * that.)
5757 */
5758 if (OVERLOAD_TEST)
5759 break;
5760 }
5761
5762 /*
5763 * Format pictures actions
5764 */
5765 if (n->type == NODE_TYPE_ACTION)
5766 {
5767 /*
5768 * Create/read digit/zero/blank/sign/special-case
5769 *
5770 * 'NUM_S' note: The locale sign is anchored to number and we
5771 * read/write it when we work with first or last number
5772 * (NUM_0/NUM_9). This is why NUM_S is missing in switch().
5773 *
5774 * Notice the "Np->inout_p++" at the bottom of the loop. This is
5775 * why most of the actions advance inout_p one less than you might
5776 * expect. In cases where we don't want that increment to happen,
5777 * a switch case ends with "continue" not "break".
5778 */
5779 switch (n->key->id)
5780 {
5781 case NUM_9:
5782 case NUM_0:
5783 case NUM_DEC:
5784 case NUM_D:
5785 if (Np->is_to_char)
5786 {
5787 NUM_numpart_to_char(Np, n->key->id);
5788 continue; /* for() */
5789 }
5790 else
5791 {
5792 NUM_numpart_from_char(Np, n->key->id, input_len);
5793 break; /* switch() case: */
5794 }
5795
5796 case NUM_COMMA:
5797 if (Np->is_to_char)
5798 {
5799 if (!Np->num_in)
5800 {
5801 if (IS_FILLMODE(Np->Num))
5802 continue;
5803 else
5804 *Np->inout_p = ' ';
5805 }
5806 else
5807 *Np->inout_p = ',';
5808 }
5809 else
5810 {
5811 if (!Np->num_in)
5812 {
5813 if (IS_FILLMODE(Np->Num))
5814 continue;
5815 }
5816 if (*Np->inout_p != ',')
5817 continue;
5818 }
5819 break;
5820
5821 case NUM_G:
5822 pattern = Np->L_thousands_sep;
5823 pattern_len = strlen(pattern);
5824 if (Np->is_to_char)
5825 {
5826 if (!Np->num_in)
5827 {
5828 if (IS_FILLMODE(Np->Num))
5829 continue;
5830 else
5831 {
5832 /* just in case there are MB chars */
5833 pattern_len = pg_mbstrlen(pattern);
5834 memset(Np->inout_p, ' ', pattern_len);
5835 Np->inout_p += pattern_len - 1;
5836 }
5837 }
5838 else
5839 {
5840 strcpy(Np->inout_p, pattern);
5841 Np->inout_p += pattern_len - 1;
5842 }
5843 }
5844 else
5845 {
5846 if (!Np->num_in)
5847 {
5848 if (IS_FILLMODE(Np->Num))
5849 continue;
5850 }
5851
5852 /*
5853 * Because L_thousands_sep typically contains data
5854 * characters (either '.' or ','), we can't use
5855 * NUM_eat_non_data_chars here. Instead skip only if
5856 * the input matches L_thousands_sep.
5857 */
5858 if (AMOUNT_TEST(pattern_len) &&
5859 strncmp(Np->inout_p, pattern, pattern_len) == 0)
5860 Np->inout_p += pattern_len - 1;
5861 else
5862 continue;
5863 }
5864 break;
5865
5866 case NUM_L:
5867 pattern = Np->L_currency_symbol;
5868 if (Np->is_to_char)
5869 {
5870 strcpy(Np->inout_p, pattern);
5871 Np->inout_p += strlen(pattern) - 1;
5872 }
5873 else
5874 {
5875 NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
5876 continue;
5877 }
5878 break;
5879
5880 case NUM_RN:
5881 if (IS_FILLMODE(Np->Num))
5882 {
5883 strcpy(Np->inout_p, Np->number_p);
5884 Np->inout_p += strlen(Np->inout_p) - 1;
5885 }
5886 else
5887 {
5888 sprintf(Np->inout_p, "%15s", Np->number_p);
5889 Np->inout_p += strlen(Np->inout_p) - 1;
5890 }
5891 break;
5892
5893 case NUM_rn:
5894 if (IS_FILLMODE(Np->Num))
5895 {
5896 strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
5897 Np->inout_p += strlen(Np->inout_p) - 1;
5898 }
5899 else
5900 {
5901 sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
5902 Np->inout_p += strlen(Np->inout_p) - 1;
5903 }
5904 break;
5905
5906 case NUM_th:
5907 if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5908 Np->sign == '-' || IS_DECIMAL(Np->Num))
5909 continue;
5910
5911 if (Np->is_to_char)
5912 {
5913 strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
5914 Np->inout_p += 1;
5915 }
5916 else
5917 {
5918 /* All variants of 'th' occupy 2 characters */
5919 NUM_eat_non_data_chars(Np, 2, input_len);
5920 continue;
5921 }
5922 break;
5923
5924 case NUM_TH:
5925 if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5926 Np->sign == '-' || IS_DECIMAL(Np->Num))
5927 continue;
5928
5929 if (Np->is_to_char)
5930 {
5931 strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
5932 Np->inout_p += 1;
5933 }
5934 else
5935 {
5936 /* All variants of 'TH' occupy 2 characters */
5937 NUM_eat_non_data_chars(Np, 2, input_len);
5938 continue;
5939 }
5940 break;
5941
5942 case NUM_MI:
5943 if (Np->is_to_char)
5944 {
5945 if (Np->sign == '-')
5946 *Np->inout_p = '-';
5947 else if (IS_FILLMODE(Np->Num))
5948 continue;
5949 else
5950 *Np->inout_p = ' ';
5951 }
5952 else
5953 {
5954 if (*Np->inout_p == '-')
5955 *Np->number = '-';
5956 else
5957 {
5958 NUM_eat_non_data_chars(Np, 1, input_len);
5959 continue;
5960 }
5961 }
5962 break;
5963
5964 case NUM_PL:
5965 if (Np->is_to_char)
5966 {
5967 if (Np->sign == '+')
5968 *Np->inout_p = '+';
5969 else if (IS_FILLMODE(Np->Num))
5970 continue;
5971 else
5972 *Np->inout_p = ' ';
5973 }
5974 else
5975 {
5976 if (*Np->inout_p == '+')
5977 *Np->number = '+';
5978 else
5979 {
5980 NUM_eat_non_data_chars(Np, 1, input_len);
5981 continue;
5982 }
5983 }
5984 break;
5985
5986 case NUM_SG:
5987 if (Np->is_to_char)
5988 *Np->inout_p = Np->sign;
5989 else
5990 {
5991 if (*Np->inout_p == '-')
5992 *Np->number = '-';
5993 else if (*Np->inout_p == '+')
5994 *Np->number = '+';
5995 else
5996 {
5997 NUM_eat_non_data_chars(Np, 1, input_len);
5998 continue;
5999 }
6000 }
6001 break;
6002
6003 default:
6004 continue;
6005 break;
6006 }
6007 }
6008 else
6009 {
6010 /*
6011 * In TO_CHAR, non-pattern characters in the format are copied to
6012 * the output. In TO_NUMBER, we skip one input character for each
6013 * non-pattern format character, whether or not it matches the
6014 * format character.
6015 */
6016 if (Np->is_to_char)
6017 {
6018 strcpy(Np->inout_p, n->character);
6019 Np->inout_p += strlen(Np->inout_p);
6020 }
6021 else
6022 {
6023 Np->inout_p += pg_mblen(Np->inout_p);
6024 }
6025 continue;
6026 }
6027 Np->inout_p++;
6028 }
6029
6030 if (Np->is_to_char)
6031 {
6032 *Np->inout_p = '\0';
6033 return Np->inout;
6034 }
6035 else
6036 {
6037 if (*(Np->number_p - 1) == '.')
6038 *(Np->number_p - 1) = '\0';
6039 else
6040 *Np->number_p = '\0';
6041
6042 /*
6043 * Correction - precision of dec. number
6044 */
6045 Np->Num->post = Np->read_post;
6046
6047 #ifdef DEBUG_TO_FROM_CHAR
6048 elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
6049 #endif
6050 return Np->number;
6051 }
6052 }
6053
/* ----------
 * MACRO: shared prologue of every NUM to_char() variant.
 *
 * Must be expanded inside a function that has the locals "fmt", "result",
 * "format", "Num" and "shouldFree" in scope; the macro reads "fmt" and
 * assigns the other four.
 *
 * Steps:
 *	- take the byte length of the format picture;
 *	- if it is empty, or so long that the worst-case output size
 *	  (len * NUM_MAX_ITEM_SIZ plus header) could not be expressed as an int,
 *	  return an empty text value FROM THE ENCLOSING FUNCTION (note the
 *	  hidden control flow: PG_RETURN_TEXT_P is expanded here);
 *	- allocate a zero-filled worst-case output buffer into "result";
 *	- obtain the parsed format picture from NUM_cache(), which also fills
 *	  "Num" and reports through "shouldFree" whether the caller has to
 *	  pfree() the returned node array (see NUM_TOCHAR_finish).
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int len = VARSIZE_ANY_EXHDR(fmt); \
	if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ) \
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	result = (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \
	format = NUM_cache(len, &Num, fmt, &shouldFree); \
} while (0)
6067
/* ----------
 * MACRO: shared epilogue of every NUM to_char() variant.
 *
 * Must be expanded inside a function that has the locals "format", "Num",
 * "result", "numstr", "out_pre_spaces", "sign" and "shouldFree" in scope,
 * normally after NUM_TOCHAR_prepare and after the type-specific code has
 * produced the digit string "numstr".
 *
 * Steps:
 *	- run NUM_processor() in to_char direction (the literal "true"), with
 *	  the data area of "result" as the output buffer and "numstr" as the
 *	  number to lay out; the input-length argument is 0 because it is only
 *	  meaningful when parsing;
 *	- release the parsed format picture if NUM_cache() asked us to;
 *	- set the varlena length of "result" from the NUL-terminated string
 *	  that NUM_processor() left in it.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int len; \
	\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
	\
	if (shouldFree) \
		pfree(format); \
	\
	/* \
	 * Convert null-terminated representation of result to standard text. \
	 * The result is usually much bigger than it needs to be, but there \
	 * seems little point in realloc'ing it smaller. \
	 */ \
	len = strlen(VARDATA(result)); \
	SET_VARSIZE(result, len + VARHDRSZ); \
} while (0)
6089
6090 /* -------------------
6091 * NUMERIC to_number() (convert string to numeric)
6092 * -------------------
6093 */
6094 Datum
numeric_to_number(PG_FUNCTION_ARGS)6095 numeric_to_number(PG_FUNCTION_ARGS)
6096 {
6097 text *value = PG_GETARG_TEXT_PP(0);
6098 text *fmt = PG_GETARG_TEXT_PP(1);
6099 NUMDesc Num;
6100 Datum result;
6101 FormatNode *format;
6102 char *numstr;
6103 bool shouldFree;
6104 int len = 0;
6105 int scale,
6106 precision;
6107
6108 len = VARSIZE_ANY_EXHDR(fmt);
6109
6110 if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
6111 PG_RETURN_NULL();
6112
6113 format = NUM_cache(len, &Num, fmt, &shouldFree);
6114
6115 numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
6116
6117 NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
6118 VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
6119
6120 scale = Num.post;
6121 precision = Num.pre + Num.multi + scale;
6122
6123 if (shouldFree)
6124 pfree(format);
6125
6126 result = DirectFunctionCall3(numeric_in,
6127 CStringGetDatum(numstr),
6128 ObjectIdGetDatum(InvalidOid),
6129 Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
6130
6131 if (IS_MULTI(&Num))
6132 {
6133 Numeric x;
6134 Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
6135 Int32GetDatum(10)));
6136 Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
6137 Int32GetDatum(-Num.multi)));
6138
6139 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6140 NumericGetDatum(a),
6141 NumericGetDatum(b)));
6142 result = DirectFunctionCall2(numeric_mul,
6143 result,
6144 NumericGetDatum(x));
6145 }
6146
6147 pfree(numstr);
6148 return result;
6149 }
6150
6151 /* ------------------
6152 * NUMERIC to_char()
6153 * ------------------
6154 */
6155 Datum
numeric_to_char(PG_FUNCTION_ARGS)6156 numeric_to_char(PG_FUNCTION_ARGS)
6157 {
6158 Numeric value = PG_GETARG_NUMERIC(0);
6159 text *fmt = PG_GETARG_TEXT_PP(1);
6160 NUMDesc Num;
6161 FormatNode *format;
6162 text *result;
6163 bool shouldFree;
6164 int out_pre_spaces = 0,
6165 sign = 0;
6166 char *numstr,
6167 *orgnum,
6168 *p;
6169 Numeric x;
6170
6171 NUM_TOCHAR_prepare;
6172
6173 /*
6174 * On DateType depend part (numeric)
6175 */
6176 if (IS_ROMAN(&Num))
6177 {
6178 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6179 NumericGetDatum(value),
6180 Int32GetDatum(0)));
6181 numstr = orgnum =
6182 int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
6183 NumericGetDatum(x))));
6184 }
6185 else if (IS_EEEE(&Num))
6186 {
6187 orgnum = numeric_out_sci(value, Num.post);
6188
6189 /*
6190 * numeric_out_sci() does not emit a sign for positive numbers. We
6191 * need to add a space in this case so that positive and negative
6192 * numbers are aligned. We also have to do the right thing for NaN.
6193 */
6194 if (strcmp(orgnum, "NaN") == 0)
6195 {
6196 /*
6197 * Allow 6 characters for the leading sign, the decimal point,
6198 * "e", the exponent's sign and two exponent digits.
6199 */
6200 numstr = (char *) palloc(Num.pre + Num.post + 7);
6201 fill_str(numstr, '#', Num.pre + Num.post + 6);
6202 *numstr = ' ';
6203 *(numstr + Num.pre + 1) = '.';
6204 }
6205 else if (*orgnum != '-')
6206 {
6207 numstr = (char *) palloc(strlen(orgnum) + 2);
6208 *numstr = ' ';
6209 strcpy(numstr + 1, orgnum);
6210 }
6211 else
6212 {
6213 numstr = orgnum;
6214 }
6215 }
6216 else
6217 {
6218 int numstr_pre_len;
6219 Numeric val = value;
6220
6221 if (IS_MULTI(&Num))
6222 {
6223 Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
6224 Int32GetDatum(10)));
6225 Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
6226 Int32GetDatum(Num.multi)));
6227
6228 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6229 NumericGetDatum(a),
6230 NumericGetDatum(b)));
6231 val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
6232 NumericGetDatum(value),
6233 NumericGetDatum(x)));
6234 Num.pre += Num.multi;
6235 }
6236
6237 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6238 NumericGetDatum(val),
6239 Int32GetDatum(Num.post)));
6240 orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
6241 NumericGetDatum(x)));
6242
6243 if (*orgnum == '-')
6244 {
6245 sign = '-';
6246 numstr = orgnum + 1;
6247 }
6248 else
6249 {
6250 sign = '+';
6251 numstr = orgnum;
6252 }
6253
6254 if ((p = strchr(numstr, '.')))
6255 numstr_pre_len = p - numstr;
6256 else
6257 numstr_pre_len = strlen(numstr);
6258
6259 /* needs padding? */
6260 if (numstr_pre_len < Num.pre)
6261 out_pre_spaces = Num.pre - numstr_pre_len;
6262 /* overflowed prefix digit format? */
6263 else if (numstr_pre_len > Num.pre)
6264 {
6265 numstr = (char *) palloc(Num.pre + Num.post + 2);
6266 fill_str(numstr, '#', Num.pre + Num.post + 1);
6267 *(numstr + Num.pre) = '.';
6268 }
6269 }
6270
6271 NUM_TOCHAR_finish;
6272 PG_RETURN_TEXT_P(result);
6273 }
6274
6275 /* ---------------
6276 * INT4 to_char()
6277 * ---------------
6278 */
6279 Datum
int4_to_char(PG_FUNCTION_ARGS)6280 int4_to_char(PG_FUNCTION_ARGS)
6281 {
6282 int32 value = PG_GETARG_INT32(0);
6283 text *fmt = PG_GETARG_TEXT_PP(1);
6284 NUMDesc Num;
6285 FormatNode *format;
6286 text *result;
6287 bool shouldFree;
6288 int out_pre_spaces = 0,
6289 sign = 0;
6290 char *numstr,
6291 *orgnum;
6292
6293 NUM_TOCHAR_prepare;
6294
6295 /*
6296 * On DateType depend part (int32)
6297 */
6298 if (IS_ROMAN(&Num))
6299 numstr = orgnum = int_to_roman(value);
6300 else if (IS_EEEE(&Num))
6301 {
6302 /* we can do it easily because float8 won't lose any precision */
6303 float8 val = (float8) value;
6304
6305 orgnum = (char *) psprintf("%+.*e", Num.post, val);
6306
6307 /*
6308 * Swap a leading positive sign for a space.
6309 */
6310 if (*orgnum == '+')
6311 *orgnum = ' ';
6312
6313 numstr = orgnum;
6314 }
6315 else
6316 {
6317 int numstr_pre_len;
6318
6319 if (IS_MULTI(&Num))
6320 {
6321 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6322 Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
6323 Num.pre += Num.multi;
6324 }
6325 else
6326 {
6327 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6328 Int32GetDatum(value)));
6329 }
6330
6331 if (*orgnum == '-')
6332 {
6333 sign = '-';
6334 orgnum++;
6335 }
6336 else
6337 sign = '+';
6338
6339 numstr_pre_len = strlen(orgnum);
6340
6341 /* post-decimal digits? Pad out with zeros. */
6342 if (Num.post)
6343 {
6344 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6345 strcpy(numstr, orgnum);
6346 *(numstr + numstr_pre_len) = '.';
6347 memset(numstr + numstr_pre_len + 1, '0', Num.post);
6348 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
6349 }
6350 else
6351 numstr = orgnum;
6352
6353 /* needs padding? */
6354 if (numstr_pre_len < Num.pre)
6355 out_pre_spaces = Num.pre - numstr_pre_len;
6356 /* overflowed prefix digit format? */
6357 else if (numstr_pre_len > Num.pre)
6358 {
6359 numstr = (char *) palloc(Num.pre + Num.post + 2);
6360 fill_str(numstr, '#', Num.pre + Num.post + 1);
6361 *(numstr + Num.pre) = '.';
6362 }
6363 }
6364
6365 NUM_TOCHAR_finish;
6366 PG_RETURN_TEXT_P(result);
6367 }
6368
6369 /* ---------------
6370 * INT8 to_char()
6371 * ---------------
6372 */
6373 Datum
int8_to_char(PG_FUNCTION_ARGS)6374 int8_to_char(PG_FUNCTION_ARGS)
6375 {
6376 int64 value = PG_GETARG_INT64(0);
6377 text *fmt = PG_GETARG_TEXT_PP(1);
6378 NUMDesc Num;
6379 FormatNode *format;
6380 text *result;
6381 bool shouldFree;
6382 int out_pre_spaces = 0,
6383 sign = 0;
6384 char *numstr,
6385 *orgnum;
6386
6387 NUM_TOCHAR_prepare;
6388
6389 /*
6390 * On DateType depend part (int32)
6391 */
6392 if (IS_ROMAN(&Num))
6393 {
6394 /* Currently don't support int8 conversion to roman... */
6395 numstr = orgnum = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value))));
6396 }
6397 else if (IS_EEEE(&Num))
6398 {
6399 /* to avoid loss of precision, must go via numeric not float8 */
6400 Numeric val;
6401
6402 val = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
6403 Int64GetDatum(value)));
6404 orgnum = numeric_out_sci(val, Num.post);
6405
6406 /*
6407 * numeric_out_sci() does not emit a sign for positive numbers. We
6408 * need to add a space in this case so that positive and negative
6409 * numbers are aligned. We don't have to worry about NaN here.
6410 */
6411 if (*orgnum != '-')
6412 {
6413 numstr = (char *) palloc(strlen(orgnum) + 2);
6414 *numstr = ' ';
6415 strcpy(numstr + 1, orgnum);
6416 }
6417 else
6418 {
6419 numstr = orgnum;
6420 }
6421 }
6422 else
6423 {
6424 int numstr_pre_len;
6425
6426 if (IS_MULTI(&Num))
6427 {
6428 double multi = pow((double) 10, (double) Num.multi);
6429
6430 value = DatumGetInt64(DirectFunctionCall2(int8mul,
6431 Int64GetDatum(value),
6432 DirectFunctionCall1(dtoi8,
6433 Float8GetDatum(multi))));
6434 Num.pre += Num.multi;
6435 }
6436
6437 orgnum = DatumGetCString(DirectFunctionCall1(int8out,
6438 Int64GetDatum(value)));
6439
6440 if (*orgnum == '-')
6441 {
6442 sign = '-';
6443 orgnum++;
6444 }
6445 else
6446 sign = '+';
6447
6448 numstr_pre_len = strlen(orgnum);
6449
6450 /* post-decimal digits? Pad out with zeros. */
6451 if (Num.post)
6452 {
6453 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6454 strcpy(numstr, orgnum);
6455 *(numstr + numstr_pre_len) = '.';
6456 memset(numstr + numstr_pre_len + 1, '0', Num.post);
6457 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
6458 }
6459 else
6460 numstr = orgnum;
6461
6462 /* needs padding? */
6463 if (numstr_pre_len < Num.pre)
6464 out_pre_spaces = Num.pre - numstr_pre_len;
6465 /* overflowed prefix digit format? */
6466 else if (numstr_pre_len > Num.pre)
6467 {
6468 numstr = (char *) palloc(Num.pre + Num.post + 2);
6469 fill_str(numstr, '#', Num.pre + Num.post + 1);
6470 *(numstr + Num.pre) = '.';
6471 }
6472 }
6473
6474 NUM_TOCHAR_finish;
6475 PG_RETURN_TEXT_P(result);
6476 }
6477
6478 /* -----------------
6479 * FLOAT4 to_char()
6480 * -----------------
6481 */
6482 Datum
float4_to_char(PG_FUNCTION_ARGS)6483 float4_to_char(PG_FUNCTION_ARGS)
6484 {
6485 float4 value = PG_GETARG_FLOAT4(0);
6486 text *fmt = PG_GETARG_TEXT_PP(1);
6487 NUMDesc Num;
6488 FormatNode *format;
6489 text *result;
6490 bool shouldFree;
6491 int out_pre_spaces = 0,
6492 sign = 0;
6493 char *numstr,
6494 *orgnum,
6495 *p;
6496
6497 NUM_TOCHAR_prepare;
6498
6499 if (IS_ROMAN(&Num))
6500 numstr = orgnum = int_to_roman((int) rint(value));
6501 else if (IS_EEEE(&Num))
6502 {
6503 if (isnan(value) || isinf(value))
6504 {
6505 /*
6506 * Allow 6 characters for the leading sign, the decimal point,
6507 * "e", the exponent's sign and two exponent digits.
6508 */
6509 numstr = (char *) palloc(Num.pre + Num.post + 7);
6510 fill_str(numstr, '#', Num.pre + Num.post + 6);
6511 *numstr = ' ';
6512 *(numstr + Num.pre + 1) = '.';
6513 }
6514 else
6515 {
6516 numstr = orgnum = psprintf("%+.*e", Num.post, value);
6517
6518 /*
6519 * Swap a leading positive sign for a space.
6520 */
6521 if (*orgnum == '+')
6522 *orgnum = ' ';
6523
6524 numstr = orgnum;
6525 }
6526 }
6527 else
6528 {
6529 float4 val = value;
6530 int numstr_pre_len;
6531
6532 if (IS_MULTI(&Num))
6533 {
6534 float multi = pow((double) 10, (double) Num.multi);
6535
6536 val = value * multi;
6537 Num.pre += Num.multi;
6538 }
6539
6540 orgnum = (char *) psprintf("%.0f", fabs(val));
6541 numstr_pre_len = strlen(orgnum);
6542
6543 /* adjust post digits to fit max float digits */
6544 if (numstr_pre_len >= FLT_DIG)
6545 Num.post = 0;
6546 else if (numstr_pre_len + Num.post > FLT_DIG)
6547 Num.post = FLT_DIG - numstr_pre_len;
6548 orgnum = psprintf("%.*f", Num.post, val);
6549
6550 if (*orgnum == '-')
6551 { /* < 0 */
6552 sign = '-';
6553 numstr = orgnum + 1;
6554 }
6555 else
6556 {
6557 sign = '+';
6558 numstr = orgnum;
6559 }
6560
6561 if ((p = strchr(numstr, '.')))
6562 numstr_pre_len = p - numstr;
6563 else
6564 numstr_pre_len = strlen(numstr);
6565
6566 /* needs padding? */
6567 if (numstr_pre_len < Num.pre)
6568 out_pre_spaces = Num.pre - numstr_pre_len;
6569 /* overflowed prefix digit format? */
6570 else if (numstr_pre_len > Num.pre)
6571 {
6572 numstr = (char *) palloc(Num.pre + Num.post + 2);
6573 fill_str(numstr, '#', Num.pre + Num.post + 1);
6574 *(numstr + Num.pre) = '.';
6575 }
6576 }
6577
6578 NUM_TOCHAR_finish;
6579 PG_RETURN_TEXT_P(result);
6580 }
6581
6582 /* -----------------
6583 * FLOAT8 to_char()
6584 * -----------------
6585 */
6586 Datum
float8_to_char(PG_FUNCTION_ARGS)6587 float8_to_char(PG_FUNCTION_ARGS)
6588 {
6589 float8 value = PG_GETARG_FLOAT8(0);
6590 text *fmt = PG_GETARG_TEXT_PP(1);
6591 NUMDesc Num;
6592 FormatNode *format;
6593 text *result;
6594 bool shouldFree;
6595 int out_pre_spaces = 0,
6596 sign = 0;
6597 char *numstr,
6598 *orgnum,
6599 *p;
6600
6601 NUM_TOCHAR_prepare;
6602
6603 if (IS_ROMAN(&Num))
6604 numstr = orgnum = int_to_roman((int) rint(value));
6605 else if (IS_EEEE(&Num))
6606 {
6607 if (isnan(value) || isinf(value))
6608 {
6609 /*
6610 * Allow 6 characters for the leading sign, the decimal point,
6611 * "e", the exponent's sign and two exponent digits.
6612 */
6613 numstr = (char *) palloc(Num.pre + Num.post + 7);
6614 fill_str(numstr, '#', Num.pre + Num.post + 6);
6615 *numstr = ' ';
6616 *(numstr + Num.pre + 1) = '.';
6617 }
6618 else
6619 {
6620 numstr = orgnum = (char *) psprintf("%+.*e", Num.post, value);
6621
6622 /*
6623 * Swap a leading positive sign for a space.
6624 */
6625 if (*orgnum == '+')
6626 *orgnum = ' ';
6627
6628 numstr = orgnum;
6629 }
6630 }
6631 else
6632 {
6633 float8 val = value;
6634 int numstr_pre_len;
6635
6636 if (IS_MULTI(&Num))
6637 {
6638 double multi = pow((double) 10, (double) Num.multi);
6639
6640 val = value * multi;
6641 Num.pre += Num.multi;
6642 }
6643 orgnum = psprintf("%.0f", fabs(val));
6644 numstr_pre_len = strlen(orgnum);
6645
6646 /* adjust post digits to fit max double digits */
6647 if (numstr_pre_len >= DBL_DIG)
6648 Num.post = 0;
6649 else if (numstr_pre_len + Num.post > DBL_DIG)
6650 Num.post = DBL_DIG - numstr_pre_len;
6651 orgnum = psprintf("%.*f", Num.post, val);
6652
6653 if (*orgnum == '-')
6654 { /* < 0 */
6655 sign = '-';
6656 numstr = orgnum + 1;
6657 }
6658 else
6659 {
6660 sign = '+';
6661 numstr = orgnum;
6662 }
6663
6664 if ((p = strchr(numstr, '.')))
6665 numstr_pre_len = p - numstr;
6666 else
6667 numstr_pre_len = strlen(numstr);
6668
6669 /* needs padding? */
6670 if (numstr_pre_len < Num.pre)
6671 out_pre_spaces = Num.pre - numstr_pre_len;
6672 /* overflowed prefix digit format? */
6673 else if (numstr_pre_len > Num.pre)
6674 {
6675 numstr = (char *) palloc(Num.pre + Num.post + 2);
6676 fill_str(numstr, '#', Num.pre + Num.post + 1);
6677 *(numstr + Num.pre) = '.';
6678 }
6679 }
6680
6681 NUM_TOCHAR_finish;
6682 PG_RETURN_TEXT_P(result);
6683 }
6684