1 /* -----------------------------------------------------------------------
2 * formatting.c
3 *
4 * src/backend/utils/adt/formatting.c
5 *
6 *
7 * Portions Copyright (c) 1999-2019, PostgreSQL Global Development Group
8 *
9 *
10 * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11 *
12 * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13 * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14 *
15 *
16 * Cache & Memory:
17 * The routines use their own internal cache for format pictures.
18 *
19 * The cache uses a static buffer and is persistent across transactions. If
20 * the format picture is bigger than the cache buffer, the parser is always
21 * called.
22 *
23 * NOTE for Number version:
24 * Everything in this version is implemented as keywords (i.e. suffixes
25 * are not used), because a format picture describes only *one* item
26 * (a number). This differs from the timestamp version, where each keyword
27 * can have a suffix.
28 *
29 * NOTE for Timestamp routines:
30 * This module does *not* use the POSIX 'struct tm' type, but rather the
31 * PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
32 * year is the full year number, *not* an offset from 1900.
33 * The module supports AD / BC / AM / PM.
34 *
35 * Supported types for to_char():
36 *
37 * Timestamp, Numeric, int4, int8, float4, float8
38 *
39 * Supported types for reverse conversion:
40 *
41 * Timestamp - to_timestamp()
42 * Date - to_date()
43 * Numeric - to_number()
44 *
45 *
46 * Karel Zak
47 *
48 * TODO
49 * - better number building (formatting) / parsing, now it isn't
50 * ideal code
51 * - use Assert()
52 * - add support for roman number to standard number conversion
53 * - add support for number spelling
54 * - add support for string to string formatting (we must be better
55 * than Oracle :-),
56 * to_char('Hello', 'X X X X X') -> 'H e l l o'
57 *
58 * -----------------------------------------------------------------------
59 */
60
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
64
65 #include "postgres.h"
66
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72
73 /*
74 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75 * declare them in <wchar.h>.
76 */
77 #ifdef HAVE_WCHAR_H
78 #include <wchar.h>
79 #endif
80 #ifdef HAVE_WCTYPE_H
81 #include <wctype.h>
82 #endif
83
84 #ifdef USE_ICU
85 #include <unicode/ustring.h>
86 #endif
87
88 #include "catalog/pg_collation.h"
89 #include "mb/pg_wchar.h"
90 #include "parser/scansup.h"
91 #include "utils/builtins.h"
92 #include "utils/date.h"
93 #include "utils/datetime.h"
94 #include "utils/float.h"
95 #include "utils/formatting.h"
96 #include "utils/int8.h"
97 #include "utils/memutils.h"
98 #include "utils/numeric.h"
99 #include "utils/pg_locale.h"
100
101 /* ----------
102 * Routines type
103 * ----------
104 */
105 #define DCH_TYPE 1 /* DATE-TIME version */
106 #define NUM_TYPE 2 /* NUMBER version */
107
108 /* ----------
109 * KeyWord Index (ascii from position 32 (' ') to 126 (~))
110 * ----------
111 */
112 #define KeyWord_INDEX_SIZE ('~' - ' ')
113 #define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1)
114
115 /* ----------
116 * Maximal length of one node
117 * ----------
118 */
119 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
120 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
121
122
123 /* ----------
124 * Format parser structs
125 * ----------
126 */
127 typedef struct
128 {
129 const char *name; /* suffix string */
130 int len, /* suffix length */
131 id, /* used in node->suffix */
132 type; /* prefix / postfix */
133 } KeySuffix;
134
135 /* ----------
136 * FromCharDateMode
137 * ----------
138 *
139 * This value is used to nominate one of several distinct (and mutually
140 * exclusive) date conventions that a keyword can belong to.
141 */
142 typedef enum
143 {
144 FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */
145 FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */
146 FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */
147 } FromCharDateMode;
148
/* One entry of a keyword table (see DCH_keywords and NUM_keywords). */
149 typedef struct
150 {
151 const char *name; /* keyword text as written in a format picture */
152 int len; /* number of bytes of name compared by the keyword search */
153 int id; /* DCH_poz or NUM_poz value identifying the keyword */
154 bool is_digit; /* NOTE(review): presumably "keyword stands for a numeric value" - confirm at use sites */
155 FromCharDateMode date_mode; /* date convention the keyword belongs to, or FROM_CHAR_DATE_NONE */
156 } KeyWord;
157
158 typedef struct
159 {
160 uint8 type; /* NODE_TYPE_XXX, see below */
161 char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
162 uint8 suffix; /* keyword prefix/suffix code, if any */
163 const KeyWord *key; /* if type is ACTION */
164 } FormatNode;
165
166 #define NODE_TYPE_END 1
167 #define NODE_TYPE_ACTION 2
168 #define NODE_TYPE_CHAR 3
169 #define NODE_TYPE_SEPARATOR 4
170 #define NODE_TYPE_SPACE 5
171
172 #define SUFFTYPE_PREFIX 1
173 #define SUFFTYPE_POSTFIX 2
174
175 #define CLOCK_24_HOUR 0
176 #define CLOCK_12_HOUR 1
177
178
179 /* ----------
180 * Full months
181 * ----------
182 */
183 static const char *const months_full[] = {
184 "January", "February", "March", "April", "May", "June", "July",
185 "August", "September", "October", "November", "December", NULL
186 };
187
188 static const char *const days_short[] = {
189 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
190 };
191
192 /* ----------
193 * AD / BC
194 * ----------
195 * There is no 0 AD. Years go from 1 BC to 1 AD, so a non-positive year
196 * is converted to the positive number 1 - year (0 becomes 1, -1 becomes 2,
197 * and so on). For interval years, we just return the year.
198 */
199 #define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))
200
201 #define A_D_STR "A.D."
202 #define a_d_STR "a.d."
203 #define AD_STR "AD"
204 #define ad_STR "ad"
205
206 #define B_C_STR "B.C."
207 #define b_c_STR "b.c."
208 #define BC_STR "BC"
209 #define bc_STR "bc"
210
211 /*
212 * AD / BC strings for seq_search.
213 *
214 * These are given in two variants, a long form with periods and a standard
215 * form without.
216 *
217 * The array is laid out such that matches for AD have an even index, and
218 * matches for BC have an odd index. So the boolean value for BC is given by
219 * taking the array index of the match, modulo 2.
220 */
221 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
222 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
223
224 /* ----------
225 * AM / PM
226 * ----------
227 */
228 #define A_M_STR "A.M."
229 #define a_m_STR "a.m."
230 #define AM_STR "AM"
231 #define am_STR "am"
232
233 #define P_M_STR "P.M."
234 #define p_m_STR "p.m."
235 #define PM_STR "PM"
236 #define pm_STR "pm"
237
238 /*
239 * AM / PM strings for seq_search.
240 *
241 * These are given in two variants, a long form with periods and a standard
242 * form without.
243 *
244 * The array is laid out such that matches for AM have an even index, and
245 * matches for PM have an odd index. So the boolean value for PM is given by
246 * taking the array index of the match, modulo 2.
247 */
248 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
249 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
250
251 /* ----------
252 * Months in roman-numeral
253 * (Must be in reverse order for seq_search (in FROM_CHAR), because
254 * 'VIII' must have higher precedence than 'V')
255 * ----------
256 */
257 static const char *const rm_months_upper[] =
258 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
259
260 static const char *const rm_months_lower[] =
261 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
262
263 /* ----------
264 * Roman numbers
265 * ----------
266 */
267 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
268 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
269 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
270
271 /* ----------
272 * Ordinal postfixes
273 * ----------
274 */
275 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
276 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
277
278 /* ----------
279 * Flags & Options:
280 * ----------
281 */
282 #define TH_UPPER 1
283 #define TH_LOWER 2
284
285 /* ----------
286 * Number description struct
287 * ----------
288 */
/* Description of a number format picture, filled in while the picture is parsed. */
289 typedef struct
290 {
291 int pre, /* count of digit positions before the decimal point */
292 post, /* count of digit positions after the decimal point */
293 lsign, /* locale sign wanted (NOTE(review): presumably one of NUM_LSIGN_* - confirm) */
294 flag, /* bitmask of NUM_F_* flags, tested with the IS_*() macros */
295 pre_lsign_num, /* temporary value used for lsign */
296 multi, /* multiplier for 'V' */
297 zero_start, /* position of the first zero */
298 zero_end, /* position of the last zero */
299 need_locale; /* locale information is needed */
300 } NUMDesc;
301
302 /* ----------
303 * Flags for NUMBER version
304 * ----------
305 */
306 #define NUM_F_DECIMAL (1 << 1)
307 #define NUM_F_LDECIMAL (1 << 2)
308 #define NUM_F_ZERO (1 << 3)
309 #define NUM_F_BLANK (1 << 4)
310 #define NUM_F_FILLMODE (1 << 5)
311 #define NUM_F_LSIGN (1 << 6)
312 #define NUM_F_BRACKET (1 << 7)
313 #define NUM_F_MINUS (1 << 8)
314 #define NUM_F_PLUS (1 << 9)
315 #define NUM_F_ROMAN (1 << 10)
316 #define NUM_F_MULTI (1 << 11)
317 #define NUM_F_PLUS_POST (1 << 12)
318 #define NUM_F_MINUS_POST (1 << 13)
319 #define NUM_F_EEEE (1 << 14)
320
321 #define NUM_LSIGN_PRE (-1)
322 #define NUM_LSIGN_POST 1
323 #define NUM_LSIGN_NONE 0
324
325 /* ----------
326 * Tests
327 * ----------
328 */
329 #define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL)
330 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
331 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
332 #define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK)
333 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
334 #define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET)
335 #define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS)
336 #define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN)
337 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
338 #define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN)
339 #define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI)
340 #define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE)
341
342 /* ----------
343 * Format picture cache
344 *
345 * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
346 * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
347 *
348 * For simplicity, the cache entries are fixed-size, so they allow for the
349 * worst case of a FormatNode for each byte in the picture string.
350 *
351 * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
352 * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
353 * we don't waste too much space by palloc'ing them individually. Be sure
354 * to adjust those macros if you add fields to those structs.
355 *
356 * The max number of entries in each cache is DCH_CACHE_ENTRIES
357 * resp. NUM_CACHE_ENTRIES.
358 * ----------
359 */
360 #define DCH_CACHE_OVERHEAD \
361 MAXALIGN(sizeof(bool) + sizeof(int))
362 #define NUM_CACHE_OVERHEAD \
363 MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
364
365 #define DCH_CACHE_SIZE \
366 ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
367 #define NUM_CACHE_SIZE \
368 ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
369
370 #define DCH_CACHE_ENTRIES 20
371 #define NUM_CACHE_ENTRIES 20
372
/* One cached date/time format picture. */
373 typedef struct
374 {
375 FormatNode format[DCH_CACHE_SIZE + 1]; /* parsed picture: room for one node per picture byte, plus one */
376 char str[DCH_CACHE_SIZE + 1]; /* the format picture string, plus one extra byte */
377 bool valid; /* NOTE(review): presumably true once format[] holds a usable parse - confirm */
378 int age; /* NOTE(review): presumably compared with DCHCounter to pick an entry to reuse - confirm */
379 } DCHCacheEntry;
380
/* One cached number format picture. */
381 typedef struct
382 {
383 FormatNode format[NUM_CACHE_SIZE + 1]; /* parsed picture: room for one node per picture byte, plus one */
384 char str[NUM_CACHE_SIZE + 1]; /* the format picture string, plus one extra byte */
385 bool valid; /* NOTE(review): presumably true once format[] holds a usable parse - confirm */
386 int age; /* NOTE(review): presumably compared with NUMCounter to pick an entry to reuse - confirm */
387 NUMDesc Num; /* number description stored alongside the parsed picture */
388 } NUMCacheEntry;
389
390 /* global cache for date/time format pictures */
391 static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES];
392 static int n_DCHCache = 0; /* current number of entries */
393 static int DCHCounter = 0; /* aging-event counter */
394
395 /* global cache for number format pictures */
396 static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES];
397 static int n_NUMCache = 0; /* current number of entries */
398 static int NUMCounter = 0; /* aging-event counter */
399
400 /* ----------
401 * For char->date/time conversion
402 * ----------
403 */
404 typedef struct
405 {
406 FromCharDateMode mode;
407 int hh,
408 pm,
409 mi,
410 ss,
411 ssss,
412 d, /* stored as 1-7, Sunday = 1, 0 means missing */
413 dd,
414 ddd,
415 mm,
416 ms,
417 year,
418 bc,
419 ww,
420 w,
421 cc,
422 j,
423 us,
424 yysz, /* is it YY or YYYY ? */
425 clock, /* 12 or 24 hour clock? */
426 tzsign, /* +1, -1 or 0 if timezone info is absent */
427 tzh,
428 tzm;
429 } TmFromChar;
430
431 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
432
433 /* ----------
434 * Debug
435 * ----------
436 */
437 #ifdef DEBUG_TO_FROM_CHAR
438 #define DEBUG_TMFC(_X) \
439 elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
440 (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
441 (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
442 (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
443 (_X)->yysz, (_X)->clock)
444 #define DEBUG_TM(_X) \
445 elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\
446 (_X)->tm_sec, (_X)->tm_year,\
447 (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
448 (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
449 #else
450 #define DEBUG_TMFC(_X)
451 #define DEBUG_TM(_X)
452 #endif
453
454 /* ----------
455 * Datetime to char conversion
456 * ----------
457 */
458 typedef struct TmToChar
459 {
460 struct pg_tm tm; /* classic 'tm' struct */
461 fsec_t fsec; /* fractional seconds */
462 const char *tzn; /* timezone */
463 } TmToChar;
464
465 #define tmtcTm(_X) (&(_X)->tm)
466 #define tmtcTzn(_X) ((_X)->tzn)
467 #define tmtcFsec(_X) ((_X)->fsec)
468
469 #define ZERO_tm(_X) \
470 do { \
471 (_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \
472 (_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \
473 (_X)->tm_mday = (_X)->tm_mon = 1; \
474 (_X)->tm_zone = NULL; \
475 } while(0)
476
477 #define ZERO_tmtc(_X) \
478 do { \
479 ZERO_tm( tmtcTm(_X) ); \
480 tmtcFsec(_X) = 0; \
481 tmtcTzn(_X) = NULL; \
482 } while(0)
483
484 /*
485 * to_char(time) appears to to_char() as an interval, so this check
486 * is really for interval and time data types.
487 */
488 #define INVALID_FOR_INTERVAL \
489 do { \
490 if (is_interval) \
491 ereport(ERROR, \
492 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
493 errmsg("invalid format specification for an interval value"), \
494 errhint("Intervals are not tied to specific calendar dates."))); \
495 } while(0)
496
497 /*****************************************************************************
498 * KeyWord definitions
499 *****************************************************************************/
500
501 /* ----------
502 * Suffixes (FormatNode.suffix is an OR of these codes)
503 * ----------
504 */
505 #define DCH_S_FM 0x01
506 #define DCH_S_TH 0x02
507 #define DCH_S_th 0x04
508 #define DCH_S_SP 0x08
509 #define DCH_S_TM 0x10
510
511 /* ----------
512 * Suffix tests
513 * ----------
514 */
515 #define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
516 #define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0)
517 #define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0)
518 #define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)
519
520 /* Oracle toggles FM behavior, we don't; see docs. */
521 #define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0)
522 #define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0)
523 #define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0)
524
525 /* ----------
526 * Suffixes definition for DATE-TIME TO/FROM CHAR
527 * ----------
528 */
529 #define TM_SUFFIX_LEN 2
530
531 static const KeySuffix DCH_suff[] = {
532 {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
533 {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
534 {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
535 {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
536 {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
537 {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
538 {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
539 /* last */
540 {NULL, 0, 0, 0}
541 };
542
543
544 /* ----------
545 * Format-pictures (KeyWord).
546 *
547 * The KeyWord array is sorted alphabetically, *BUT* keywords that begin
548 * alike are sorted from the more complicated to the simpler:
549 *
550 * (example: "DDD","DD","Day","D" )
551 *
552 * (This ordering is required by the sequential search, because the input
553 * string has no exact end. Which keyword is in "HH12blabla" - "HH" or
554 * "HH12"? "HH12" must be tried first, because "HH" also matches the
555 * string, but it is not the right answer.)
556 *
557 * (!)
558 * - The position of a keyword in the array corresponds to its position in the enum DCH_poz / NUM_poz.
559 * (!)
560 *
561 * For fast searching an 'int index[]' is used. The index is an ASCII table
562 * covering positions 32 (' ') to 126 (~); for each ASCII position it holds
563 * a DCH_ / NUM_ enum value, or -1 if no keyword starts with that character.
564 * Search example for the string "MM":
565 * 1) look up index['M' - 32],
566 * 2) take the keyword position (enum DCH_MI) from the index,
567 * 3) run a sequential search in keywords[] from this position.
568 *
569 * ----------
570 */
571
572 typedef enum
573 {
574 DCH_A_D,
575 DCH_A_M,
576 DCH_AD,
577 DCH_AM,
578 DCH_B_C,
579 DCH_BC,
580 DCH_CC,
581 DCH_DAY,
582 DCH_DDD,
583 DCH_DD,
584 DCH_DY,
585 DCH_Day,
586 DCH_Dy,
587 DCH_D,
588 DCH_FX, /* global suffix */
589 DCH_HH24,
590 DCH_HH12,
591 DCH_HH,
592 DCH_IDDD,
593 DCH_ID,
594 DCH_IW,
595 DCH_IYYY,
596 DCH_IYY,
597 DCH_IY,
598 DCH_I,
599 DCH_J,
600 DCH_MI,
601 DCH_MM,
602 DCH_MONTH,
603 DCH_MON,
604 DCH_MS,
605 DCH_Month,
606 DCH_Mon,
607 DCH_OF,
608 DCH_P_M,
609 DCH_PM,
610 DCH_Q,
611 DCH_RM,
612 DCH_SSSS,
613 DCH_SS,
614 DCH_TZH,
615 DCH_TZM,
616 DCH_TZ,
617 DCH_US,
618 DCH_WW,
619 DCH_W,
620 DCH_Y_YYY,
621 DCH_YYYY,
622 DCH_YYY,
623 DCH_YY,
624 DCH_Y,
625 DCH_a_d,
626 DCH_a_m,
627 DCH_ad,
628 DCH_am,
629 DCH_b_c,
630 DCH_bc,
631 DCH_cc,
632 DCH_day,
633 DCH_ddd,
634 DCH_dd,
635 DCH_dy,
636 DCH_d,
637 DCH_fx,
638 DCH_hh24,
639 DCH_hh12,
640 DCH_hh,
641 DCH_iddd,
642 DCH_id,
643 DCH_iw,
644 DCH_iyyy,
645 DCH_iyy,
646 DCH_iy,
647 DCH_i,
648 DCH_j,
649 DCH_mi,
650 DCH_mm,
651 DCH_month,
652 DCH_mon,
653 DCH_ms,
654 DCH_p_m,
655 DCH_pm,
656 DCH_q,
657 DCH_rm,
658 DCH_ssss,
659 DCH_ss,
660 DCH_tz,
661 DCH_us,
662 DCH_ww,
663 DCH_w,
664 DCH_y_yyy,
665 DCH_yyyy,
666 DCH_yyy,
667 DCH_yy,
668 DCH_y,
669
670 /* last */
671 _DCH_last_
672 } DCH_poz;
673
674 typedef enum
675 {
676 NUM_COMMA,
677 NUM_DEC,
678 NUM_0,
679 NUM_9,
680 NUM_B,
681 NUM_C,
682 NUM_D,
683 NUM_E,
684 NUM_FM,
685 NUM_G,
686 NUM_L,
687 NUM_MI,
688 NUM_PL,
689 NUM_PR,
690 NUM_RN,
691 NUM_SG,
692 NUM_SP,
693 NUM_S,
694 NUM_TH,
695 NUM_V,
696 NUM_b,
697 NUM_c,
698 NUM_d,
699 NUM_e,
700 NUM_fm,
701 NUM_g,
702 NUM_l,
703 NUM_mi,
704 NUM_pl,
705 NUM_pr,
706 NUM_rn,
707 NUM_sg,
708 NUM_sp,
709 NUM_s,
710 NUM_th,
711 NUM_v,
712
713 /* last */
714 _NUM_last_
715 } NUM_poz;
716
717 /* ----------
718 * KeyWords for DATE-TIME version
719 * ----------
720 */
721 static const KeyWord DCH_keywords[] = {
722 /* name, len, id, is_digit, date_mode */
723 {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
724 {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
725 {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
726 {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
727 {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
728 {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
729 {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
730 {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
731 {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
732 {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
733 {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
734 {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
735 {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
736 {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
737 {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* F */
738 {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
739 {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
740 {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
741 {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
742 {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
743 {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
744 {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
745 {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
746 {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
747 {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
748 {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
749 {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
750 {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
751 {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
752 {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
753 {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
754 {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
755 {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
756 {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
757 {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
758 {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
759 {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
760 {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
761 {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
762 {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
763 {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
764 {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
765 {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
766 {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
767 {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
768 {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
769 {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
770 {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
771 {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
772 {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
773 {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
774 {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
775 {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
776 {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
777 {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
778 {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
779 {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
780 {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
781 {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
782 {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
783 {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
784 {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
785 {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
786 {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* f */
787 {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
788 {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
789 {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
790 {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
791 {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
792 {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
793 {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
794 {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
795 {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
796 {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
797 {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
798 {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
799 {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
800 {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
801 {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
802 {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
803 {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
804 {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
805 {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
806 {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
807 {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
808 {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
809 {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */
810 {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
811 {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
812 {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
813 {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
814 {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
815 {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
816 {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
817 {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
818
819 /* last */
820 {NULL, 0, 0, 0, 0}
821 };
822
823 /* ----------
824 * KeyWords for NUMBER version
825 *
826 * The is_digit and date_mode fields are not relevant here.
827 * ----------
828 */
829 static const KeyWord NUM_keywords[] = {
830 /* name, len, id is in Index */
831 {",", 1, NUM_COMMA}, /* , */
832 {".", 1, NUM_DEC}, /* . */
833 {"0", 1, NUM_0}, /* 0 */
834 {"9", 1, NUM_9}, /* 9 */
835 {"B", 1, NUM_B}, /* B */
836 {"C", 1, NUM_C}, /* C */
837 {"D", 1, NUM_D}, /* D */
838 {"EEEE", 4, NUM_E}, /* E */
839 {"FM", 2, NUM_FM}, /* F */
840 {"G", 1, NUM_G}, /* G */
841 {"L", 1, NUM_L}, /* L */
842 {"MI", 2, NUM_MI}, /* M */
843 {"PL", 2, NUM_PL}, /* P */
844 {"PR", 2, NUM_PR},
845 {"RN", 2, NUM_RN}, /* R */
846 {"SG", 2, NUM_SG}, /* S */
847 {"SP", 2, NUM_SP},
848 {"S", 1, NUM_S},
849 {"TH", 2, NUM_TH}, /* T */
850 {"V", 1, NUM_V}, /* V */
851 {"b", 1, NUM_B}, /* b */
852 {"c", 1, NUM_C}, /* c */
853 {"d", 1, NUM_D}, /* d */
854 {"eeee", 4, NUM_E}, /* e */
855 {"fm", 2, NUM_FM}, /* f */
856 {"g", 1, NUM_G}, /* g */
857 {"l", 1, NUM_L}, /* l */
858 {"mi", 2, NUM_MI}, /* m */
859 {"pl", 2, NUM_PL}, /* p */
860 {"pr", 2, NUM_PR},
861 {"rn", 2, NUM_rn}, /* r */
862 {"sg", 2, NUM_SG}, /* s */
863 {"sp", 2, NUM_SP},
864 {"s", 1, NUM_S},
865 {"th", 2, NUM_th}, /* t */
866 {"v", 1, NUM_V}, /* v */
867
868 /* last */
869 {NULL, 0, 0}
870 };
871
872
873 /* ----------
874 * KeyWords index for DATE-TIME version
875 * ----------
876 */
877 static const int DCH_index[KeyWord_INDEX_SIZE] = {
878 /*
879 0 1 2 3 4 5 6 7 8 9
880 */
881 /*---- first 0..31 chars are skipped ----*/
882
883 -1, -1, -1, -1, -1, -1, -1, -1,
884 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
885 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
886 -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
887 DCH_FX, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
888 DCH_P_M, DCH_Q, DCH_RM, DCH_SSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,
889 -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
890 DCH_day, -1, DCH_fx, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
891 -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_ssss, DCH_tz, DCH_us, -1, DCH_ww,
892 -1, DCH_y_yyy, -1, -1, -1, -1
893
894 /*---- chars over 126 are skipped ----*/
895 };
896
897 /* ----------
898 * KeyWords index for NUMBER version
899 * ----------
900 */
901 static const int NUM_index[KeyWord_INDEX_SIZE] = {
902 /*
903 0 1 2 3 4 5 6 7 8 9
904 */
905 /*---- first 0..31 chars are skipped ----*/
906
907 -1, -1, -1, -1, -1, -1, -1, -1,
908 -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
909 -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
910 -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
911 NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
912 NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
913 -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
914 NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
915 -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
916 -1, -1, -1, -1, -1, -1
917
918 /*---- chars over 126 are skipped ----*/
919 };
920
921 /* ----------
922 * Number processor struct
923 * ----------
924 */
/* Working state of the number processor for one conversion. */
925 typedef struct NUMProc
926 {
927 bool is_to_char; /* conversion direction (NOTE(review): presumably false means to_number() - confirm) */
928 NUMDesc *Num; /* number description */
929
930 int sign, /* '-' or '+' */
931 sign_wrote, /* has the sign been written? */
932 num_count, /* number of digits written */
933 num_in, /* are we inside the number? */
934 num_curr, /* current position in the number */
935 out_pre_spaces, /* spaces before the first digit */
936
937 read_dec, /* to_number - has the decimal point been read? */
938 read_post, /* to_number - number of decimal digits read */
939 read_pre; /* to_number - number of non-decimal digits read */
940
941 char *number, /* string with the number */
942 *number_p, /* pointer to the current position in number */
943 *inout, /* in / out buffer */
944 *inout_p, /* pointer to the current position in inout */
945 *last_relevant, /* last relevant digit after the decimal point */
946
947 *L_negative_sign, /* locale-specific strings */
948 *L_positive_sign,
949 *decimal,
950 *L_thousands_sep,
951 *L_currency_symbol;
952 } NUMProc;
953
954
955 /* ----------
956 * Functions
957 * ----------
958 */
959 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
960 const int *index);
961 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
962 static bool is_separator_char(const char *str);
963 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
964 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
965 const KeySuffix *suf, const int *index, int ver, NUMDesc *Num);
966
967 static void DCH_to_char(FormatNode *node, bool is_interval,
968 TmToChar *in, char *out, Oid collid);
969 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out);
970
971 #ifdef DEBUG_TO_FROM_CHAR
972 static void dump_index(const KeyWord *k, const int *index);
973 static void dump_node(FormatNode *node, int max);
974 #endif
975
976 static const char *get_th(char *num, int type);
977 static char *str_numth(char *dest, char *num, int type);
978 static int adjust_partial_year_to_2020(int year);
979 static int strspace_len(const char *str);
980 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode);
981 static void from_char_set_int(int *dest, const int value, const FormatNode *node);
982 static int from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node);
983 static int from_char_parse_int(int *dest, const char **src, FormatNode *node);
984 static int seq_search(const char *name, const char *const *array, int *len);
985 static int from_char_seq_search(int *dest, const char **src,
986 const char *const *array,
987 FormatNode *node);
988 static void do_to_timestamp(text *date_txt, text *fmt,
989 struct pg_tm *tm, fsec_t *fsec);
990 static char *fill_str(char *str, int c, int max);
991 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
992 static char *int_to_roman(int number);
993 static void NUM_prepare_locale(NUMProc *Np);
994 static char *get_last_relevant_decnum(char *num);
995 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
996 static void NUM_numpart_to_char(NUMProc *Np, int id);
997 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
998 char *number, int input_len, int to_char_out_pre_spaces,
999 int sign, bool is_to_char, Oid collid);
1000 static DCHCacheEntry *DCH_cache_getnew(const char *str);
1001 static DCHCacheEntry *DCH_cache_search(const char *str);
1002 static DCHCacheEntry *DCH_cache_fetch(const char *str);
1003 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1004 static NUMCacheEntry *NUM_cache_search(const char *str);
1005 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1006
1007
1008 /* ----------
1009 * Fast sequential search, use index for data selection which
1010 * go to seq. cycle (it is very fast for unwanted strings)
1011 * (can't be used binary search in format parsing)
1012 * ----------
1013 */
1014 static const KeyWord *
index_seq_search(const char * str,const KeyWord * kw,const int * index)1015 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1016 {
1017 int poz;
1018
1019 if (!KeyWord_INDEX_FILTER(*str))
1020 return NULL;
1021
1022 if ((poz = *(index + (*str - ' '))) > -1)
1023 {
1024 const KeyWord *k = kw + poz;
1025
1026 do
1027 {
1028 if (strncmp(str, k->name, k->len) == 0)
1029 return k;
1030 k++;
1031 if (!k->name)
1032 return NULL;
1033 } while (*str == *k->name);
1034 }
1035 return NULL;
1036 }
1037
1038 static const KeySuffix *
suff_search(const char * str,const KeySuffix * suf,int type)1039 suff_search(const char *str, const KeySuffix *suf, int type)
1040 {
1041 const KeySuffix *s;
1042
1043 for (s = suf; s->name != NULL; s++)
1044 {
1045 if (s->type != type)
1046 continue;
1047
1048 if (strncmp(str, s->name, s->len) == 0)
1049 return s;
1050 }
1051 return NULL;
1052 }
1053
/*
 * Tell whether the first byte of 'str' is a separator character, i.e. a
 * printable ASCII character (above 0x20 and below 0x7F) that is neither a
 * letter nor a digit.
 */
static bool
is_separator_char(const char *str)
{
	const char	c = *str;

	/* control characters, space, DEL and non-ASCII bytes never qualify */
	if (c <= 0x20 || c >= 0x7F)
		return false;

	/* ASCII letters */
	if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
		return false;

	/* ASCII digits */
	if (c >= '0' && c <= '9')
		return false;

	return true;
}
1063
/* ----------
 * Prepare NUMDesc (number description struct) via FormatNode struct
 *
 * Folds one parsed format node 'n' into the accumulated description 'num'.
 * Only NODE_TYPE_ACTION nodes have any effect.  Depending on the keyword,
 * the digit counters (pre/post/multi), the zero_start/zero_end positions,
 * the lsign fields, need_locale and the NUM_F_* bits in num->flag are
 * updated.  Keyword combinations that are rejected raise a
 * ERRCODE_SYNTAX_ERROR error.
 *
 * The outcome depends on what earlier nodes already recorded in 'num'
 * (through the IS_* tests), so nodes have to be fed in the order in which
 * they appear in the format string.
 * ----------
 */
static void
NUMDesc_prepare(NUMDesc *num, FormatNode *n)
{
	/* literal/separator nodes carry no numeric meaning */
	if (n->type != NODE_TYPE_ACTION)
		return;

	/* once EEEE has been seen, any keyword other than NUM_E is rejected */
	if (IS_EEEE(num) && n->key->id != NUM_E)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("\"EEEE\" must be the last pattern used")));

	switch (n->key->id)
	{
		case NUM_9:
			if (IS_BRACKET(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("\"9\" must be ahead of \"PR\"")));
			/* in IS_MULTI state (see NUM_V) the digit is counted in 'multi' */
			if (IS_MULTI(num))
			{
				++num->multi;
				break;
			}
			/* otherwise count it in 'post' or 'pre' depending on IS_DECIMAL */
			if (IS_DECIMAL(num))
				++num->post;
			else
				++num->pre;
			break;

		case NUM_0:
			if (IS_BRACKET(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("\"0\" must be ahead of \"PR\"")));
			/*
			 * First "0" while neither IS_ZERO nor IS_DECIMAL holds: set the
			 * zero flag and record its (1-based) position among the 'pre'
			 * digits.
			 */
			if (!IS_ZERO(num) && !IS_DECIMAL(num))
			{
				num->flag |= NUM_F_ZERO;
				num->zero_start = num->pre + 1;
			}
			if (!IS_DECIMAL(num))
				++num->pre;
			else
				++num->post;

			/* every "0" moves zero_end to the current total digit count */
			num->zero_end = num->pre + num->post;
			break;

		case NUM_B:
			/* only takes effect before any digit and while IS_ZERO is false */
			if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
				num->flag |= NUM_F_BLANK;
			break;

		case NUM_D:
			/* NUM_D additionally sets NUM_F_LDECIMAL and requests locale data */
			num->flag |= NUM_F_LDECIMAL;
			num->need_locale = true;
			/* FALLTHROUGH */
		case NUM_DEC:
			if (IS_DECIMAL(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("multiple decimal points")));
			if (IS_MULTI(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("cannot use \"V\" and decimal point together")));
			num->flag |= NUM_F_DECIMAL;
			break;

		case NUM_FM:
			num->flag |= NUM_F_FILLMODE;
			break;

		case NUM_S:
			if (IS_LSIGN(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("cannot use \"S\" twice")));
			if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
			if (!IS_DECIMAL(num))
			{
				/*
				 * "S" while IS_DECIMAL is false: NUM_LSIGN_PRE, remembering
				 * how many 'pre' digits had been counted at this point.
				 */
				num->lsign = NUM_LSIGN_PRE;
				num->pre_lsign_num = num->pre;
				num->need_locale = true;
				num->flag |= NUM_F_LSIGN;
			}
			else if (num->lsign == NUM_LSIGN_NONE)
			{
				/* "S" while IS_DECIMAL is true: NUM_LSIGN_POST */
				num->lsign = NUM_LSIGN_POST;
				num->need_locale = true;
				num->flag |= NUM_F_LSIGN;
			}
			break;

		case NUM_MI:
			if (IS_LSIGN(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("cannot use \"S\" and \"MI\" together")));
			num->flag |= NUM_F_MINUS;
			/* the _POST bit is added when IS_DECIMAL already holds */
			if (IS_DECIMAL(num))
				num->flag |= NUM_F_MINUS_POST;
			break;

		case NUM_PL:
			if (IS_LSIGN(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("cannot use \"S\" and \"PL\" together")));
			num->flag |= NUM_F_PLUS;
			/* the _POST bit is added when IS_DECIMAL already holds */
			if (IS_DECIMAL(num))
				num->flag |= NUM_F_PLUS_POST;
			break;

		case NUM_SG:
			if (IS_LSIGN(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("cannot use \"S\" and \"SG\" together")));
			/* SG is recorded as both the minus and the plus flag */
			num->flag |= NUM_F_MINUS;
			num->flag |= NUM_F_PLUS;
			break;

		case NUM_PR:
			if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
			num->flag |= NUM_F_BRACKET;
			break;

		case NUM_rn:
		case NUM_RN:
			/* both spellings set the same flag */
			num->flag |= NUM_F_ROMAN;
			break;

		case NUM_L:
		case NUM_G:
			/* these keywords only request locale data */
			num->need_locale = true;
			break;

		case NUM_V:
			if (IS_DECIMAL(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("cannot use \"V\" and decimal point together")));
			num->flag |= NUM_F_MULTI;
			break;

		case NUM_E:
			if (IS_EEEE(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("cannot use \"EEEE\" twice")));
			/* EEEE is refused if any of these states was set earlier */
			if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
				IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
				IS_ROMAN(num) || IS_MULTI(num))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("\"EEEE\" is incompatible with other formats"),
						 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
			num->flag |= NUM_F_EEEE;
			break;
	}
}
1235
/* ----------
 * Format parser, search small keywords and keyword's suffixes, and make
 * format-node tree.
 *
 * for DATE-TIME & NUMBER version
 *
 * Walks the format string 'str' and writes one FormatNode per recognized
 * item into the array starting at 'node', finishing with a NODE_TYPE_END
 * node:
 *	- a keyword found through index_seq_search(str, kw, index) becomes a
 *	  NODE_TYPE_ACTION node; when 'ver' is DCH_TYPE, a prefix and a postfix
 *	  from 'suf' around it are folded into the node's suffix bits;
 *	- when 'ver' is NUM_TYPE, each keyword node is also passed to
 *	  NUMDesc_prepare() together with 'Num';
 *	- every character of a double-quoted section becomes a NODE_TYPE_CHAR
 *	  node (the quotes themselves produce no node);
 *	- any other character becomes a NODE_TYPE_SEPARATOR, NODE_TYPE_SPACE or
 *	  NODE_TYPE_CHAR node.
 *
 * NOTE(review): nothing here checks how many nodes 'node' can hold; the
 * caller presumably sizes the array from the format string length -- confirm.
 * ----------
 */
static void
parse_format(FormatNode *node, const char *str, const KeyWord *kw,
			 const KeySuffix *suf, const int *index, int ver, NUMDesc *Num)
{
	FormatNode *n;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "to_char/number(): run parser");
#endif

	n = node;

	while (*str)
	{
		int			suffix = 0;
		const KeySuffix *s;

		/*
		 * Prefix
		 */
		if (ver == DCH_TYPE &&
			(s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
		{
			/* remember the prefix; it is attached if a keyword follows */
			suffix |= s->id;
			if (s->len)
				str += s->len;
		}

		/*
		 * Keyword
		 */
		if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
		{
			n->type = NODE_TYPE_ACTION;
			n->suffix = suffix;
			if (n->key->len)
				str += n->key->len;

			/*
			 * NUM version: Prepare global NUMDesc struct
			 */
			if (ver == NUM_TYPE)
				NUMDesc_prepare(Num, n);

			/*
			 * Postfix
			 */
			if (ver == DCH_TYPE && *str &&
				(s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
			{
				n->suffix |= s->id;
				if (s->len)
					str += s->len;
			}

			n++;
		}
		else if (*str)
		{
			/*
			 * No keyword here.  Note that a prefix consumed above is simply
			 * dropped in this case: 'suffix' is not stored in any node.
			 */
			int			chlen;

			/*
			 * Process double-quoted literal string, if any
			 */
			if (*str == '"')
			{
				/* skip the opening quote */
				str++;
				while (*str)
				{
					if (*str == '"')
					{
						/* closing quote: consume it and leave the literal */
						str++;
						break;
					}
					/* backslash quotes the next character, if any */
					if (*str == '\\' && *(str + 1))
						str++;
					/* copy one character of pg_mblen() bytes into the node */
					chlen = pg_mblen(str);
					n->type = NODE_TYPE_CHAR;
					memcpy(n->character, str, chlen);
					n->character[chlen] = '\0';
					n->key = NULL;
					n->suffix = 0;
					n++;
					str += chlen;
				}
			}
			else
			{
				/*
				 * Outside double-quoted strings, backslash is only special if
				 * it immediately precedes a double quote.
				 */
				if (*str == '\\' && *(str + 1) == '"')
					str++;
				chlen = pg_mblen(str);

				/* the separator classification is applied for DCH_TYPE only */
				if (ver == DCH_TYPE && is_separator_char(str))
					n->type = NODE_TYPE_SEPARATOR;
				else if (isspace((unsigned char) *str))
					n->type = NODE_TYPE_SPACE;
				else
					n->type = NODE_TYPE_CHAR;

				memcpy(n->character, str, chlen);
				n->character[chlen] = '\0';
				n->key = NULL;
				n->suffix = 0;
				n++;
				str += chlen;
			}
		}
	}

	/* terminate the node list */
	n->type = NODE_TYPE_END;
	n->suffix = 0;
}
1360
/* ----------
 * DEBUG: Dump the FormatNode Tree (debug)
 *
 * Only compiled when DEBUG_TO_FROM_CHAR is defined.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

/* textual form of the TH/th and FM suffix bits, blank when not set */
#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

/*
 * Log, at DEBUG_elog_output level, one line per node of the array starting
 * at 'node'.  The dump stops at the NODE_TYPE_END node, or after the node
 * with index 'max', whichever comes first.
 *
 * Separator and space nodes, which parse_format() produces alongside plain
 * character nodes, are reported with their own type name rather than as
 * unknown nodes.
 */
static void
dump_node(FormatNode *node, int max)
{
	FormatNode *n;
	int			a;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (a = 0, n = node; a <= max; n++, a++)
	{
		if (n->type == NODE_TYPE_ACTION)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
		else if (n->type == NODE_TYPE_CHAR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_SEPARATOR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SEPARATOR '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_SPACE)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SPACE '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
			return;
		}
		else
			elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
	}
}
#endif							/* DEBUG */
1396
1397 /*****************************************************************************
1398 * Private utils
1399 *****************************************************************************/
1400
1401 /* ----------
1402 * Return ST/ND/RD/TH for simple (1..9) numbers
1403 * type --> 0 upper, 1 lower
1404 * ----------
1405 */
1406 static const char *
get_th(char * num,int type)1407 get_th(char *num, int type)
1408 {
1409 int len = strlen(num),
1410 last,
1411 seclast;
1412
1413 last = *(num + (len - 1));
1414 if (!isdigit((unsigned char) last))
1415 ereport(ERROR,
1416 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1417 errmsg("\"%s\" is not a number", num)));
1418
1419 /*
1420 * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1421 * 'ST/st', 'ND/nd', 'RD/rd', respectively
1422 */
1423 if ((len > 1) && ((seclast = num[len - 2]) == '1'))
1424 last = 0;
1425
1426 switch (last)
1427 {
1428 case '1':
1429 if (type == TH_UPPER)
1430 return numTH[0];
1431 return numth[0];
1432 case '2':
1433 if (type == TH_UPPER)
1434 return numTH[1];
1435 return numth[1];
1436 case '3':
1437 if (type == TH_UPPER)
1438 return numTH[2];
1439 return numth[2];
1440 default:
1441 if (type == TH_UPPER)
1442 return numTH[3];
1443 return numth[3];
1444 }
1445 }
1446
/* ----------
 * Turn the number string 'num' into its ordinal form in 'dest'
 *
 * 'num' is copied to 'dest' (unless both are the same buffer) and the
 * suffix chosen by get_th(num, type) is appended.  No length is passed in,
 * so 'dest' has to be big enough for the digits plus the suffix.
 * Returns 'dest'.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	/* in-place use (dest == num) needs no copy */
	if (dest != num)
		strcpy(dest, num);

	suffix = get_th(num, type);
	strcat(dest, suffix);

	return dest;
}
1460
1461 /*****************************************************************************
1462 * upper/lower/initcap functions
1463 *****************************************************************************/
1464
1465 #ifdef USE_ICU
1466
/*
 * Signature of the case-mapping callbacks that icu_convert_case() invokes:
 * convert 'srcLength' UChars at 'src' into 'dest' (room for 'destCapacity'
 * UChars) for 'locale', report problems through 'pErrorCode', and return a
 * length.  u_strToLower, u_strToUpper and the local wrapper
 * u_strToTitle_default_BI are passed in this role.
 */
typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
									 const UChar *src, int32_t srcLength,
									 const char *locale,
									 UErrorCode *pErrorCode);
1471
1472 static int32_t
icu_convert_case(ICU_Convert_Func func,pg_locale_t mylocale,UChar ** buff_dest,UChar * buff_source,int32_t len_source)1473 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1474 UChar **buff_dest, UChar *buff_source, int32_t len_source)
1475 {
1476 UErrorCode status;
1477 int32_t len_dest;
1478
1479 len_dest = len_source; /* try first with same length */
1480 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1481 status = U_ZERO_ERROR;
1482 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1483 mylocale->info.icu.locale, &status);
1484 if (status == U_BUFFER_OVERFLOW_ERROR)
1485 {
1486 /* try again with adjusted length */
1487 pfree(*buff_dest);
1488 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1489 status = U_ZERO_ERROR;
1490 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1491 mylocale->info.icu.locale, &status);
1492 }
1493 if (U_FAILURE(status))
1494 ereport(ERROR,
1495 (errmsg("case conversion failed: %s", u_errorName(status))));
1496 return len_dest;
1497 }
1498
1499 static int32_t
u_strToTitle_default_BI(UChar * dest,int32_t destCapacity,const UChar * src,int32_t srcLength,const char * locale,UErrorCode * pErrorCode)1500 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1501 const UChar *src, int32_t srcLength,
1502 const char *locale,
1503 UErrorCode *pErrorCode)
1504 {
1505 return u_strToTitle(dest, destCapacity, src, srcLength,
1506 NULL, locale, pErrorCode);
1507 }
1508
1509 #endif /* USE_ICU */
1510
1511 /*
1512 * If the system provides the needed functions for wide-character manipulation
1513 * (which are all standardized by C99), then we implement upper/lower/initcap
1514 * using wide-character functions, if necessary. Otherwise we use the
1515 * traditional <ctype.h> functions, which of course will not work as desired
1516 * in multibyte character sets. Note that in either case we are effectively
1517 * assuming that the database character encoding matches the encoding implied
1518 * by LC_CTYPE.
1519 *
1520 * If the system provides locale_t and associated functions (which are
1521 * standardized by Open Group's XBD), we can support collations that are
1522 * neither default nor C. The code is written to handle both combinations
1523 * of have-wide-characters and have-locale_t, though it's rather unlikely
1524 * a platform would have the latter without the former.
1525 */
1526
1527 /*
1528 * collation-aware, wide-character-aware lower function
1529 *
1530 * We pass the number of bytes so we can pass varlena and char*
1531 * to this function. The result is a palloc'd, null-terminated string.
1532 */
1533 char *
str_tolower(const char * buff,size_t nbytes,Oid collid)1534 str_tolower(const char *buff, size_t nbytes, Oid collid)
1535 {
1536 char *result;
1537
1538 if (!buff)
1539 return NULL;
1540
1541 /* C/POSIX collations use this path regardless of database encoding */
1542 if (lc_ctype_is_c(collid))
1543 {
1544 result = asc_tolower(buff, nbytes);
1545 }
1546 else
1547 {
1548 pg_locale_t mylocale = 0;
1549
1550 if (collid != DEFAULT_COLLATION_OID)
1551 {
1552 if (!OidIsValid(collid))
1553 {
1554 /*
1555 * This typically means that the parser could not resolve a
1556 * conflict of implicit collations, so report it that way.
1557 */
1558 ereport(ERROR,
1559 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1560 errmsg("could not determine which collation to use for %s function",
1561 "lower()"),
1562 errhint("Use the COLLATE clause to set the collation explicitly.")));
1563 }
1564 mylocale = pg_newlocale_from_collation(collid);
1565 }
1566
1567 #ifdef USE_ICU
1568 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1569 {
1570 int32_t len_uchar;
1571 int32_t len_conv;
1572 UChar *buff_uchar;
1573 UChar *buff_conv;
1574
1575 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1576 len_conv = icu_convert_case(u_strToLower, mylocale,
1577 &buff_conv, buff_uchar, len_uchar);
1578 icu_from_uchar(&result, buff_conv, len_conv);
1579 pfree(buff_uchar);
1580 pfree(buff_conv);
1581 }
1582 else
1583 #endif
1584 {
1585 if (pg_database_encoding_max_length() > 1)
1586 {
1587 wchar_t *workspace;
1588 size_t curr_char;
1589 size_t result_size;
1590
1591 /* Overflow paranoia */
1592 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1593 ereport(ERROR,
1594 (errcode(ERRCODE_OUT_OF_MEMORY),
1595 errmsg("out of memory")));
1596
1597 /* Output workspace cannot have more codes than input bytes */
1598 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1599
1600 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1601
1602 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1603 {
1604 #ifdef HAVE_LOCALE_T
1605 if (mylocale)
1606 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1607 else
1608 #endif
1609 workspace[curr_char] = towlower(workspace[curr_char]);
1610 }
1611
1612 /*
1613 * Make result large enough; case change might change number
1614 * of bytes
1615 */
1616 result_size = curr_char * pg_database_encoding_max_length() + 1;
1617 result = palloc(result_size);
1618
1619 wchar2char(result, workspace, result_size, mylocale);
1620 pfree(workspace);
1621 }
1622 else
1623 {
1624 char *p;
1625
1626 result = pnstrdup(buff, nbytes);
1627
1628 /*
1629 * Note: we assume that tolower_l() will not be so broken as
1630 * to need an isupper_l() guard test. When using the default
1631 * collation, we apply the traditional Postgres behavior that
1632 * forces ASCII-style treatment of I/i, but in non-default
1633 * collations you get exactly what the collation says.
1634 */
1635 for (p = result; *p; p++)
1636 {
1637 #ifdef HAVE_LOCALE_T
1638 if (mylocale)
1639 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1640 else
1641 #endif
1642 *p = pg_tolower((unsigned char) *p);
1643 }
1644 }
1645 }
1646 }
1647
1648 return result;
1649 }
1650
1651 /*
1652 * collation-aware, wide-character-aware upper function
1653 *
1654 * We pass the number of bytes so we can pass varlena and char*
1655 * to this function. The result is a palloc'd, null-terminated string.
1656 */
1657 char *
str_toupper(const char * buff,size_t nbytes,Oid collid)1658 str_toupper(const char *buff, size_t nbytes, Oid collid)
1659 {
1660 char *result;
1661
1662 if (!buff)
1663 return NULL;
1664
1665 /* C/POSIX collations use this path regardless of database encoding */
1666 if (lc_ctype_is_c(collid))
1667 {
1668 result = asc_toupper(buff, nbytes);
1669 }
1670 else
1671 {
1672 pg_locale_t mylocale = 0;
1673
1674 if (collid != DEFAULT_COLLATION_OID)
1675 {
1676 if (!OidIsValid(collid))
1677 {
1678 /*
1679 * This typically means that the parser could not resolve a
1680 * conflict of implicit collations, so report it that way.
1681 */
1682 ereport(ERROR,
1683 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1684 errmsg("could not determine which collation to use for %s function",
1685 "upper()"),
1686 errhint("Use the COLLATE clause to set the collation explicitly.")));
1687 }
1688 mylocale = pg_newlocale_from_collation(collid);
1689 }
1690
1691 #ifdef USE_ICU
1692 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1693 {
1694 int32_t len_uchar,
1695 len_conv;
1696 UChar *buff_uchar;
1697 UChar *buff_conv;
1698
1699 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1700 len_conv = icu_convert_case(u_strToUpper, mylocale,
1701 &buff_conv, buff_uchar, len_uchar);
1702 icu_from_uchar(&result, buff_conv, len_conv);
1703 pfree(buff_uchar);
1704 pfree(buff_conv);
1705 }
1706 else
1707 #endif
1708 {
1709 if (pg_database_encoding_max_length() > 1)
1710 {
1711 wchar_t *workspace;
1712 size_t curr_char;
1713 size_t result_size;
1714
1715 /* Overflow paranoia */
1716 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1717 ereport(ERROR,
1718 (errcode(ERRCODE_OUT_OF_MEMORY),
1719 errmsg("out of memory")));
1720
1721 /* Output workspace cannot have more codes than input bytes */
1722 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1723
1724 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1725
1726 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1727 {
1728 #ifdef HAVE_LOCALE_T
1729 if (mylocale)
1730 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1731 else
1732 #endif
1733 workspace[curr_char] = towupper(workspace[curr_char]);
1734 }
1735
1736 /*
1737 * Make result large enough; case change might change number
1738 * of bytes
1739 */
1740 result_size = curr_char * pg_database_encoding_max_length() + 1;
1741 result = palloc(result_size);
1742
1743 wchar2char(result, workspace, result_size, mylocale);
1744 pfree(workspace);
1745 }
1746 else
1747 {
1748 char *p;
1749
1750 result = pnstrdup(buff, nbytes);
1751
1752 /*
1753 * Note: we assume that toupper_l() will not be so broken as
1754 * to need an islower_l() guard test. When using the default
1755 * collation, we apply the traditional Postgres behavior that
1756 * forces ASCII-style treatment of I/i, but in non-default
1757 * collations you get exactly what the collation says.
1758 */
1759 for (p = result; *p; p++)
1760 {
1761 #ifdef HAVE_LOCALE_T
1762 if (mylocale)
1763 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1764 else
1765 #endif
1766 *p = pg_toupper((unsigned char) *p);
1767 }
1768 }
1769 }
1770 }
1771
1772 return result;
1773 }
1774
1775 /*
1776 * collation-aware, wide-character-aware initcap function
1777 *
1778 * We pass the number of bytes so we can pass varlena and char*
1779 * to this function. The result is a palloc'd, null-terminated string.
1780 */
1781 char *
str_initcap(const char * buff,size_t nbytes,Oid collid)1782 str_initcap(const char *buff, size_t nbytes, Oid collid)
1783 {
1784 char *result;
1785 int wasalnum = false;
1786
1787 if (!buff)
1788 return NULL;
1789
1790 /* C/POSIX collations use this path regardless of database encoding */
1791 if (lc_ctype_is_c(collid))
1792 {
1793 result = asc_initcap(buff, nbytes);
1794 }
1795 else
1796 {
1797 pg_locale_t mylocale = 0;
1798
1799 if (collid != DEFAULT_COLLATION_OID)
1800 {
1801 if (!OidIsValid(collid))
1802 {
1803 /*
1804 * This typically means that the parser could not resolve a
1805 * conflict of implicit collations, so report it that way.
1806 */
1807 ereport(ERROR,
1808 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1809 errmsg("could not determine which collation to use for %s function",
1810 "initcap()"),
1811 errhint("Use the COLLATE clause to set the collation explicitly.")));
1812 }
1813 mylocale = pg_newlocale_from_collation(collid);
1814 }
1815
1816 #ifdef USE_ICU
1817 if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1818 {
1819 int32_t len_uchar,
1820 len_conv;
1821 UChar *buff_uchar;
1822 UChar *buff_conv;
1823
1824 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1825 len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1826 &buff_conv, buff_uchar, len_uchar);
1827 icu_from_uchar(&result, buff_conv, len_conv);
1828 pfree(buff_uchar);
1829 pfree(buff_conv);
1830 }
1831 else
1832 #endif
1833 {
1834 if (pg_database_encoding_max_length() > 1)
1835 {
1836 wchar_t *workspace;
1837 size_t curr_char;
1838 size_t result_size;
1839
1840 /* Overflow paranoia */
1841 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1842 ereport(ERROR,
1843 (errcode(ERRCODE_OUT_OF_MEMORY),
1844 errmsg("out of memory")));
1845
1846 /* Output workspace cannot have more codes than input bytes */
1847 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1848
1849 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1850
1851 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1852 {
1853 #ifdef HAVE_LOCALE_T
1854 if (mylocale)
1855 {
1856 if (wasalnum)
1857 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1858 else
1859 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1860 wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1861 }
1862 else
1863 #endif
1864 {
1865 if (wasalnum)
1866 workspace[curr_char] = towlower(workspace[curr_char]);
1867 else
1868 workspace[curr_char] = towupper(workspace[curr_char]);
1869 wasalnum = iswalnum(workspace[curr_char]);
1870 }
1871 }
1872
1873 /*
1874 * Make result large enough; case change might change number
1875 * of bytes
1876 */
1877 result_size = curr_char * pg_database_encoding_max_length() + 1;
1878 result = palloc(result_size);
1879
1880 wchar2char(result, workspace, result_size, mylocale);
1881 pfree(workspace);
1882 }
1883 else
1884 {
1885 char *p;
1886
1887 result = pnstrdup(buff, nbytes);
1888
1889 /*
1890 * Note: we assume that toupper_l()/tolower_l() will not be so
1891 * broken as to need guard tests. When using the default
1892 * collation, we apply the traditional Postgres behavior that
1893 * forces ASCII-style treatment of I/i, but in non-default
1894 * collations you get exactly what the collation says.
1895 */
1896 for (p = result; *p; p++)
1897 {
1898 #ifdef HAVE_LOCALE_T
1899 if (mylocale)
1900 {
1901 if (wasalnum)
1902 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1903 else
1904 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1905 wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
1906 }
1907 else
1908 #endif
1909 {
1910 if (wasalnum)
1911 *p = pg_tolower((unsigned char) *p);
1912 else
1913 *p = pg_toupper((unsigned char) *p);
1914 wasalnum = isalnum((unsigned char) *p);
1915 }
1916 }
1917 }
1918 }
1919 }
1920
1921 return result;
1922 }
1923
/*
 * ASCII-only lower function
 *
 * Takes a pointer plus byte count, so varlena payloads and char* strings can
 * both be passed.  The input is duplicated with pnstrdup() and every byte of
 * the copy, up to its terminating NUL, is run through pg_ascii_tolower().
 * The result is a palloc'd, null-terminated string; NULL input gives NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_tolower((unsigned char) result[i]);

	return result;
}
1946
/*
 * ASCII-only upper function
 *
 * Takes a pointer plus byte count, so varlena payloads and char* strings can
 * both be passed.  The input is duplicated with pnstrdup() and every byte of
 * the copy, up to its terminating NUL, is run through pg_ascii_toupper().
 * The result is a palloc'd, null-terminated string; NULL input gives NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_toupper((unsigned char) result[i]);

	return result;
}
1969
/*
 * ASCII-only initcap function
 *
 * Takes a pointer plus byte count, so varlena payloads and char* strings can
 * both be passed.  The input is duplicated with pnstrdup(); in the copy, a
 * byte that follows an ASCII letter or digit goes through
 * pg_ascii_tolower(), any other byte (including the first) through
 * pg_ascii_toupper().  The result is a palloc'd, null-terminated string;
 * NULL input gives NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;
	int			wasalnum = false;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
	{
		unsigned char in = (unsigned char) result[i];
		char		c;

		c = wasalnum ? pg_ascii_tolower(in) : pg_ascii_toupper(in);
		result[i] = c;

		/* we don't trust isalnum() here */
		wasalnum = ((c >= 'A' && c <= 'Z') ||
					(c >= 'a' && c <= 'z') ||
					(c >= '0' && c <= '9'));
	}

	return result;
}
2004
2005 /* convenience routines for when the input is null-terminated */
2006
2007 static char *
str_tolower_z(const char * buff,Oid collid)2008 str_tolower_z(const char *buff, Oid collid)
2009 {
2010 return str_tolower(buff, strlen(buff), collid);
2011 }
2012
2013 static char *
str_toupper_z(const char * buff,Oid collid)2014 str_toupper_z(const char *buff, Oid collid)
2015 {
2016 return str_toupper(buff, strlen(buff), collid);
2017 }
2018
2019 static char *
str_initcap_z(const char * buff,Oid collid)2020 str_initcap_z(const char *buff, Oid collid)
2021 {
2022 return str_initcap(buff, strlen(buff), collid);
2023 }
2024
/* asc_tolower() for a null-terminated string: the length comes from strlen() */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2030
/* asc_toupper() for a null-terminated string: the length comes from strlen() */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2036
2037 /* asc_initcap_z is not currently needed */
2038
2039
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on, skip two chars, assuming there are two available.
 * Each of the two steps is taken only while *ptr is not the terminating
 * NUL, and advances 'ptr' by pg_mblen(ptr) bytes (presumably the length of
 * the possibly multibyte character at 'ptr').
 *
 * 'ptr' is modified in place and evaluated several times, so it must be a
 * plain lvalue without side effects.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			if (*(ptr)) (ptr) += pg_mblen(ptr); \
			if (*(ptr)) (ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2054
2055
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: Dump a keyword index (for debugging and index checking)
 *
 * For each of the KeyWord_INDEX_SIZE positions of 'index', log the ASCII
 * character the position stands for (position + 32) together with either
 * the name of the keyword it points at in 'k', or, when the slot holds -1,
 * the raw slot contents.  A final line reports how many positions were
 * used and how many were free.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			used = 0;
	int			unused = 0;
	int			pos;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (pos = 0; pos < KeyWord_INDEX_SIZE; pos++)
	{
		if (index[pos] == -1)
		{
			/* no keyword starts with this character */
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", pos, pos + 32, index[pos]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", pos + 32, k[index[pos]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2088
2089 /* ----------
2090 * Return true if next format picture is not digit value
2091 * ----------
2092 */
2093 static bool
is_next_separator(FormatNode * n)2094 is_next_separator(FormatNode *n)
2095 {
2096 if (n->type == NODE_TYPE_END)
2097 return false;
2098
2099 if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2100 return true;
2101
2102 /*
2103 * Next node
2104 */
2105 n++;
2106
2107 /* end of format string is treated like a non-digit separator */
2108 if (n->type == NODE_TYPE_END)
2109 return true;
2110
2111 if (n->type == NODE_TYPE_ACTION)
2112 {
2113 if (n->key->is_digit)
2114 return false;
2115
2116 return true;
2117 }
2118 else if (n->character[1] == '\0' &&
2119 isdigit((unsigned char) n->character[0]))
2120 return false;
2121
2122 return true; /* some non-digit input (separator) */
2123 }
2124
2125
/*
 * Expand a partially specified year toward 2020.
 *
 * This is effectively what happens when we assume '70' is 1970 and '69' is
 * 2069:
 *	below 70	-> 2000's (year + 2000)
 *	70 .. 99	-> 1900's (year + 1900)
 *	100 .. 519	-> 2000's (year + 2000)
 *	520 .. 999	-> 1000's (year + 1000)
 *	1000 and up	-> returned unchanged
 */
static int
adjust_partial_year_to_2020(int year)
{
	int			base;

	/* anything with four or more digits is already complete */
	if (year >= 1000)
		return year;

	if (year < 70)
		base = 2000;
	else if (year < 100)
		base = 1900;
	else if (year < 520)
		base = 2000;
	else
		base = 1000;

	return year + base;
}
2148
2149
/*
 * Count the whitespace characters (per isspace()) at the start of 'str'.
 * Counting stops at the first non-whitespace character or at the end of
 * the string.
 */
static int
strspace_len(const char *str)
{
	const char *p;

	for (p = str; *p != '\0' && isspace((unsigned char) *p); p++)
		;

	return (int) (p - str);
}
2162
2163 /*
2164 * Set the date mode of a from-char conversion.
2165 *
2166 * Puke if the date mode has already been set, and the caller attempts to set
2167 * it to a conflicting mode.
2168 */
2169 static void
from_char_set_mode(TmFromChar * tmfc,const FromCharDateMode mode)2170 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode)
2171 {
2172 if (mode != FROM_CHAR_DATE_NONE)
2173 {
2174 if (tmfc->mode == FROM_CHAR_DATE_NONE)
2175 tmfc->mode = mode;
2176 else if (tmfc->mode != mode)
2177 ereport(ERROR,
2178 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2179 errmsg("invalid combination of date conventions"),
2180 errhint("Do not mix Gregorian and ISO week date "
2181 "conventions in a formatting template.")));
2182 }
2183 }
2184
2185 /*
2186 * Set the integer pointed to by 'dest' to the given value.
2187 *
2188 * Puke if the destination integer has previously been set to some other
2189 * non-zero value.
2190 */
2191 static void
from_char_set_int(int * dest,const int value,const FormatNode * node)2192 from_char_set_int(int *dest, const int value, const FormatNode *node)
2193 {
2194 if (*dest != 0 && *dest != value)
2195 ereport(ERROR,
2196 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2197 errmsg("conflicting values for \"%s\" field in formatting string",
2198 node->key->name),
2199 errdetail("This value contradicts a previous setting for "
2200 "the same field type.")));
2201 *dest = value;
2202 }
2203
2204 /*
2205 * Read a single integer from the source string, into the int pointed to by
2206 * 'dest'. If 'dest' is NULL, the result is discarded.
2207 *
2208 * In fixed-width mode (the node does not have the FM suffix), consume at most
2209 * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2210 *
2211 * We use strtol() to recover the integer value from the source string, in
2212 * accordance with the given FormatNode.
2213 *
2214 * If the conversion completes successfully, src will have been advanced to
2215 * point at the character immediately following the last character used in the
2216 * conversion.
2217 *
2218 * Return the number of characters consumed.
2219 *
2220 * Note that from_char_parse_int() provides a more convenient wrapper where
2221 * the length of the field is the same as the length of the format keyword (as
2222 * with DD and MI).
2223 */
2224 static int
from_char_parse_int_len(int * dest,const char ** src,const int len,FormatNode * node)2225 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node)
2226 {
2227 long result;
2228 char copy[DCH_MAX_ITEM_SIZ + 1];
2229 const char *init = *src;
2230 int used;
2231
2232 /*
2233 * Skip any whitespace before parsing the integer.
2234 */
2235 *src += strspace_len(*src);
2236
2237 Assert(len <= DCH_MAX_ITEM_SIZ);
2238 used = (int) strlcpy(copy, *src, len + 1);
2239
2240 if (S_FM(node->suffix) || is_next_separator(node))
2241 {
2242 /*
2243 * This node is in Fill Mode, or the next node is known to be a
2244 * non-digit value, so we just slurp as many characters as we can get.
2245 */
2246 char *endptr;
2247
2248 errno = 0;
2249 result = strtol(init, &endptr, 10);
2250 *src = endptr;
2251 }
2252 else
2253 {
2254 /*
2255 * We need to pull exactly the number of characters given in 'len' out
2256 * of the string, and convert those.
2257 */
2258 char *last;
2259
2260 if (used < len)
2261 ereport(ERROR,
2262 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2263 errmsg("source string too short for \"%s\" formatting field",
2264 node->key->name),
2265 errdetail("Field requires %d characters, but only %d "
2266 "remain.",
2267 len, used),
2268 errhint("If your source string is not fixed-width, try "
2269 "using the \"FM\" modifier.")));
2270
2271 errno = 0;
2272 result = strtol(copy, &last, 10);
2273 used = last - copy;
2274
2275 if (used > 0 && used < len)
2276 ereport(ERROR,
2277 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2278 errmsg("invalid value \"%s\" for \"%s\"",
2279 copy, node->key->name),
2280 errdetail("Field requires %d characters, but only %d "
2281 "could be parsed.", len, used),
2282 errhint("If your source string is not fixed-width, try "
2283 "using the \"FM\" modifier.")));
2284
2285 *src += used;
2286 }
2287
2288 if (*src == init)
2289 ereport(ERROR,
2290 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2291 errmsg("invalid value \"%s\" for \"%s\"",
2292 copy, node->key->name),
2293 errdetail("Value must be an integer.")));
2294
2295 if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2296 ereport(ERROR,
2297 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2298 errmsg("value for \"%s\" in source string is out of range",
2299 node->key->name),
2300 errdetail("Value must be in the range %d to %d.",
2301 INT_MIN, INT_MAX)));
2302
2303 if (dest != NULL)
2304 from_char_set_int(dest, (int) result, node);
2305 return *src - init;
2306 }
2307
2308 /*
2309 * Call from_char_parse_int_len(), using the length of the format keyword as
2310 * the expected length of the field.
2311 *
2312 * Don't call this function if the field differs in length from the format
2313 * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2314 * In such cases, call from_char_parse_int_len() instead to specify the
2315 * required length explicitly.
2316 */
2317 static int
from_char_parse_int(int * dest,const char ** src,FormatNode * node)2318 from_char_parse_int(int *dest, const char **src, FormatNode *node)
2319 {
2320 return from_char_parse_int_len(dest, src, node->key->len, node);
2321 }
2322
2323 /*
2324 * Sequentially search null-terminated "array" for a case-insensitive match
2325 * to the initial character(s) of "name".
2326 *
2327 * Returns array index of match, or -1 for no match.
2328 *
2329 * *len is set to the length of the match, or 0 for no match.
2330 *
2331 * Case-insensitivity is defined per pg_tolower, so this is only
2332 * suitable for comparisons to ASCII strings.
2333 */
2334 static int
seq_search(const char * name,const char * const * array,int * len)2335 seq_search(const char *name, const char *const *array, int *len)
2336 {
2337 unsigned char firstc;
2338 const char *const *a;
2339
2340 *len = 0;
2341
2342 /* empty string can't match anything */
2343 if (!*name)
2344 return -1;
2345
2346 /* we handle first char specially to gain some speed */
2347 firstc = pg_tolower((unsigned char) *name);
2348
2349 for (a = array; *a != NULL; a++)
2350 {
2351 const char *p;
2352 const char *n;
2353
2354 /* compare first chars */
2355 if (pg_tolower((unsigned char) **a) != firstc)
2356 continue;
2357
2358 /* compare rest of string */
2359 for (p = *a + 1, n = name + 1;; p++, n++)
2360 {
2361 /* return success if we matched whole array entry */
2362 if (*p == '\0')
2363 {
2364 *len = n - name;
2365 return a - array;
2366 }
2367 /* else, must have another character in "name" ... */
2368 if (*n == '\0')
2369 break;
2370 /* ... and it must match */
2371 if (pg_tolower((unsigned char) *p) !=
2372 pg_tolower((unsigned char) *n))
2373 break;
2374 }
2375 }
2376
2377 return -1;
2378 }
2379
2380 /*
2381 * Perform a sequential search in 'array' for an entry matching the first
2382 * character(s) of the 'src' string case-insensitively.
2383 *
2384 * If a match is found, copy the array index of the match into the integer
2385 * pointed to by 'dest', advance 'src' to the end of the part of the string
2386 * which matched, and return the number of characters consumed.
2387 *
2388 * If the string doesn't match, throw an error.
2389 *
2390 * 'node' is used only for error reports: node->key->name identifies the
2391 * field type we were searching for.
2392 */
2393 static int
from_char_seq_search(int * dest,const char ** src,const char * const * array,FormatNode * node)2394 from_char_seq_search(int *dest, const char **src, const char *const *array,
2395 FormatNode *node)
2396 {
2397 int len;
2398
2399 *dest = seq_search(*src, array, &len);
2400
2401 if (len <= 0)
2402 {
2403 /*
2404 * In the error report, truncate the string at the next whitespace (if
2405 * any) to avoid including irrelevant data.
2406 */
2407 char *copy = pstrdup(*src);
2408 char *c;
2409
2410 for (c = copy; *c; c++)
2411 {
2412 if (scanner_isspace(*c))
2413 {
2414 *c = '\0';
2415 break;
2416 }
2417 }
2418
2419 ereport(ERROR,
2420 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2421 errmsg("invalid value \"%s\" for \"%s\"",
2422 copy, node->key->name),
2423 errdetail("The given value did not match any of the allowed "
2424 "values for this field.")));
2425 }
2426 *src += len;
2427 return len;
2428 }
2429
2430 /* ----------
2431 * Process a TmToChar struct as denoted by a list of FormatNodes.
2432 * The formatted data is written to the string pointed to by 'out'.
2433 * ----------
2434 */
2435 static void
DCH_to_char(FormatNode * node,bool is_interval,TmToChar * in,char * out,Oid collid)2436 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2437 {
2438 FormatNode *n;
2439 char *s;
2440 struct pg_tm *tm = &in->tm;
2441 int i;
2442
2443 /* cache localized days and months */
2444 cache_locale_time();
2445
2446 s = out;
2447 for (n = node; n->type != NODE_TYPE_END; n++)
2448 {
2449 if (n->type != NODE_TYPE_ACTION)
2450 {
2451 strcpy(s, n->character);
2452 s += strlen(s);
2453 continue;
2454 }
2455
2456 switch (n->key->id)
2457 {
2458 case DCH_A_M:
2459 case DCH_P_M:
2460 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2461 ? P_M_STR : A_M_STR);
2462 s += strlen(s);
2463 break;
2464 case DCH_AM:
2465 case DCH_PM:
2466 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2467 ? PM_STR : AM_STR);
2468 s += strlen(s);
2469 break;
2470 case DCH_a_m:
2471 case DCH_p_m:
2472 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2473 ? p_m_STR : a_m_STR);
2474 s += strlen(s);
2475 break;
2476 case DCH_am:
2477 case DCH_pm:
2478 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2479 ? pm_STR : am_STR);
2480 s += strlen(s);
2481 break;
2482 case DCH_HH:
2483 case DCH_HH12:
2484
2485 /*
2486 * display time as shown on a 12-hour clock, even for
2487 * intervals
2488 */
2489 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2490 tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2491 tm->tm_hour % (HOURS_PER_DAY / 2));
2492 if (S_THth(n->suffix))
2493 str_numth(s, s, S_TH_TYPE(n->suffix));
2494 s += strlen(s);
2495 break;
2496 case DCH_HH24:
2497 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2498 tm->tm_hour);
2499 if (S_THth(n->suffix))
2500 str_numth(s, s, S_TH_TYPE(n->suffix));
2501 s += strlen(s);
2502 break;
2503 case DCH_MI:
2504 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2505 tm->tm_min);
2506 if (S_THth(n->suffix))
2507 str_numth(s, s, S_TH_TYPE(n->suffix));
2508 s += strlen(s);
2509 break;
2510 case DCH_SS:
2511 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2512 tm->tm_sec);
2513 if (S_THth(n->suffix))
2514 str_numth(s, s, S_TH_TYPE(n->suffix));
2515 s += strlen(s);
2516 break;
2517 case DCH_MS: /* millisecond */
2518 sprintf(s, "%03d", (int) (in->fsec / INT64CONST(1000)));
2519 if (S_THth(n->suffix))
2520 str_numth(s, s, S_TH_TYPE(n->suffix));
2521 s += strlen(s);
2522 break;
2523 case DCH_US: /* microsecond */
2524 sprintf(s, "%06d", (int) in->fsec);
2525 if (S_THth(n->suffix))
2526 str_numth(s, s, S_TH_TYPE(n->suffix));
2527 s += strlen(s);
2528 break;
2529 case DCH_SSSS:
2530 sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2531 tm->tm_min * SECS_PER_MINUTE +
2532 tm->tm_sec);
2533 if (S_THth(n->suffix))
2534 str_numth(s, s, S_TH_TYPE(n->suffix));
2535 s += strlen(s);
2536 break;
2537 case DCH_tz:
2538 INVALID_FOR_INTERVAL;
2539 if (tmtcTzn(in))
2540 {
2541 /* We assume here that timezone names aren't localized */
2542 char *p = asc_tolower_z(tmtcTzn(in));
2543
2544 strcpy(s, p);
2545 pfree(p);
2546 s += strlen(s);
2547 }
2548 break;
2549 case DCH_TZ:
2550 INVALID_FOR_INTERVAL;
2551 if (tmtcTzn(in))
2552 {
2553 strcpy(s, tmtcTzn(in));
2554 s += strlen(s);
2555 }
2556 break;
2557 case DCH_TZH:
2558 INVALID_FOR_INTERVAL;
2559 sprintf(s, "%c%02d",
2560 (tm->tm_gmtoff >= 0) ? '+' : '-',
2561 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2562 s += strlen(s);
2563 break;
2564 case DCH_TZM:
2565 INVALID_FOR_INTERVAL;
2566 sprintf(s, "%02d",
2567 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2568 s += strlen(s);
2569 break;
2570 case DCH_OF:
2571 INVALID_FOR_INTERVAL;
2572 sprintf(s, "%c%0*d",
2573 (tm->tm_gmtoff >= 0) ? '+' : '-',
2574 S_FM(n->suffix) ? 0 : 2,
2575 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2576 s += strlen(s);
2577 if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2578 {
2579 sprintf(s, ":%02d",
2580 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2581 s += strlen(s);
2582 }
2583 break;
2584 case DCH_A_D:
2585 case DCH_B_C:
2586 INVALID_FOR_INTERVAL;
2587 strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2588 s += strlen(s);
2589 break;
2590 case DCH_AD:
2591 case DCH_BC:
2592 INVALID_FOR_INTERVAL;
2593 strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2594 s += strlen(s);
2595 break;
2596 case DCH_a_d:
2597 case DCH_b_c:
2598 INVALID_FOR_INTERVAL;
2599 strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2600 s += strlen(s);
2601 break;
2602 case DCH_ad:
2603 case DCH_bc:
2604 INVALID_FOR_INTERVAL;
2605 strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2606 s += strlen(s);
2607 break;
2608 case DCH_MONTH:
2609 INVALID_FOR_INTERVAL;
2610 if (!tm->tm_mon)
2611 break;
2612 if (S_TM(n->suffix))
2613 {
2614 char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2615
2616 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2617 strcpy(s, str);
2618 else
2619 ereport(ERROR,
2620 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2621 errmsg("localized string format value too long")));
2622 }
2623 else
2624 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2625 asc_toupper_z(months_full[tm->tm_mon - 1]));
2626 s += strlen(s);
2627 break;
2628 case DCH_Month:
2629 INVALID_FOR_INTERVAL;
2630 if (!tm->tm_mon)
2631 break;
2632 if (S_TM(n->suffix))
2633 {
2634 char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2635
2636 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2637 strcpy(s, str);
2638 else
2639 ereport(ERROR,
2640 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2641 errmsg("localized string format value too long")));
2642 }
2643 else
2644 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2645 months_full[tm->tm_mon - 1]);
2646 s += strlen(s);
2647 break;
2648 case DCH_month:
2649 INVALID_FOR_INTERVAL;
2650 if (!tm->tm_mon)
2651 break;
2652 if (S_TM(n->suffix))
2653 {
2654 char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2655
2656 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2657 strcpy(s, str);
2658 else
2659 ereport(ERROR,
2660 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2661 errmsg("localized string format value too long")));
2662 }
2663 else
2664 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2665 asc_tolower_z(months_full[tm->tm_mon - 1]));
2666 s += strlen(s);
2667 break;
2668 case DCH_MON:
2669 INVALID_FOR_INTERVAL;
2670 if (!tm->tm_mon)
2671 break;
2672 if (S_TM(n->suffix))
2673 {
2674 char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2675
2676 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2677 strcpy(s, str);
2678 else
2679 ereport(ERROR,
2680 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2681 errmsg("localized string format value too long")));
2682 }
2683 else
2684 strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2685 s += strlen(s);
2686 break;
2687 case DCH_Mon:
2688 INVALID_FOR_INTERVAL;
2689 if (!tm->tm_mon)
2690 break;
2691 if (S_TM(n->suffix))
2692 {
2693 char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2694
2695 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2696 strcpy(s, str);
2697 else
2698 ereport(ERROR,
2699 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2700 errmsg("localized string format value too long")));
2701 }
2702 else
2703 strcpy(s, months[tm->tm_mon - 1]);
2704 s += strlen(s);
2705 break;
2706 case DCH_mon:
2707 INVALID_FOR_INTERVAL;
2708 if (!tm->tm_mon)
2709 break;
2710 if (S_TM(n->suffix))
2711 {
2712 char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2713
2714 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2715 strcpy(s, str);
2716 else
2717 ereport(ERROR,
2718 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2719 errmsg("localized string format value too long")));
2720 }
2721 else
2722 strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2723 s += strlen(s);
2724 break;
2725 case DCH_MM:
2726 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2727 tm->tm_mon);
2728 if (S_THth(n->suffix))
2729 str_numth(s, s, S_TH_TYPE(n->suffix));
2730 s += strlen(s);
2731 break;
2732 case DCH_DAY:
2733 INVALID_FOR_INTERVAL;
2734 if (S_TM(n->suffix))
2735 {
2736 char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2737
2738 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2739 strcpy(s, str);
2740 else
2741 ereport(ERROR,
2742 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2743 errmsg("localized string format value too long")));
2744 }
2745 else
2746 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2747 asc_toupper_z(days[tm->tm_wday]));
2748 s += strlen(s);
2749 break;
2750 case DCH_Day:
2751 INVALID_FOR_INTERVAL;
2752 if (S_TM(n->suffix))
2753 {
2754 char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2755
2756 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2757 strcpy(s, str);
2758 else
2759 ereport(ERROR,
2760 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2761 errmsg("localized string format value too long")));
2762 }
2763 else
2764 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2765 days[tm->tm_wday]);
2766 s += strlen(s);
2767 break;
2768 case DCH_day:
2769 INVALID_FOR_INTERVAL;
2770 if (S_TM(n->suffix))
2771 {
2772 char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
2773
2774 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2775 strcpy(s, str);
2776 else
2777 ereport(ERROR,
2778 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2779 errmsg("localized string format value too long")));
2780 }
2781 else
2782 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2783 asc_tolower_z(days[tm->tm_wday]));
2784 s += strlen(s);
2785 break;
2786 case DCH_DY:
2787 INVALID_FOR_INTERVAL;
2788 if (S_TM(n->suffix))
2789 {
2790 char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
2791
2792 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2793 strcpy(s, str);
2794 else
2795 ereport(ERROR,
2796 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2797 errmsg("localized string format value too long")));
2798 }
2799 else
2800 strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
2801 s += strlen(s);
2802 break;
2803 case DCH_Dy:
2804 INVALID_FOR_INTERVAL;
2805 if (S_TM(n->suffix))
2806 {
2807 char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
2808
2809 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2810 strcpy(s, str);
2811 else
2812 ereport(ERROR,
2813 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2814 errmsg("localized string format value too long")));
2815 }
2816 else
2817 strcpy(s, days_short[tm->tm_wday]);
2818 s += strlen(s);
2819 break;
2820 case DCH_dy:
2821 INVALID_FOR_INTERVAL;
2822 if (S_TM(n->suffix))
2823 {
2824 char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
2825
2826 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2827 strcpy(s, str);
2828 else
2829 ereport(ERROR,
2830 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2831 errmsg("localized string format value too long")));
2832 }
2833 else
2834 strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
2835 s += strlen(s);
2836 break;
2837 case DCH_DDD:
2838 case DCH_IDDD:
2839 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
2840 (n->key->id == DCH_DDD) ?
2841 tm->tm_yday :
2842 date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
2843 if (S_THth(n->suffix))
2844 str_numth(s, s, S_TH_TYPE(n->suffix));
2845 s += strlen(s);
2846 break;
2847 case DCH_DD:
2848 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
2849 if (S_THth(n->suffix))
2850 str_numth(s, s, S_TH_TYPE(n->suffix));
2851 s += strlen(s);
2852 break;
2853 case DCH_D:
2854 INVALID_FOR_INTERVAL;
2855 sprintf(s, "%d", tm->tm_wday + 1);
2856 if (S_THth(n->suffix))
2857 str_numth(s, s, S_TH_TYPE(n->suffix));
2858 s += strlen(s);
2859 break;
2860 case DCH_ID:
2861 INVALID_FOR_INTERVAL;
2862 sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
2863 if (S_THth(n->suffix))
2864 str_numth(s, s, S_TH_TYPE(n->suffix));
2865 s += strlen(s);
2866 break;
2867 case DCH_WW:
2868 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2869 (tm->tm_yday - 1) / 7 + 1);
2870 if (S_THth(n->suffix))
2871 str_numth(s, s, S_TH_TYPE(n->suffix));
2872 s += strlen(s);
2873 break;
2874 case DCH_IW:
2875 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2876 date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
2877 if (S_THth(n->suffix))
2878 str_numth(s, s, S_TH_TYPE(n->suffix));
2879 s += strlen(s);
2880 break;
2881 case DCH_Q:
2882 if (!tm->tm_mon)
2883 break;
2884 sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
2885 if (S_THth(n->suffix))
2886 str_numth(s, s, S_TH_TYPE(n->suffix));
2887 s += strlen(s);
2888 break;
2889 case DCH_CC:
2890 if (is_interval) /* straight calculation */
2891 i = tm->tm_year / 100;
2892 else
2893 {
2894 if (tm->tm_year > 0)
2895 /* Century 20 == 1901 - 2000 */
2896 i = (tm->tm_year - 1) / 100 + 1;
2897 else
2898 /* Century 6BC == 600BC - 501BC */
2899 i = tm->tm_year / 100 - 1;
2900 }
2901 if (i <= 99 && i >= -99)
2902 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
2903 else
2904 sprintf(s, "%d", i);
2905 if (S_THth(n->suffix))
2906 str_numth(s, s, S_TH_TYPE(n->suffix));
2907 s += strlen(s);
2908 break;
2909 case DCH_Y_YYY:
2910 i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
2911 sprintf(s, "%d,%03d", i,
2912 ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
2913 if (S_THth(n->suffix))
2914 str_numth(s, s, S_TH_TYPE(n->suffix));
2915 s += strlen(s);
2916 break;
2917 case DCH_YYYY:
2918 case DCH_IYYY:
2919 sprintf(s, "%0*d",
2920 S_FM(n->suffix) ? 0 :
2921 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
2922 (n->key->id == DCH_YYYY ?
2923 ADJUST_YEAR(tm->tm_year, is_interval) :
2924 ADJUST_YEAR(date2isoyear(tm->tm_year,
2925 tm->tm_mon,
2926 tm->tm_mday),
2927 is_interval)));
2928 if (S_THth(n->suffix))
2929 str_numth(s, s, S_TH_TYPE(n->suffix));
2930 s += strlen(s);
2931 break;
2932 case DCH_YYY:
2933 case DCH_IYY:
2934 sprintf(s, "%0*d",
2935 S_FM(n->suffix) ? 0 :
2936 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
2937 (n->key->id == DCH_YYY ?
2938 ADJUST_YEAR(tm->tm_year, is_interval) :
2939 ADJUST_YEAR(date2isoyear(tm->tm_year,
2940 tm->tm_mon,
2941 tm->tm_mday),
2942 is_interval)) % 1000);
2943 if (S_THth(n->suffix))
2944 str_numth(s, s, S_TH_TYPE(n->suffix));
2945 s += strlen(s);
2946 break;
2947 case DCH_YY:
2948 case DCH_IY:
2949 sprintf(s, "%0*d",
2950 S_FM(n->suffix) ? 0 :
2951 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
2952 (n->key->id == DCH_YY ?
2953 ADJUST_YEAR(tm->tm_year, is_interval) :
2954 ADJUST_YEAR(date2isoyear(tm->tm_year,
2955 tm->tm_mon,
2956 tm->tm_mday),
2957 is_interval)) % 100);
2958 if (S_THth(n->suffix))
2959 str_numth(s, s, S_TH_TYPE(n->suffix));
2960 s += strlen(s);
2961 break;
2962 case DCH_Y:
2963 case DCH_I:
2964 sprintf(s, "%1d",
2965 (n->key->id == DCH_Y ?
2966 ADJUST_YEAR(tm->tm_year, is_interval) :
2967 ADJUST_YEAR(date2isoyear(tm->tm_year,
2968 tm->tm_mon,
2969 tm->tm_mday),
2970 is_interval)) % 10);
2971 if (S_THth(n->suffix))
2972 str_numth(s, s, S_TH_TYPE(n->suffix));
2973 s += strlen(s);
2974 break;
2975 case DCH_RM:
2976 /* FALLTHROUGH */
2977 case DCH_rm:
2978
2979 /*
2980 * For intervals, values like '12 month' will be reduced to 0
2981 * month and some years. These should be processed.
2982 */
2983 if (!tm->tm_mon && !tm->tm_year)
2984 break;
2985 else
2986 {
2987 int mon = 0;
2988 const char *const *months;
2989
2990 if (n->key->id == DCH_RM)
2991 months = rm_months_upper;
2992 else
2993 months = rm_months_lower;
2994
2995 /*
2996 * Compute the position in the roman-numeral array. Note
2997 * that the contents of the array are reversed, December
2998 * being first and January last.
2999 */
3000 if (tm->tm_mon == 0)
3001 {
3002 /*
3003 * This case is special, and tracks the case of full
3004 * interval years.
3005 */
3006 mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3007 }
3008 else if (tm->tm_mon < 0)
3009 {
3010 /*
3011 * Negative case. In this case, the calculation is
3012 * reversed, where -1 means December, -2 November,
3013 * etc.
3014 */
3015 mon = -1 * (tm->tm_mon + 1);
3016 }
3017 else
3018 {
3019 /*
3020 * Common case, with a strictly positive value. The
3021 * position in the array matches with the value of
3022 * tm_mon.
3023 */
3024 mon = MONTHS_PER_YEAR - tm->tm_mon;
3025 }
3026
3027 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3028 months[mon]);
3029 s += strlen(s);
3030 }
3031 break;
3032 case DCH_W:
3033 sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3034 if (S_THth(n->suffix))
3035 str_numth(s, s, S_TH_TYPE(n->suffix));
3036 s += strlen(s);
3037 break;
3038 case DCH_J:
3039 sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3040 if (S_THth(n->suffix))
3041 str_numth(s, s, S_TH_TYPE(n->suffix));
3042 s += strlen(s);
3043 break;
3044 }
3045 }
3046
3047 *s = '\0';
3048 }
3049
3050 /* ----------
3051 * Process a string as denoted by a list of FormatNodes.
3052 * The TmFromChar struct pointed to by 'out' is populated with the results.
3053 *
3054 * Note: we currently don't have any to_interval() function, so there
3055 * is no need here for INVALID_FOR_INTERVAL checks.
3056 * ----------
3057 */
3058 static void
DCH_from_char(FormatNode * node,const char * in,TmFromChar * out)3059 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out)
3060 {
3061 FormatNode *n;
3062 const char *s;
3063 int len,
3064 value;
3065 bool fx_mode = false;
3066
3067 /* number of extra skipped characters (more than given in format string) */
3068 int extra_skip = 0;
3069
3070 for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3071 {
3072 /*
3073 * Ignore spaces at the beginning of the string and before fields when
3074 * not in FX (fixed width) mode.
3075 */
3076 if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3077 (n->type == NODE_TYPE_ACTION || n == node))
3078 {
3079 while (*s != '\0' && isspace((unsigned char) *s))
3080 {
3081 s++;
3082 extra_skip++;
3083 }
3084 }
3085
3086 if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3087 {
3088 if (!fx_mode)
3089 {
3090 /*
3091 * In non FX (fixed format) mode one format string space or
3092 * separator match to one space or separator in input string.
3093 * Or match nothing if there is no space or separator in the
3094 * current position of input string.
3095 */
3096 extra_skip--;
3097 if (isspace((unsigned char) *s) || is_separator_char(s))
3098 {
3099 s++;
3100 extra_skip++;
3101 }
3102 }
3103 else
3104 {
3105 /*
3106 * In FX mode, on format string space or separator we consume
3107 * exactly one character from input string. Notice we don't
3108 * insist that the consumed character match the format's
3109 * character.
3110 */
3111 s += pg_mblen(s);
3112 }
3113 continue;
3114 }
3115 else if (n->type != NODE_TYPE_ACTION)
3116 {
3117 /*
3118 * Text character, so consume one character from input string.
3119 * Notice we don't insist that the consumed character match the
3120 * format's character.
3121 */
3122 if (!fx_mode)
3123 {
3124 /*
3125 * In non FX mode we might have skipped some extra characters
3126 * (more than specified in format string) before. In this
3127 * case we don't skip input string character, because it might
3128 * be part of field.
3129 */
3130 if (extra_skip > 0)
3131 extra_skip--;
3132 else
3133 s += pg_mblen(s);
3134 }
3135 else
3136 {
3137 s += pg_mblen(s);
3138 }
3139 continue;
3140 }
3141
3142 from_char_set_mode(out, n->key->date_mode);
3143
3144 switch (n->key->id)
3145 {
3146 case DCH_FX:
3147 fx_mode = true;
3148 break;
3149 case DCH_A_M:
3150 case DCH_P_M:
3151 case DCH_a_m:
3152 case DCH_p_m:
3153 from_char_seq_search(&value, &s, ampm_strings_long,
3154 n);
3155 from_char_set_int(&out->pm, value % 2, n);
3156 out->clock = CLOCK_12_HOUR;
3157 break;
3158 case DCH_AM:
3159 case DCH_PM:
3160 case DCH_am:
3161 case DCH_pm:
3162 from_char_seq_search(&value, &s, ampm_strings,
3163 n);
3164 from_char_set_int(&out->pm, value % 2, n);
3165 out->clock = CLOCK_12_HOUR;
3166 break;
3167 case DCH_HH:
3168 case DCH_HH12:
3169 from_char_parse_int_len(&out->hh, &s, 2, n);
3170 out->clock = CLOCK_12_HOUR;
3171 SKIP_THth(s, n->suffix);
3172 break;
3173 case DCH_HH24:
3174 from_char_parse_int_len(&out->hh, &s, 2, n);
3175 SKIP_THth(s, n->suffix);
3176 break;
3177 case DCH_MI:
3178 from_char_parse_int(&out->mi, &s, n);
3179 SKIP_THth(s, n->suffix);
3180 break;
3181 case DCH_SS:
3182 from_char_parse_int(&out->ss, &s, n);
3183 SKIP_THth(s, n->suffix);
3184 break;
3185 case DCH_MS: /* millisecond */
3186 len = from_char_parse_int_len(&out->ms, &s, 3, n);
3187
3188 /*
3189 * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3190 */
3191 out->ms *= len == 1 ? 100 :
3192 len == 2 ? 10 : 1;
3193
3194 SKIP_THth(s, n->suffix);
3195 break;
3196 case DCH_US: /* microsecond */
3197 len = from_char_parse_int_len(&out->us, &s, 6, n);
3198
3199 out->us *= len == 1 ? 100000 :
3200 len == 2 ? 10000 :
3201 len == 3 ? 1000 :
3202 len == 4 ? 100 :
3203 len == 5 ? 10 : 1;
3204
3205 SKIP_THth(s, n->suffix);
3206 break;
3207 case DCH_SSSS:
3208 from_char_parse_int(&out->ssss, &s, n);
3209 SKIP_THth(s, n->suffix);
3210 break;
3211 case DCH_tz:
3212 case DCH_TZ:
3213 case DCH_OF:
3214 ereport(ERROR,
3215 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3216 errmsg("formatting field \"%s\" is only supported in to_char",
3217 n->key->name)));
3218 break;
3219 case DCH_TZH:
3220
3221 /*
3222 * Value of TZH might be negative. And the issue is that we
3223 * might swallow minus sign as the separator. So, if we have
3224 * skipped more characters than specified in the format
3225 * string, then we consider prepending last skipped minus to
3226 * TZH.
3227 */
3228 if (*s == '+' || *s == '-' || *s == ' ')
3229 {
3230 out->tzsign = *s == '-' ? -1 : +1;
3231 s++;
3232 }
3233 else
3234 {
3235 if (extra_skip > 0 && *(s - 1) == '-')
3236 out->tzsign = -1;
3237 else
3238 out->tzsign = +1;
3239 }
3240
3241 from_char_parse_int_len(&out->tzh, &s, 2, n);
3242 break;
3243 case DCH_TZM:
3244 /* assign positive timezone sign if TZH was not seen before */
3245 if (!out->tzsign)
3246 out->tzsign = +1;
3247 from_char_parse_int_len(&out->tzm, &s, 2, n);
3248 break;
3249 case DCH_A_D:
3250 case DCH_B_C:
3251 case DCH_a_d:
3252 case DCH_b_c:
3253 from_char_seq_search(&value, &s, adbc_strings_long,
3254 n);
3255 from_char_set_int(&out->bc, value % 2, n);
3256 break;
3257 case DCH_AD:
3258 case DCH_BC:
3259 case DCH_ad:
3260 case DCH_bc:
3261 from_char_seq_search(&value, &s, adbc_strings,
3262 n);
3263 from_char_set_int(&out->bc, value % 2, n);
3264 break;
3265 case DCH_MONTH:
3266 case DCH_Month:
3267 case DCH_month:
3268 from_char_seq_search(&value, &s, months_full,
3269 n);
3270 from_char_set_int(&out->mm, value + 1, n);
3271 break;
3272 case DCH_MON:
3273 case DCH_Mon:
3274 case DCH_mon:
3275 from_char_seq_search(&value, &s, months,
3276 n);
3277 from_char_set_int(&out->mm, value + 1, n);
3278 break;
3279 case DCH_MM:
3280 from_char_parse_int(&out->mm, &s, n);
3281 SKIP_THth(s, n->suffix);
3282 break;
3283 case DCH_DAY:
3284 case DCH_Day:
3285 case DCH_day:
3286 from_char_seq_search(&value, &s, days,
3287 n);
3288 from_char_set_int(&out->d, value, n);
3289 out->d++;
3290 break;
3291 case DCH_DY:
3292 case DCH_Dy:
3293 case DCH_dy:
3294 from_char_seq_search(&value, &s, days_short,
3295 n);
3296 from_char_set_int(&out->d, value, n);
3297 out->d++;
3298 break;
3299 case DCH_DDD:
3300 from_char_parse_int(&out->ddd, &s, n);
3301 SKIP_THth(s, n->suffix);
3302 break;
3303 case DCH_IDDD:
3304 from_char_parse_int_len(&out->ddd, &s, 3, n);
3305 SKIP_THth(s, n->suffix);
3306 break;
3307 case DCH_DD:
3308 from_char_parse_int(&out->dd, &s, n);
3309 SKIP_THth(s, n->suffix);
3310 break;
3311 case DCH_D:
3312 from_char_parse_int(&out->d, &s, n);
3313 SKIP_THth(s, n->suffix);
3314 break;
3315 case DCH_ID:
3316 from_char_parse_int_len(&out->d, &s, 1, n);
3317 /* Shift numbering to match Gregorian where Sunday = 1 */
3318 if (++out->d > 7)
3319 out->d = 1;
3320 SKIP_THth(s, n->suffix);
3321 break;
3322 case DCH_WW:
3323 case DCH_IW:
3324 from_char_parse_int(&out->ww, &s, n);
3325 SKIP_THth(s, n->suffix);
3326 break;
3327 case DCH_Q:
3328
3329 /*
3330 * We ignore 'Q' when converting to date because it is unclear
3331 * which date in the quarter to use, and some people specify
3332 * both quarter and month, so if it was honored it might
3333 * conflict with the supplied month. That is also why we don't
3334 * throw an error.
3335 *
3336 * We still parse the source string for an integer, but it
3337 * isn't stored anywhere in 'out'.
3338 */
3339 from_char_parse_int((int *) NULL, &s, n);
3340 SKIP_THth(s, n->suffix);
3341 break;
3342 case DCH_CC:
3343 from_char_parse_int(&out->cc, &s, n);
3344 SKIP_THth(s, n->suffix);
3345 break;
3346 case DCH_Y_YYY:
3347 {
3348 int matched,
3349 years,
3350 millennia,
3351 nch;
3352
3353 matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3354 if (matched < 2)
3355 ereport(ERROR,
3356 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3357 errmsg("invalid input string for \"Y,YYY\"")));
3358 years += (millennia * 1000);
3359 from_char_set_int(&out->year, years, n);
3360 out->yysz = 4;
3361 s += nch;
3362 SKIP_THth(s, n->suffix);
3363 }
3364 break;
3365 case DCH_YYYY:
3366 case DCH_IYYY:
3367 from_char_parse_int(&out->year, &s, n);
3368 out->yysz = 4;
3369 SKIP_THth(s, n->suffix);
3370 break;
3371 case DCH_YYY:
3372 case DCH_IYY:
3373 if (from_char_parse_int(&out->year, &s, n) < 4)
3374 out->year = adjust_partial_year_to_2020(out->year);
3375 out->yysz = 3;
3376 SKIP_THth(s, n->suffix);
3377 break;
3378 case DCH_YY:
3379 case DCH_IY:
3380 if (from_char_parse_int(&out->year, &s, n) < 4)
3381 out->year = adjust_partial_year_to_2020(out->year);
3382 out->yysz = 2;
3383 SKIP_THth(s, n->suffix);
3384 break;
3385 case DCH_Y:
3386 case DCH_I:
3387 if (from_char_parse_int(&out->year, &s, n) < 4)
3388 out->year = adjust_partial_year_to_2020(out->year);
3389 out->yysz = 1;
3390 SKIP_THth(s, n->suffix);
3391 break;
3392 case DCH_RM:
3393 case DCH_rm:
3394 from_char_seq_search(&value, &s, rm_months_lower,
3395 n);
3396 from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n);
3397 break;
3398 case DCH_W:
3399 from_char_parse_int(&out->w, &s, n);
3400 SKIP_THth(s, n->suffix);
3401 break;
3402 case DCH_J:
3403 from_char_parse_int(&out->j, &s, n);
3404 SKIP_THth(s, n->suffix);
3405 break;
3406 }
3407
3408 /* Ignore all spaces after fields */
3409 if (!fx_mode)
3410 {
3411 extra_skip = 0;
3412 while (*s != '\0' && isspace((unsigned char) *s))
3413 {
3414 s++;
3415 extra_skip++;
3416 }
3417 }
3418 }
3419 }
3420
3421 /*
3422 * The invariant for DCH cache entry management is that DCHCounter is equal
3423 * to the maximum age value among the existing entries, and we increment it
3424 * whenever an access occurs. If we approach overflow, deal with that by
3425 * halving all the age values, so that we retain a fairly accurate idea of
3426 * which entries are oldest.
3427 */
3428 static inline void
DCH_prevent_counter_overflow(void)3429 DCH_prevent_counter_overflow(void)
3430 {
3431 if (DCHCounter >= (INT_MAX - 1))
3432 {
3433 for (int i = 0; i < n_DCHCache; i++)
3434 DCHCache[i]->age >>= 1;
3435 DCHCounter >>= 1;
3436 }
3437 }
3438
3439 /* select a DCHCacheEntry to hold the given format picture */
3440 static DCHCacheEntry *
DCH_cache_getnew(const char * str)3441 DCH_cache_getnew(const char *str)
3442 {
3443 DCHCacheEntry *ent;
3444
3445 /* Ensure we can advance DCHCounter below */
3446 DCH_prevent_counter_overflow();
3447
3448 /*
3449 * If cache is full, remove oldest entry (or recycle first not-valid one)
3450 */
3451 if (n_DCHCache >= DCH_CACHE_ENTRIES)
3452 {
3453 DCHCacheEntry *old = DCHCache[0];
3454
3455 #ifdef DEBUG_TO_FROM_CHAR
3456 elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3457 #endif
3458 if (old->valid)
3459 {
3460 for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3461 {
3462 ent = DCHCache[i];
3463 if (!ent->valid)
3464 {
3465 old = ent;
3466 break;
3467 }
3468 if (ent->age < old->age)
3469 old = ent;
3470 }
3471 }
3472 #ifdef DEBUG_TO_FROM_CHAR
3473 elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3474 #endif
3475 old->valid = false;
3476 StrNCpy(old->str, str, DCH_CACHE_SIZE + 1);
3477 old->age = (++DCHCounter);
3478 /* caller is expected to fill format, then set valid */
3479 return old;
3480 }
3481 else
3482 {
3483 #ifdef DEBUG_TO_FROM_CHAR
3484 elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3485 #endif
3486 Assert(DCHCache[n_DCHCache] == NULL);
3487 DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3488 MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
3489 ent->valid = false;
3490 StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
3491 ent->age = (++DCHCounter);
3492 /* caller is expected to fill format, then set valid */
3493 ++n_DCHCache;
3494 return ent;
3495 }
3496 }
3497
3498 /* look for an existing DCHCacheEntry matching the given format picture */
3499 static DCHCacheEntry *
DCH_cache_search(const char * str)3500 DCH_cache_search(const char *str)
3501 {
3502 /* Ensure we can advance DCHCounter below */
3503 DCH_prevent_counter_overflow();
3504
3505 for (int i = 0; i < n_DCHCache; i++)
3506 {
3507 DCHCacheEntry *ent = DCHCache[i];
3508
3509 if (ent->valid && strcmp(ent->str, str) == 0)
3510 {
3511 ent->age = (++DCHCounter);
3512 return ent;
3513 }
3514 }
3515
3516 return NULL;
3517 }
3518
3519 /* Find or create a DCHCacheEntry for the given format picture */
3520 static DCHCacheEntry *
DCH_cache_fetch(const char * str)3521 DCH_cache_fetch(const char *str)
3522 {
3523 DCHCacheEntry *ent;
3524
3525 if ((ent = DCH_cache_search(str)) == NULL)
3526 {
3527 /*
3528 * Not in the cache, must run parser and save a new format-picture to
3529 * the cache. Do not mark the cache entry valid until parsing
3530 * succeeds.
3531 */
3532 ent = DCH_cache_getnew(str);
3533
3534 parse_format(ent->format, str, DCH_keywords,
3535 DCH_suff, DCH_index, DCH_TYPE, NULL);
3536
3537 ent->valid = true;
3538 }
3539 return ent;
3540 }
3541
3542 /*
3543 * Format a date/time or interval into a string according to fmt.
3544 * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
3545 * for formatting.
3546 */
3547 static text *
datetime_to_char_body(TmToChar * tmtc,text * fmt,bool is_interval,Oid collid)3548 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
3549 {
3550 FormatNode *format;
3551 char *fmt_str,
3552 *result;
3553 bool incache;
3554 int fmt_len;
3555 text *res;
3556
3557 /*
3558 * Convert fmt to C string
3559 */
3560 fmt_str = text_to_cstring(fmt);
3561 fmt_len = strlen(fmt_str);
3562
3563 /*
3564 * Allocate workspace for result as C string
3565 */
3566 result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
3567 *result = '\0';
3568
3569 if (fmt_len > DCH_CACHE_SIZE)
3570 {
3571 /*
3572 * Allocate new memory if format picture is bigger than static cache
3573 * and do not use cache (call parser always)
3574 */
3575 incache = false;
3576
3577 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3578
3579 parse_format(format, fmt_str, DCH_keywords,
3580 DCH_suff, DCH_index, DCH_TYPE, NULL);
3581 }
3582 else
3583 {
3584 /*
3585 * Use cache buffers
3586 */
3587 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3588
3589 incache = true;
3590 format = ent->format;
3591 }
3592
3593 /* The real work is here */
3594 DCH_to_char(format, is_interval, tmtc, result, collid);
3595
3596 if (!incache)
3597 pfree(format);
3598
3599 pfree(fmt_str);
3600
3601 /* convert C-string result to TEXT format */
3602 res = cstring_to_text(result);
3603
3604 pfree(result);
3605 return res;
3606 }
3607
3608 /****************************************************************************
3609 * Public routines
3610 ***************************************************************************/
3611
3612 /* -------------------
3613 * TIMESTAMP to_char()
3614 * -------------------
3615 */
3616 Datum
timestamp_to_char(PG_FUNCTION_ARGS)3617 timestamp_to_char(PG_FUNCTION_ARGS)
3618 {
3619 Timestamp dt = PG_GETARG_TIMESTAMP(0);
3620 text *fmt = PG_GETARG_TEXT_PP(1),
3621 *res;
3622 TmToChar tmtc;
3623 struct pg_tm *tm;
3624 int thisdate;
3625
3626 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3627 PG_RETURN_NULL();
3628
3629 ZERO_tmtc(&tmtc);
3630 tm = tmtcTm(&tmtc);
3631
3632 if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
3633 ereport(ERROR,
3634 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3635 errmsg("timestamp out of range")));
3636
3637 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3638 tm->tm_wday = (thisdate + 1) % 7;
3639 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3640
3641 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3642 PG_RETURN_NULL();
3643
3644 PG_RETURN_TEXT_P(res);
3645 }
3646
3647 Datum
timestamptz_to_char(PG_FUNCTION_ARGS)3648 timestamptz_to_char(PG_FUNCTION_ARGS)
3649 {
3650 TimestampTz dt = PG_GETARG_TIMESTAMP(0);
3651 text *fmt = PG_GETARG_TEXT_PP(1),
3652 *res;
3653 TmToChar tmtc;
3654 int tz;
3655 struct pg_tm *tm;
3656 int thisdate;
3657
3658 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3659 PG_RETURN_NULL();
3660
3661 ZERO_tmtc(&tmtc);
3662 tm = tmtcTm(&tmtc);
3663
3664 if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
3665 ereport(ERROR,
3666 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3667 errmsg("timestamp out of range")));
3668
3669 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3670 tm->tm_wday = (thisdate + 1) % 7;
3671 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3672
3673 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3674 PG_RETURN_NULL();
3675
3676 PG_RETURN_TEXT_P(res);
3677 }
3678
3679
3680 /* -------------------
3681 * INTERVAL to_char()
3682 * -------------------
3683 */
3684 Datum
interval_to_char(PG_FUNCTION_ARGS)3685 interval_to_char(PG_FUNCTION_ARGS)
3686 {
3687 Interval *it = PG_GETARG_INTERVAL_P(0);
3688 text *fmt = PG_GETARG_TEXT_PP(1),
3689 *res;
3690 TmToChar tmtc;
3691 struct pg_tm *tm;
3692
3693 if (VARSIZE_ANY_EXHDR(fmt) <= 0)
3694 PG_RETURN_NULL();
3695
3696 ZERO_tmtc(&tmtc);
3697 tm = tmtcTm(&tmtc);
3698
3699 if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
3700 PG_RETURN_NULL();
3701
3702 /* wday is meaningless, yday approximates the total span in days */
3703 tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
3704
3705 if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
3706 PG_RETURN_NULL();
3707
3708 PG_RETURN_TEXT_P(res);
3709 }
3710
3711 /* ---------------------
3712 * TO_TIMESTAMP()
3713 *
3714 * Make Timestamp from date_str which is formatted at argument 'fmt'
3715 * ( to_timestamp is reverse to_char() )
3716 * ---------------------
3717 */
3718 Datum
to_timestamp(PG_FUNCTION_ARGS)3719 to_timestamp(PG_FUNCTION_ARGS)
3720 {
3721 text *date_txt = PG_GETARG_TEXT_PP(0);
3722 text *fmt = PG_GETARG_TEXT_PP(1);
3723 Timestamp result;
3724 int tz;
3725 struct pg_tm tm;
3726 fsec_t fsec;
3727
3728 do_to_timestamp(date_txt, fmt, &tm, &fsec);
3729
3730 /* Use the specified time zone, if any. */
3731 if (tm.tm_zone)
3732 {
3733 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
3734
3735 if (dterr)
3736 DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
3737 }
3738 else
3739 tz = DetermineTimeZoneOffset(&tm, session_timezone);
3740
3741 if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
3742 ereport(ERROR,
3743 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3744 errmsg("timestamp out of range")));
3745
3746 PG_RETURN_TIMESTAMP(result);
3747 }
3748
3749 /* ----------
3750 * TO_DATE
3751 * Make Date from date_str which is formatted at argument 'fmt'
3752 * ----------
3753 */
3754 Datum
to_date(PG_FUNCTION_ARGS)3755 to_date(PG_FUNCTION_ARGS)
3756 {
3757 text *date_txt = PG_GETARG_TEXT_PP(0);
3758 text *fmt = PG_GETARG_TEXT_PP(1);
3759 DateADT result;
3760 struct pg_tm tm;
3761 fsec_t fsec;
3762
3763 do_to_timestamp(date_txt, fmt, &tm, &fsec);
3764
3765 /* Prevent overflow in Julian-day routines */
3766 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
3767 ereport(ERROR,
3768 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3769 errmsg("date out of range: \"%s\"",
3770 text_to_cstring(date_txt))));
3771
3772 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
3773
3774 /* Now check for just-out-of-range dates */
3775 if (!IS_VALID_DATE(result))
3776 ereport(ERROR,
3777 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3778 errmsg("date out of range: \"%s\"",
3779 text_to_cstring(date_txt))));
3780
3781 PG_RETURN_DATEADT(result);
3782 }
3783
3784 /*
3785 * do_to_timestamp: shared code for to_timestamp and to_date
3786 *
3787 * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm
3788 * and fractional seconds.
3789 *
3790 * We parse 'fmt' into a list of FormatNodes, which is then passed to
3791 * DCH_from_char to populate a TmFromChar with the parsed contents of
3792 * 'date_txt'.
3793 *
3794 * The TmFromChar is then analysed and converted into the final results in
3795 * struct 'tm' and 'fsec'.
3796 */
3797 static void
do_to_timestamp(text * date_txt,text * fmt,struct pg_tm * tm,fsec_t * fsec)3798 do_to_timestamp(text *date_txt, text *fmt,
3799 struct pg_tm *tm, fsec_t *fsec)
3800 {
3801 FormatNode *format;
3802 TmFromChar tmfc;
3803 int fmt_len;
3804 char *date_str;
3805 int fmask;
3806
3807 date_str = text_to_cstring(date_txt);
3808
3809 ZERO_tmfc(&tmfc);
3810 ZERO_tm(tm);
3811 *fsec = 0;
3812 fmask = 0; /* bit mask for ValidateDate() */
3813
3814 fmt_len = VARSIZE_ANY_EXHDR(fmt);
3815
3816 if (fmt_len)
3817 {
3818 char *fmt_str;
3819 bool incache;
3820
3821 fmt_str = text_to_cstring(fmt);
3822
3823 if (fmt_len > DCH_CACHE_SIZE)
3824 {
3825 /*
3826 * Allocate new memory if format picture is bigger than static
3827 * cache and do not use cache (call parser always)
3828 */
3829 incache = false;
3830
3831 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3832
3833 parse_format(format, fmt_str, DCH_keywords,
3834 DCH_suff, DCH_index, DCH_TYPE, NULL);
3835 }
3836 else
3837 {
3838 /*
3839 * Use cache buffers
3840 */
3841 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3842
3843 incache = true;
3844 format = ent->format;
3845 }
3846
3847 #ifdef DEBUG_TO_FROM_CHAR
3848 /* dump_node(format, fmt_len); */
3849 /* dump_index(DCH_keywords, DCH_index); */
3850 #endif
3851
3852 DCH_from_char(format, date_str, &tmfc);
3853
3854 pfree(fmt_str);
3855 if (!incache)
3856 pfree(format);
3857 }
3858
3859 DEBUG_TMFC(&tmfc);
3860
3861 /*
3862 * Convert to_date/to_timestamp input fields to standard 'tm'
3863 */
3864 if (tmfc.ssss)
3865 {
3866 int x = tmfc.ssss;
3867
3868 tm->tm_hour = x / SECS_PER_HOUR;
3869 x %= SECS_PER_HOUR;
3870 tm->tm_min = x / SECS_PER_MINUTE;
3871 x %= SECS_PER_MINUTE;
3872 tm->tm_sec = x;
3873 }
3874
3875 if (tmfc.ss)
3876 tm->tm_sec = tmfc.ss;
3877 if (tmfc.mi)
3878 tm->tm_min = tmfc.mi;
3879 if (tmfc.hh)
3880 tm->tm_hour = tmfc.hh;
3881
3882 if (tmfc.clock == CLOCK_12_HOUR)
3883 {
3884 if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
3885 ereport(ERROR,
3886 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3887 errmsg("hour \"%d\" is invalid for the 12-hour clock",
3888 tm->tm_hour),
3889 errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
3890
3891 if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
3892 tm->tm_hour += HOURS_PER_DAY / 2;
3893 else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
3894 tm->tm_hour = 0;
3895 }
3896
3897 if (tmfc.year)
3898 {
3899 /*
3900 * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
3901 * the year in the given century. Keep in mind that the 21st century
3902 * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
3903 * 600BC to 501BC.
3904 */
3905 if (tmfc.cc && tmfc.yysz <= 2)
3906 {
3907 if (tmfc.bc)
3908 tmfc.cc = -tmfc.cc;
3909 tm->tm_year = tmfc.year % 100;
3910 if (tm->tm_year)
3911 {
3912 if (tmfc.cc >= 0)
3913 tm->tm_year += (tmfc.cc - 1) * 100;
3914 else
3915 tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
3916 }
3917 else
3918 {
3919 /* find century year for dates ending in "00" */
3920 tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
3921 }
3922 }
3923 else
3924 {
3925 /* If a 4-digit year is provided, we use that and ignore CC. */
3926 tm->tm_year = tmfc.year;
3927 if (tmfc.bc)
3928 tm->tm_year = -tm->tm_year;
3929 /* correct for our representation of BC years */
3930 if (tm->tm_year < 0)
3931 tm->tm_year++;
3932 }
3933 fmask |= DTK_M(YEAR);
3934 }
3935 else if (tmfc.cc)
3936 {
3937 /* use first year of century */
3938 if (tmfc.bc)
3939 tmfc.cc = -tmfc.cc;
3940 if (tmfc.cc >= 0)
3941 /* +1 because 21st century started in 2001 */
3942 tm->tm_year = (tmfc.cc - 1) * 100 + 1;
3943 else
3944 /* +1 because year == 599 is 600 BC */
3945 tm->tm_year = tmfc.cc * 100 + 1;
3946 fmask |= DTK_M(YEAR);
3947 }
3948
3949 if (tmfc.j)
3950 {
3951 j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3952 fmask |= DTK_DATE_M;
3953 }
3954
3955 if (tmfc.ww)
3956 {
3957 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
3958 {
3959 /*
3960 * If tmfc.d is not set, then the date is left at the beginning of
3961 * the ISO week (Monday).
3962 */
3963 if (tmfc.d)
3964 isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3965 else
3966 isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3967 fmask |= DTK_DATE_M;
3968 }
3969 else
3970 tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
3971 }
3972
3973 if (tmfc.w)
3974 tmfc.dd = (tmfc.w - 1) * 7 + 1;
3975 if (tmfc.dd)
3976 {
3977 tm->tm_mday = tmfc.dd;
3978 fmask |= DTK_M(DAY);
3979 }
3980 if (tmfc.mm)
3981 {
3982 tm->tm_mon = tmfc.mm;
3983 fmask |= DTK_M(MONTH);
3984 }
3985
3986 if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
3987 {
3988 /*
3989 * The month and day field have not been set, so we use the
3990 * day-of-year field to populate them. Depending on the date mode,
3991 * this field may be interpreted as a Gregorian day-of-year, or an ISO
3992 * week date day-of-year.
3993 */
3994
3995 if (!tm->tm_year && !tmfc.bc)
3996 ereport(ERROR,
3997 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3998 errmsg("cannot calculate day of year without year information")));
3999
4000 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4001 {
4002 int j0; /* zeroth day of the ISO year, in Julian */
4003
4004 j0 = isoweek2j(tm->tm_year, 1) - 1;
4005
4006 j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4007 fmask |= DTK_DATE_M;
4008 }
4009 else
4010 {
4011 const int *y;
4012 int i;
4013
4014 static const int ysum[2][13] = {
4015 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4016 {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4017
4018 y = ysum[isleap(tm->tm_year)];
4019
4020 for (i = 1; i <= MONTHS_PER_YEAR; i++)
4021 {
4022 if (tmfc.ddd <= y[i])
4023 break;
4024 }
4025 if (tm->tm_mon <= 1)
4026 tm->tm_mon = i;
4027
4028 if (tm->tm_mday <= 1)
4029 tm->tm_mday = tmfc.ddd - y[i - 1];
4030
4031 fmask |= DTK_M(MONTH) | DTK_M(DAY);
4032 }
4033 }
4034
4035 if (tmfc.ms)
4036 *fsec += tmfc.ms * 1000;
4037 if (tmfc.us)
4038 *fsec += tmfc.us;
4039
4040 /* Range-check date fields according to bit mask computed above */
4041 if (fmask != 0)
4042 {
4043 /* We already dealt with AD/BC, so pass isjulian = true */
4044 int dterr = ValidateDate(fmask, true, false, false, tm);
4045
4046 if (dterr != 0)
4047 {
4048 /*
4049 * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4050 * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4051 * irrelevant hint about datestyle.
4052 */
4053 DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
4054 }
4055 }
4056
4057 /* Range-check time fields too */
4058 if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4059 tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4060 tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4061 *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4062 DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
4063
4064 /* Save parsed time-zone into tm->tm_zone if it was specified */
4065 if (tmfc.tzsign)
4066 {
4067 char *tz;
4068
4069 if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4070 tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4071 DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp");
4072
4073 tz = psprintf("%c%02d:%02d",
4074 tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
4075
4076 tm->tm_zone = tz;
4077 }
4078
4079 DEBUG_TM(tm);
4080
4081 pfree(date_str);
4082 }
4083
4084
4085 /**********************************************************************
4086 * the NUMBER version part
4087 *********************************************************************/
4088
4089
/*
 * Fill the first 'max' bytes of 'str' with the character 'c' and terminate
 * the string right after them.  The buffer must have room for max + 1
 * bytes.  Returns 'str'.
 */
static char *
fill_str(char *str, int c, int max)
{
	char	   *terminator = str + max;

	memset(str, c, max);
	*terminator = '\0';

	return str;
}
4097
/*
 * Reset the fields of the number description pointed to by _n to zero,
 * so that it can be filled in from scratch.
 */
#define zeroize_NUM(_n) \
do { \
	(_n)->zero_end = 0;		\
	(_n)->zero_start = 0;	\
	(_n)->multi = 0;		\
	(_n)->need_locale = 0;	\
	(_n)->pre_lsign_num = 0; \
	(_n)->post = 0;			\
	(_n)->pre = 0;			\
	(_n)->lsign = 0;		\
	(_n)->flag = 0;			\
} while (0)
4110
4111 /* This works the same as DCH_prevent_counter_overflow */
4112 static inline void
NUM_prevent_counter_overflow(void)4113 NUM_prevent_counter_overflow(void)
4114 {
4115 if (NUMCounter >= (INT_MAX - 1))
4116 {
4117 for (int i = 0; i < n_NUMCache; i++)
4118 NUMCache[i]->age >>= 1;
4119 NUMCounter >>= 1;
4120 }
4121 }
4122
4123 /* select a NUMCacheEntry to hold the given format picture */
4124 static NUMCacheEntry *
NUM_cache_getnew(const char * str)4125 NUM_cache_getnew(const char *str)
4126 {
4127 NUMCacheEntry *ent;
4128
4129 /* Ensure we can advance NUMCounter below */
4130 NUM_prevent_counter_overflow();
4131
4132 /*
4133 * If cache is full, remove oldest entry (or recycle first not-valid one)
4134 */
4135 if (n_NUMCache >= NUM_CACHE_ENTRIES)
4136 {
4137 NUMCacheEntry *old = NUMCache[0];
4138
4139 #ifdef DEBUG_TO_FROM_CHAR
4140 elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4141 #endif
4142 if (old->valid)
4143 {
4144 for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4145 {
4146 ent = NUMCache[i];
4147 if (!ent->valid)
4148 {
4149 old = ent;
4150 break;
4151 }
4152 if (ent->age < old->age)
4153 old = ent;
4154 }
4155 }
4156 #ifdef DEBUG_TO_FROM_CHAR
4157 elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4158 #endif
4159 old->valid = false;
4160 StrNCpy(old->str, str, NUM_CACHE_SIZE + 1);
4161 old->age = (++NUMCounter);
4162 /* caller is expected to fill format and Num, then set valid */
4163 return old;
4164 }
4165 else
4166 {
4167 #ifdef DEBUG_TO_FROM_CHAR
4168 elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4169 #endif
4170 Assert(NUMCache[n_NUMCache] == NULL);
4171 NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4172 MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry));
4173 ent->valid = false;
4174 StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1);
4175 ent->age = (++NUMCounter);
4176 /* caller is expected to fill format and Num, then set valid */
4177 ++n_NUMCache;
4178 return ent;
4179 }
4180 }
4181
4182 /* look for an existing NUMCacheEntry matching the given format picture */
4183 static NUMCacheEntry *
NUM_cache_search(const char * str)4184 NUM_cache_search(const char *str)
4185 {
4186 /* Ensure we can advance NUMCounter below */
4187 NUM_prevent_counter_overflow();
4188
4189 for (int i = 0; i < n_NUMCache; i++)
4190 {
4191 NUMCacheEntry *ent = NUMCache[i];
4192
4193 if (ent->valid && strcmp(ent->str, str) == 0)
4194 {
4195 ent->age = (++NUMCounter);
4196 return ent;
4197 }
4198 }
4199
4200 return NULL;
4201 }
4202
4203 /* Find or create a NUMCacheEntry for the given format picture */
4204 static NUMCacheEntry *
NUM_cache_fetch(const char * str)4205 NUM_cache_fetch(const char *str)
4206 {
4207 NUMCacheEntry *ent;
4208
4209 if ((ent = NUM_cache_search(str)) == NULL)
4210 {
4211 /*
4212 * Not in the cache, must run parser and save a new format-picture to
4213 * the cache. Do not mark the cache entry valid until parsing
4214 * succeeds.
4215 */
4216 ent = NUM_cache_getnew(str);
4217
4218 zeroize_NUM(&ent->Num);
4219
4220 parse_format(ent->format, str, NUM_keywords,
4221 NULL, NUM_index, NUM_TYPE, &ent->Num);
4222
4223 ent->valid = true;
4224 }
4225 return ent;
4226 }
4227
4228 /* ----------
4229 * Cache routine for NUM to_char version
4230 * ----------
4231 */
4232 static FormatNode *
NUM_cache(int len,NUMDesc * Num,text * pars_str,bool * shouldFree)4233 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4234 {
4235 FormatNode *format = NULL;
4236 char *str;
4237
4238 str = text_to_cstring(pars_str);
4239
4240 if (len > NUM_CACHE_SIZE)
4241 {
4242 /*
4243 * Allocate new memory if format picture is bigger than static cache
4244 * and do not use cache (call parser always)
4245 */
4246 format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4247
4248 *shouldFree = true;
4249
4250 zeroize_NUM(Num);
4251
4252 parse_format(format, str, NUM_keywords,
4253 NULL, NUM_index, NUM_TYPE, Num);
4254 }
4255 else
4256 {
4257 /*
4258 * Use cache buffers
4259 */
4260 NUMCacheEntry *ent = NUM_cache_fetch(str);
4261
4262 *shouldFree = false;
4263
4264 format = ent->format;
4265
4266 /*
4267 * Copy cache to used struct
4268 */
4269 Num->flag = ent->Num.flag;
4270 Num->lsign = ent->Num.lsign;
4271 Num->pre = ent->Num.pre;
4272 Num->post = ent->Num.post;
4273 Num->pre_lsign_num = ent->Num.pre_lsign_num;
4274 Num->need_locale = ent->Num.need_locale;
4275 Num->multi = ent->Num.multi;
4276 Num->zero_start = ent->Num.zero_start;
4277 Num->zero_end = ent->Num.zero_end;
4278 }
4279
4280 #ifdef DEBUG_TO_FROM_CHAR
4281 /* dump_node(format, len); */
4282 dump_index(NUM_keywords, NUM_index);
4283 #endif
4284
4285 pfree(str);
4286 return format;
4287 }
4288
4289
4290 static char *
int_to_roman(int number)4291 int_to_roman(int number)
4292 {
4293 int len = 0,
4294 num = 0;
4295 char *p = NULL,
4296 *result,
4297 numstr[12];
4298
4299 result = (char *) palloc(16);
4300 *result = '\0';
4301
4302 if (number > 3999 || number < 1)
4303 {
4304 fill_str(result, '#', 15);
4305 return result;
4306 }
4307 len = snprintf(numstr, sizeof(numstr), "%d", number);
4308
4309 for (p = numstr; *p != '\0'; p++, --len)
4310 {
4311 num = *p - 49; /* 48 ascii + 1 */
4312 if (num < 0)
4313 continue;
4314
4315 if (len > 3)
4316 {
4317 while (num-- != -1)
4318 strcat(result, "M");
4319 }
4320 else
4321 {
4322 if (len == 3)
4323 strcat(result, rm100[num]);
4324 else if (len == 2)
4325 strcat(result, rm10[num]);
4326 else if (len == 1)
4327 strcat(result, rm1[num]);
4328 }
4329 }
4330 return result;
4331 }
4332
4333
4334
4335 /* ----------
4336 * Locale
4337 * ----------
4338 */
4339 static void
NUM_prepare_locale(NUMProc * Np)4340 NUM_prepare_locale(NUMProc *Np)
4341 {
4342 if (Np->Num->need_locale)
4343 {
4344 struct lconv *lconv;
4345
4346 /*
4347 * Get locales
4348 */
4349 lconv = PGLC_localeconv();
4350
4351 /*
4352 * Positive / Negative number sign
4353 */
4354 if (lconv->negative_sign && *lconv->negative_sign)
4355 Np->L_negative_sign = lconv->negative_sign;
4356 else
4357 Np->L_negative_sign = "-";
4358
4359 if (lconv->positive_sign && *lconv->positive_sign)
4360 Np->L_positive_sign = lconv->positive_sign;
4361 else
4362 Np->L_positive_sign = "+";
4363
4364 /*
4365 * Number decimal point
4366 */
4367 if (lconv->decimal_point && *lconv->decimal_point)
4368 Np->decimal = lconv->decimal_point;
4369
4370 else
4371 Np->decimal = ".";
4372
4373 if (!IS_LDECIMAL(Np->Num))
4374 Np->decimal = ".";
4375
4376 /*
4377 * Number thousands separator
4378 *
4379 * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
4380 * but "" for thousands_sep, so we set the thousands_sep too.
4381 * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
4382 */
4383 if (lconv->thousands_sep && *lconv->thousands_sep)
4384 Np->L_thousands_sep = lconv->thousands_sep;
4385 /* Make sure thousands separator doesn't match decimal point symbol. */
4386 else if (strcmp(Np->decimal, ",") !=0)
4387 Np->L_thousands_sep = ",";
4388 else
4389 Np->L_thousands_sep = ".";
4390
4391 /*
4392 * Currency symbol
4393 */
4394 if (lconv->currency_symbol && *lconv->currency_symbol)
4395 Np->L_currency_symbol = lconv->currency_symbol;
4396 else
4397 Np->L_currency_symbol = " ";
4398 }
4399 else
4400 {
4401 /*
4402 * Default values
4403 */
4404 Np->L_negative_sign = "-";
4405 Np->L_positive_sign = "+";
4406 Np->decimal = ".";
4407
4408 Np->L_thousands_sep = ",";
4409 Np->L_currency_symbol = " ";
4410 }
4411 }
4412
/* ----------
 * Return a pointer to the last relevant character after the decimal point:
 *		12.0500 --> points at '5'
 *		12.0000 --> points at '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *dot = strchr(num, '.');
	char	   *last;
	char	   *scan;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (dot == NULL)
		return NULL;

	/* Remember the right-most character after the point that is not '0' */
	last = dot;
	for (scan = dot + 1; *scan != '\0'; scan++)
	{
		if (*scan != '0')
			last = scan;
	}

	return last;
}
4444
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * Both expect 'Np' and 'input_len' to be in scope at the point of use.
 *	OVERLOAD_TEST: true if we've reached end of input string
 *	AMOUNT_TEST(s): true if at least s bytes remain in string
 */
#define OVERLOAD_TEST	(!(Np->inout_p < Np->inout + input_len))
#define AMOUNT_TEST(s)	(Np->inout + (input_len - (s)) >= Np->inout_p)
4452
4453 /* ----------
4454 * Number extraction for TO_NUMBER()
4455 * ----------
4456 */
4457 static void
NUM_numpart_from_char(NUMProc * Np,int id,int input_len)4458 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
4459 {
4460 bool isread = false;
4461
4462 #ifdef DEBUG_TO_FROM_CHAR
4463 elog(DEBUG_elog_output, " --- scan start --- id=%s",
4464 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
4465 #endif
4466
4467 if (OVERLOAD_TEST)
4468 return;
4469
4470 if (*Np->inout_p == ' ')
4471 Np->inout_p++;
4472
4473 if (OVERLOAD_TEST)
4474 return;
4475
4476 /*
4477 * read sign before number
4478 */
4479 if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
4480 (Np->read_pre + Np->read_post) == 0)
4481 {
4482 #ifdef DEBUG_TO_FROM_CHAR
4483 elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
4484 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
4485 #endif
4486
4487 /*
4488 * locale sign
4489 */
4490 if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
4491 {
4492 int x = 0;
4493
4494 #ifdef DEBUG_TO_FROM_CHAR
4495 elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
4496 #endif
4497 if ((x = strlen(Np->L_negative_sign)) &&
4498 AMOUNT_TEST(x) &&
4499 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4500 {
4501 Np->inout_p += x;
4502 *Np->number = '-';
4503 }
4504 else if ((x = strlen(Np->L_positive_sign)) &&
4505 AMOUNT_TEST(x) &&
4506 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4507 {
4508 Np->inout_p += x;
4509 *Np->number = '+';
4510 }
4511 }
4512 else
4513 {
4514 #ifdef DEBUG_TO_FROM_CHAR
4515 elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
4516 #endif
4517
4518 /*
4519 * simple + - < >
4520 */
4521 if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
4522 *Np->inout_p == '<'))
4523 {
4524 *Np->number = '-'; /* set - */
4525 Np->inout_p++;
4526 }
4527 else if (*Np->inout_p == '+')
4528 {
4529 *Np->number = '+'; /* set + */
4530 Np->inout_p++;
4531 }
4532 }
4533 }
4534
4535 if (OVERLOAD_TEST)
4536 return;
4537
4538 #ifdef DEBUG_TO_FROM_CHAR
4539 elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
4540 #endif
4541
4542 /*
4543 * read digit or decimal point
4544 */
4545 if (isdigit((unsigned char) *Np->inout_p))
4546 {
4547 if (Np->read_dec && Np->read_post == Np->Num->post)
4548 return;
4549
4550 *Np->number_p = *Np->inout_p;
4551 Np->number_p++;
4552
4553 if (Np->read_dec)
4554 Np->read_post++;
4555 else
4556 Np->read_pre++;
4557
4558 isread = true;
4559
4560 #ifdef DEBUG_TO_FROM_CHAR
4561 elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
4562 #endif
4563 }
4564 else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
4565 {
4566 /*
4567 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
4568 * Np->decimal is always just "." if we don't have a D format token.
4569 * So we just unconditionally match to Np->decimal.
4570 */
4571 int x = strlen(Np->decimal);
4572
4573 #ifdef DEBUG_TO_FROM_CHAR
4574 elog(DEBUG_elog_output, "Try read decimal point (%c)",
4575 *Np->inout_p);
4576 #endif
4577 if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
4578 {
4579 Np->inout_p += x - 1;
4580 *Np->number_p = '.';
4581 Np->number_p++;
4582 Np->read_dec = true;
4583 isread = true;
4584 }
4585 }
4586
4587 if (OVERLOAD_TEST)
4588 return;
4589
4590 /*
4591 * Read sign behind "last" number
4592 *
4593 * We need sign detection because determine exact position of post-sign is
4594 * difficult:
4595 *
4596 * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
4597 * 5.01-
4598 */
4599 if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
4600 {
4601 /*
4602 * locale sign (NUM_S) is always anchored behind a last number, if: -
4603 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
4604 * next char is not digit
4605 */
4606 if (IS_LSIGN(Np->Num) && isread &&
4607 (Np->inout_p + 1) < Np->inout + input_len &&
4608 !isdigit((unsigned char) *(Np->inout_p + 1)))
4609 {
4610 int x;
4611 char *tmp = Np->inout_p++;
4612
4613 #ifdef DEBUG_TO_FROM_CHAR
4614 elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
4615 #endif
4616 if ((x = strlen(Np->L_negative_sign)) &&
4617 AMOUNT_TEST(x) &&
4618 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4619 {
4620 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4621 *Np->number = '-';
4622 }
4623 else if ((x = strlen(Np->L_positive_sign)) &&
4624 AMOUNT_TEST(x) &&
4625 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4626 {
4627 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4628 *Np->number = '+';
4629 }
4630 if (*Np->number == ' ')
4631 /* no sign read */
4632 Np->inout_p = tmp;
4633 }
4634
4635 /*
4636 * try read non-locale sign, it's happen only if format is not exact
4637 * and we cannot determine sign position of MI/PL/SG, an example:
4638 *
4639 * FM9.999999MI -> 5.01-
4640 *
4641 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
4642 * like to_number('1 -', '9S') where sign is not anchored to last
4643 * number.
4644 */
4645 else if (isread == false && IS_LSIGN(Np->Num) == false &&
4646 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
4647 {
4648 #ifdef DEBUG_TO_FROM_CHAR
4649 elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
4650 #endif
4651
4652 /*
4653 * simple + -
4654 */
4655 if (*Np->inout_p == '-' || *Np->inout_p == '+')
4656 /* NUM_processor() do inout_p++ */
4657 *Np->number = *Np->inout_p;
4658 }
4659 }
4660 }
4661
/*
 * IS_PREDEC_SPACE
 *		True while the processor is still positioned on the first character
 *		of the number string, that character is '0', the format has digits
 *		after the decimal point, and no '0' format flag is set.  Used by
 *		to_char so that "0.1" under "9.9" is emitted as " .1".
 */
#define IS_PREDEC_SPACE(_n) \
	(!IS_ZERO((_n)->Num) && \
	 (_n)->number_p == (_n)->number && \
	 (_n)->number[0] == '0' && \
	 (_n)->Num->post != 0)
4667
4668 /* ----------
4669 * Add digit or sign to number-string
4670 * ----------
4671 */
4672 static void
NUM_numpart_to_char(NUMProc * Np,int id)4673 NUM_numpart_to_char(NUMProc *Np, int id)
4674 {
4675 int end;
4676
4677 if (IS_ROMAN(Np->Num))
4678 return;
4679
4680 /* Note: in this elog() output not set '\0' in 'inout' */
4681
4682 #ifdef DEBUG_TO_FROM_CHAR
4683
4684 /*
4685 * Np->num_curr is number of current item in format-picture, it is not
4686 * current position in inout!
4687 */
4688 elog(DEBUG_elog_output,
4689 "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
4690 Np->sign_wrote,
4691 Np->num_curr,
4692 Np->number_p,
4693 Np->inout);
4694 #endif
4695 Np->num_in = false;
4696
4697 /*
4698 * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
4699 * handle "9.9" --> " .1"
4700 */
4701 if (Np->sign_wrote == false &&
4702 (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
4703 (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
4704 {
4705 if (IS_LSIGN(Np->Num))
4706 {
4707 if (Np->Num->lsign == NUM_LSIGN_PRE)
4708 {
4709 if (Np->sign == '-')
4710 strcpy(Np->inout_p, Np->L_negative_sign);
4711 else
4712 strcpy(Np->inout_p, Np->L_positive_sign);
4713 Np->inout_p += strlen(Np->inout_p);
4714 Np->sign_wrote = true;
4715 }
4716 }
4717 else if (IS_BRACKET(Np->Num))
4718 {
4719 *Np->inout_p = Np->sign == '+' ? ' ' : '<';
4720 ++Np->inout_p;
4721 Np->sign_wrote = true;
4722 }
4723 else if (Np->sign == '+')
4724 {
4725 if (!IS_FILLMODE(Np->Num))
4726 {
4727 *Np->inout_p = ' '; /* Write + */
4728 ++Np->inout_p;
4729 }
4730 Np->sign_wrote = true;
4731 }
4732 else if (Np->sign == '-')
4733 { /* Write - */
4734 *Np->inout_p = '-';
4735 ++Np->inout_p;
4736 Np->sign_wrote = true;
4737 }
4738 }
4739
4740
4741 /*
4742 * digits / FM / Zero / Dec. point
4743 */
4744 if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
4745 {
4746 if (Np->num_curr < Np->out_pre_spaces &&
4747 (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
4748 {
4749 /*
4750 * Write blank space
4751 */
4752 if (!IS_FILLMODE(Np->Num))
4753 {
4754 *Np->inout_p = ' '; /* Write ' ' */
4755 ++Np->inout_p;
4756 }
4757 }
4758 else if (IS_ZERO(Np->Num) &&
4759 Np->num_curr < Np->out_pre_spaces &&
4760 Np->Num->zero_start <= Np->num_curr)
4761 {
4762 /*
4763 * Write ZERO
4764 */
4765 *Np->inout_p = '0'; /* Write '0' */
4766 ++Np->inout_p;
4767 Np->num_in = true;
4768 }
4769 else
4770 {
4771 /*
4772 * Write Decimal point
4773 */
4774 if (*Np->number_p == '.')
4775 {
4776 if (!Np->last_relevant || *Np->last_relevant != '.')
4777 {
4778 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4779 Np->inout_p += strlen(Np->inout_p);
4780 }
4781
4782 /*
4783 * Ora 'n' -- FM9.9 --> 'n.'
4784 */
4785 else if (IS_FILLMODE(Np->Num) &&
4786 Np->last_relevant && *Np->last_relevant == '.')
4787 {
4788 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4789 Np->inout_p += strlen(Np->inout_p);
4790 }
4791 }
4792 else
4793 {
4794 /*
4795 * Write Digits
4796 */
4797 if (Np->last_relevant && Np->number_p > Np->last_relevant &&
4798 id != NUM_0)
4799 ;
4800
4801 /*
4802 * '0.1' -- 9.9 --> ' .1'
4803 */
4804 else if (IS_PREDEC_SPACE(Np))
4805 {
4806 if (!IS_FILLMODE(Np->Num))
4807 {
4808 *Np->inout_p = ' ';
4809 ++Np->inout_p;
4810 }
4811
4812 /*
4813 * '0' -- FM9.9 --> '0.'
4814 */
4815 else if (Np->last_relevant && *Np->last_relevant == '.')
4816 {
4817 *Np->inout_p = '0';
4818 ++Np->inout_p;
4819 }
4820 }
4821 else
4822 {
4823 *Np->inout_p = *Np->number_p; /* Write DIGIT */
4824 ++Np->inout_p;
4825 Np->num_in = true;
4826 }
4827 }
4828 /* do no exceed string length */
4829 if (*Np->number_p)
4830 ++Np->number_p;
4831 }
4832
4833 end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
4834
4835 if (Np->last_relevant && Np->last_relevant == Np->number_p)
4836 end = Np->num_curr;
4837
4838 if (Np->num_curr + 1 == end)
4839 {
4840 if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
4841 {
4842 *Np->inout_p = Np->sign == '+' ? ' ' : '>';
4843 ++Np->inout_p;
4844 }
4845 else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
4846 {
4847 if (Np->sign == '-')
4848 strcpy(Np->inout_p, Np->L_negative_sign);
4849 else
4850 strcpy(Np->inout_p, Np->L_positive_sign);
4851 Np->inout_p += strlen(Np->inout_p);
4852 }
4853 }
4854 }
4855
4856 ++Np->num_curr;
4857 }
4858
4859 /*
4860 * Skip over "n" input characters, but only if they aren't numeric data
4861 */
4862 static void
NUM_eat_non_data_chars(NUMProc * Np,int n,int input_len)4863 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
4864 {
4865 while (n-- > 0)
4866 {
4867 if (OVERLOAD_TEST)
4868 break; /* end of input */
4869 if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
4870 break; /* it's a data character */
4871 Np->inout_p += pg_mblen(Np->inout_p);
4872 }
4873 }
4874
/*
 * NUM_processor
 *
 * Main driver for numeric formatting in both directions.  It walks the
 * compiled format-node array until NODE_TYPE_END and, for every node,
 * either writes to or reads from the "inout" buffer.
 *
 * node						compiled format picture, ended by NODE_TYPE_END
 * Num						description of the picture; note that this
 *							function modifies it (zero_start, flag, lsign,
 *							post, ...)
 * inout					to_char: output buffer; to_number: input text
 * number					to_char: digit string to format; to_number:
 *							buffer receiving the sign and digits read
 * input_len				number of input bytes (used by the to_number
 *							bounds tests OVERLOAD_TEST / AMOUNT_TEST)
 * to_char_out_pre_spaces	to_char only: count of leading positions that
 *							have no digit in "number"
 * sign						to_char only: '+' or '-' as passed by the caller
 * is_to_char				direction selector
 * collid					not referenced in this function body
 *
 * Returns "inout" (NUL-terminated) for to_char and "number"
 * (NUL-terminated) for to_number.  Raises FEATURE_NOT_SUPPORTED if an EEEE
 * or RN picture is used for input.
 */
static char *
NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
			  char *number, int input_len, int to_char_out_pre_spaces,
			  int sign, bool is_to_char, Oid collid)
{
	FormatNode *n;
	NUMProc		_Np,
			   *Np = &_Np;
	const char *pattern;
	int			pattern_len;

	MemSet(Np, 0, sizeof(NUMProc));

	Np->Num = Num;
	Np->is_to_char = is_to_char;
	Np->number = number;
	Np->inout = inout;
	Np->last_relevant = NULL;
	Np->read_post = 0;
	Np->read_pre = 0;
	Np->read_dec = false;

	/* below, zero_start is compared against the 0-based num_curr */
	if (Np->Num->zero_start)
		--Np->Num->zero_start;

	/*
	 * EEEE: the caller has already produced the complete output string in
	 * "number"; just copy it.  Not available for input.
	 */
	if (IS_EEEE(Np->Num))
	{
		if (!Np->is_to_char)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("\"EEEE\" not supported for input")));
		return strcpy(inout, number);
	}

	/*
	 * Roman correction: drop every other setting except fill mode, so that
	 * only the RN/rn item produces output.
	 */
	if (IS_ROMAN(Np->Num))
	{
		if (!Np->is_to_char)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("\"RN\" not supported for input")));

		Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
			Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;

		if (IS_FILLMODE(Np->Num))
		{
			Np->Num->flag = 0;
			Np->Num->flag |= NUM_F_FILLMODE;
		}
		else
			Np->Num->flag = 0;
		Np->Num->flag |= NUM_F_ROMAN;
	}

	/*
	 * Sign
	 */
	if (is_to_char)
	{
		Np->sign = sign;

		/* MI/PL/SG - the sign is written by its own item, not with the number */
		if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
		{
			if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
				Np->sign_wrote = false; /* need sign */
			else
				Np->sign_wrote = true;	/* needn't sign */
		}
		else
		{
			if (Np->sign != '-')
			{
				if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
					Np->Num->flag &= ~NUM_F_BRACKET;
				if (IS_MINUS(Np->Num))
					Np->Num->flag &= ~NUM_F_MINUS;
			}
			else if (Np->sign != '+' && IS_PLUS(Np->Num))
				Np->Num->flag &= ~NUM_F_PLUS;

			if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
				Np->sign_wrote = true;	/* needn't sign */
			else
				Np->sign_wrote = false; /* need sign */

			/*
			 * A leading locale sign is switched to trailing when pre
			 * equals pre_lsign_num.
			 */
			if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
				Np->Num->lsign = NUM_LSIGN_POST;
		}
	}
	else
		Np->sign = false;

	/*
	 * Count
	 */
	Np->num_count = Np->Num->post + Np->Num->pre - 1;

	if (is_to_char)
	{
		Np->out_pre_spaces = to_char_out_pre_spaces;

		if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
		{
			Np->last_relevant = get_last_relevant_decnum(Np->number);

			/*
			 * If any '0' specifiers are present, make sure we don't strip
			 * those digits.
			 */
			if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
			{
				char	   *last_zero;

				last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
				if (Np->last_relevant < last_zero)
					Np->last_relevant = last_zero;
			}
		}

		if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
			++Np->num_count;
	}
	else
	{
		/*
		 * to_number: start the result as " " -- the first byte is reserved
		 * for the sign and is overwritten with '+' or '-' when one is read.
		 */
		Np->out_pre_spaces = 0;
		*Np->number = ' ';		/* sign space */
		*(Np->number + 1) = '\0';
	}

	Np->num_in = 0;
	Np->num_curr = 0;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output,
		 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
		 Np->sign,
		 Np->number,
		 Np->Num->pre,
		 Np->Num->post,
		 Np->num_count,
		 Np->out_pre_spaces,
		 Np->sign_wrote ? "Yes" : "No",
		 IS_ZERO(Np->Num) ? "Yes" : "No",
		 Np->Num->zero_start,
		 Np->Num->zero_end,
		 Np->last_relevant ? Np->last_relevant : "<not set>",
		 IS_BRACKET(Np->Num) ? "Yes" : "No",
		 IS_PLUS(Np->Num) ? "Yes" : "No",
		 IS_MINUS(Np->Num) ? "Yes" : "No",
		 IS_FILLMODE(Np->Num) ? "Yes" : "No",
		 IS_ROMAN(Np->Num) ? "Yes" : "No",
		 IS_EEEE(Np->Num) ? "Yes" : "No"
		);
#endif

	/*
	 * Locale
	 */
	NUM_prepare_locale(Np);

	/*
	 * Processor direct cycle
	 */
	if (Np->is_to_char)
		Np->number_p = Np->number;
	else
		Np->number_p = Np->number + 1;	/* first char is space for sign */

	for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
	{
		if (!Np->is_to_char)
		{
			/*
			 * Check at least one byte remains to be scanned.  (In actions
			 * below, must use AMOUNT_TEST if we want to read more bytes than
			 * that.)
			 */
			if (OVERLOAD_TEST)
				break;
		}

		/*
		 * Format pictures actions
		 */
		if (n->type == NODE_TYPE_ACTION)
		{
			/*
			 * Create/read digit/zero/blank/sign/special-case
			 *
			 * 'NUM_S' note: The locale sign is anchored to number and we
			 * read/write it when we work with first or last number
			 * (NUM_0/NUM_9).  This is why NUM_S is missing in switch().
			 *
			 * Notice the "Np->inout_p++" at the bottom of the loop.  This is
			 * why most of the actions advance inout_p one less than you might
			 * expect.  In cases where we don't want that increment to happen,
			 * a switch case ends with "continue" not "break".
			 */
			switch (n->key->id)
			{
				case NUM_9:
				case NUM_0:
				case NUM_DEC:
				case NUM_D:
					if (Np->is_to_char)
					{
						/* the helper advances inout_p itself */
						NUM_numpart_to_char(Np, n->key->id);
						continue;	/* for() */
					}
					else
					{
						NUM_numpart_from_char(Np, n->key->id, input_len);
						break;	/* switch() case: */
					}

				case NUM_COMMA:
					if (Np->is_to_char)
					{
						/* before the first digit a ',' becomes a blank (or nothing in FM) */
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
							else
								*Np->inout_p = ' ';
						}
						else
							*Np->inout_p = ',';
					}
					else
					{
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
						}
						/* consume an input byte only if it really is a ',' */
						if (*Np->inout_p != ',')
							continue;
					}
					break;

				case NUM_G:
					pattern = Np->L_thousands_sep;
					pattern_len = strlen(pattern);
					if (Np->is_to_char)
					{
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
							else
							{
								/* just in case there are MB chars */
								pattern_len = pg_mbstrlen(pattern);
								memset(Np->inout_p, ' ', pattern_len);
								Np->inout_p += pattern_len - 1;
							}
						}
						else
						{
							strcpy(Np->inout_p, pattern);
							Np->inout_p += pattern_len - 1;
						}
					}
					else
					{
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
						}

						/*
						 * Because L_thousands_sep typically contains data
						 * characters (either '.' or ','), we can't use
						 * NUM_eat_non_data_chars here.  Instead skip only if
						 * the input matches L_thousands_sep.
						 */
						if (AMOUNT_TEST(pattern_len) &&
							strncmp(Np->inout_p, pattern, pattern_len) == 0)
							Np->inout_p += pattern_len - 1;
						else
							continue;
					}
					break;

				case NUM_L:
					pattern = Np->L_currency_symbol;
					if (Np->is_to_char)
					{
						strcpy(Np->inout_p, pattern);
						Np->inout_p += strlen(pattern) - 1;
					}
					else
					{
						/* skip as many characters as the symbol has, unless they look like data */
						NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
						continue;
					}
					break;

				case NUM_RN:
					/* output only: input with RN was rejected above */
					if (IS_FILLMODE(Np->Num))
					{
						strcpy(Np->inout_p, Np->number_p);
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					else
					{
						/* right-justify in a 15-character field */
						sprintf(Np->inout_p, "%15s", Np->number_p);
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					break;

				case NUM_rn:
					/* same as NUM_RN, passing the string through asc_tolower_z() */
					if (IS_FILLMODE(Np->Num))
					{
						strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					else
					{
						sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					break;

				case NUM_th:
					/* no ordinal suffix for roman, '#' overflow, negative or decimal pictures */
					if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
						Np->sign == '-' || IS_DECIMAL(Np->Num))
						continue;

					if (Np->is_to_char)
					{
						/* two bytes written: +1 here, +1 at the loop bottom */
						strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
						Np->inout_p += 1;
					}
					else
					{
						/* All variants of 'th' occupy 2 characters */
						NUM_eat_non_data_chars(Np, 2, input_len);
						continue;
					}
					break;

				case NUM_TH:
					/* same exclusions as for NUM_th */
					if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
						Np->sign == '-' || IS_DECIMAL(Np->Num))
						continue;

					if (Np->is_to_char)
					{
						/* two bytes written: +1 here, +1 at the loop bottom */
						strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
						Np->inout_p += 1;
					}
					else
					{
						/* All variants of 'TH' occupy 2 characters */
						NUM_eat_non_data_chars(Np, 2, input_len);
						continue;
					}
					break;

				case NUM_MI:
					if (Np->is_to_char)
					{
						/* '-' for negatives; otherwise a blank, or nothing in FM */
						if (Np->sign == '-')
							*Np->inout_p = '-';
						else if (IS_FILLMODE(Np->Num))
							continue;
						else
							*Np->inout_p = ' ';
					}
					else
					{
						if (*Np->inout_p == '-')
							*Np->number = '-';
						else
						{
							NUM_eat_non_data_chars(Np, 1, input_len);
							continue;
						}
					}
					break;

				case NUM_PL:
					if (Np->is_to_char)
					{
						/* '+' for positives; otherwise a blank, or nothing in FM */
						if (Np->sign == '+')
							*Np->inout_p = '+';
						else if (IS_FILLMODE(Np->Num))
							continue;
						else
							*Np->inout_p = ' ';
					}
					else
					{
						if (*Np->inout_p == '+')
							*Np->number = '+';
						else
						{
							NUM_eat_non_data_chars(Np, 1, input_len);
							continue;
						}
					}
					break;

				case NUM_SG:
					if (Np->is_to_char)
						*Np->inout_p = Np->sign;
					else
					{
						if (*Np->inout_p == '-')
							*Np->number = '-';
						else if (*Np->inout_p == '+')
							*Np->number = '+';
						else
						{
							NUM_eat_non_data_chars(Np, 1, input_len);
							continue;
						}
					}
					break;

				default:
					/* any other key produces / consumes nothing */
					continue;
					break;
			}
		}
		else
		{
			/*
			 * In TO_CHAR, non-pattern characters in the format are copied to
			 * the output.  In TO_NUMBER, we skip one input character for each
			 * non-pattern format character, whether or not it matches the
			 * format character.
			 */
			if (Np->is_to_char)
			{
				strcpy(Np->inout_p, n->character);
				Np->inout_p += strlen(Np->inout_p);
			}
			else
			{
				Np->inout_p += pg_mblen(Np->inout_p);
			}
			continue;
		}
		/* shared final step for every case that ended with "break" */
		Np->inout_p++;
	}

	if (Np->is_to_char)
	{
		*Np->inout_p = '\0';
		return Np->inout;
	}
	else
	{
		/* terminate the digit string, dropping a trailing decimal point */
		if (*(Np->number_p - 1) == '.')
			*(Np->number_p - 1) = '\0';
		else
			*Np->number_p = '\0';

		/*
		 * Correction - precision of dec. number: report the number of
		 * fractional digits actually read back to the caller.
		 */
		Np->Num->post = Np->read_post;

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
#endif
		return Np->number;
	}
}
5351
/* ----------
 * MACRO: Start part of NUM - shared prologue of all to_char(number) variants
 *
 * Not hygienic: it reads the caller's local "fmt" and assigns the caller's
 * locals "result", "format", "Num" and "shouldFree", so those names must
 * exist in the enclosing function.
 *
 * If the format is empty, or so long that the output-buffer size computation
 * could overflow an int, the enclosing function returns an empty text value
 * right here.  Otherwise a zero-filled result buffer of
 * len * NUM_MAX_ITEM_SIZ + 1 bytes (plus varlena header) is allocated and
 * the parsed format is obtained from NUM_cache().
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int len = VARSIZE_ANY_EXHDR(fmt); \
	if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ)	\
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	result	= (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ);	\
	format	= NUM_cache(len, &Num, fmt, &shouldFree);		\
} while (0)
5365
/* ----------
 * MACRO: Finish part of NUM - shared epilogue of all to_char(number) variants
 *
 * Not hygienic: it uses the caller's locals "format", "Num", "result",
 * "numstr", "out_pre_spaces", "sign" and "shouldFree".
 *
 * Runs NUM_processor() in to_char mode, writing into the data area of
 * "result"; frees the format if NUM_cache() asked for that; then sets the
 * varlena size of "result" from the length of the produced C string.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int		len; \
									\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
									\
	if (shouldFree)					\
		pfree(format);				\
									\
	/*								\
	 * Convert null-terminated representation of result to standard text. \
	 * The result is usually much bigger than it needs to be, but there \
	 * seems little point in realloc'ing it smaller. \
	 */								\
	len = strlen(VARDATA(result));	\
	SET_VARSIZE(result, len + VARHDRSZ); \
} while (0)
5387
5388 /* -------------------
5389 * NUMERIC to_number() (convert string to numeric)
5390 * -------------------
5391 */
5392 Datum
numeric_to_number(PG_FUNCTION_ARGS)5393 numeric_to_number(PG_FUNCTION_ARGS)
5394 {
5395 text *value = PG_GETARG_TEXT_PP(0);
5396 text *fmt = PG_GETARG_TEXT_PP(1);
5397 NUMDesc Num;
5398 Datum result;
5399 FormatNode *format;
5400 char *numstr;
5401 bool shouldFree;
5402 int len = 0;
5403 int scale,
5404 precision;
5405
5406 len = VARSIZE_ANY_EXHDR(fmt);
5407
5408 if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
5409 PG_RETURN_NULL();
5410
5411 format = NUM_cache(len, &Num, fmt, &shouldFree);
5412
5413 numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
5414
5415 NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
5416 VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
5417
5418 scale = Num.post;
5419 precision = Num.pre + Num.multi + scale;
5420
5421 if (shouldFree)
5422 pfree(format);
5423
5424 result = DirectFunctionCall3(numeric_in,
5425 CStringGetDatum(numstr),
5426 ObjectIdGetDatum(InvalidOid),
5427 Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
5428
5429 if (IS_MULTI(&Num))
5430 {
5431 Numeric x;
5432 Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
5433 Int32GetDatum(10)));
5434 Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
5435 Int32GetDatum(-Num.multi)));
5436
5437 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
5438 NumericGetDatum(a),
5439 NumericGetDatum(b)));
5440 result = DirectFunctionCall2(numeric_mul,
5441 result,
5442 NumericGetDatum(x));
5443 }
5444
5445 pfree(numstr);
5446 return result;
5447 }
5448
5449 /* ------------------
5450 * NUMERIC to_char()
5451 * ------------------
5452 */
5453 Datum
numeric_to_char(PG_FUNCTION_ARGS)5454 numeric_to_char(PG_FUNCTION_ARGS)
5455 {
5456 Numeric value = PG_GETARG_NUMERIC(0);
5457 text *fmt = PG_GETARG_TEXT_PP(1);
5458 NUMDesc Num;
5459 FormatNode *format;
5460 text *result;
5461 bool shouldFree;
5462 int out_pre_spaces = 0,
5463 sign = 0;
5464 char *numstr,
5465 *orgnum,
5466 *p;
5467 Numeric x;
5468
5469 NUM_TOCHAR_prepare;
5470
5471 /*
5472 * On DateType depend part (numeric)
5473 */
5474 if (IS_ROMAN(&Num))
5475 {
5476 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
5477 NumericGetDatum(value),
5478 Int32GetDatum(0)));
5479 numstr = orgnum =
5480 int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
5481 NumericGetDatum(x))));
5482 }
5483 else if (IS_EEEE(&Num))
5484 {
5485 orgnum = numeric_out_sci(value, Num.post);
5486
5487 /*
5488 * numeric_out_sci() does not emit a sign for positive numbers. We
5489 * need to add a space in this case so that positive and negative
5490 * numbers are aligned. We also have to do the right thing for NaN.
5491 */
5492 if (strcmp(orgnum, "NaN") == 0)
5493 {
5494 /*
5495 * Allow 6 characters for the leading sign, the decimal point,
5496 * "e", the exponent's sign and two exponent digits.
5497 */
5498 numstr = (char *) palloc(Num.pre + Num.post + 7);
5499 fill_str(numstr, '#', Num.pre + Num.post + 6);
5500 *numstr = ' ';
5501 *(numstr + Num.pre + 1) = '.';
5502 }
5503 else if (*orgnum != '-')
5504 {
5505 numstr = (char *) palloc(strlen(orgnum) + 2);
5506 *numstr = ' ';
5507 strcpy(numstr + 1, orgnum);
5508 }
5509 else
5510 {
5511 numstr = orgnum;
5512 }
5513 }
5514 else
5515 {
5516 int numstr_pre_len;
5517 Numeric val = value;
5518
5519 if (IS_MULTI(&Num))
5520 {
5521 Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
5522 Int32GetDatum(10)));
5523 Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
5524 Int32GetDatum(Num.multi)));
5525
5526 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
5527 NumericGetDatum(a),
5528 NumericGetDatum(b)));
5529 val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
5530 NumericGetDatum(value),
5531 NumericGetDatum(x)));
5532 Num.pre += Num.multi;
5533 }
5534
5535 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
5536 NumericGetDatum(val),
5537 Int32GetDatum(Num.post)));
5538 orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
5539 NumericGetDatum(x)));
5540
5541 if (*orgnum == '-')
5542 {
5543 sign = '-';
5544 numstr = orgnum + 1;
5545 }
5546 else
5547 {
5548 sign = '+';
5549 numstr = orgnum;
5550 }
5551
5552 if ((p = strchr(numstr, '.')))
5553 numstr_pre_len = p - numstr;
5554 else
5555 numstr_pre_len = strlen(numstr);
5556
5557 /* needs padding? */
5558 if (numstr_pre_len < Num.pre)
5559 out_pre_spaces = Num.pre - numstr_pre_len;
5560 /* overflowed prefix digit format? */
5561 else if (numstr_pre_len > Num.pre)
5562 {
5563 numstr = (char *) palloc(Num.pre + Num.post + 2);
5564 fill_str(numstr, '#', Num.pre + Num.post + 1);
5565 *(numstr + Num.pre) = '.';
5566 }
5567 }
5568
5569 NUM_TOCHAR_finish;
5570 PG_RETURN_TEXT_P(result);
5571 }
5572
5573 /* ---------------
5574 * INT4 to_char()
5575 * ---------------
5576 */
5577 Datum
int4_to_char(PG_FUNCTION_ARGS)5578 int4_to_char(PG_FUNCTION_ARGS)
5579 {
5580 int32 value = PG_GETARG_INT32(0);
5581 text *fmt = PG_GETARG_TEXT_PP(1);
5582 NUMDesc Num;
5583 FormatNode *format;
5584 text *result;
5585 bool shouldFree;
5586 int out_pre_spaces = 0,
5587 sign = 0;
5588 char *numstr,
5589 *orgnum;
5590
5591 NUM_TOCHAR_prepare;
5592
5593 /*
5594 * On DateType depend part (int32)
5595 */
5596 if (IS_ROMAN(&Num))
5597 numstr = orgnum = int_to_roman(value);
5598 else if (IS_EEEE(&Num))
5599 {
5600 /* we can do it easily because float8 won't lose any precision */
5601 float8 val = (float8) value;
5602
5603 orgnum = (char *) psprintf("%+.*e", Num.post, val);
5604
5605 /*
5606 * Swap a leading positive sign for a space.
5607 */
5608 if (*orgnum == '+')
5609 *orgnum = ' ';
5610
5611 numstr = orgnum;
5612 }
5613 else
5614 {
5615 int numstr_pre_len;
5616
5617 if (IS_MULTI(&Num))
5618 {
5619 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
5620 Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
5621 Num.pre += Num.multi;
5622 }
5623 else
5624 {
5625 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
5626 Int32GetDatum(value)));
5627 }
5628
5629 if (*orgnum == '-')
5630 {
5631 sign = '-';
5632 orgnum++;
5633 }
5634 else
5635 sign = '+';
5636
5637 numstr_pre_len = strlen(orgnum);
5638
5639 /* post-decimal digits? Pad out with zeros. */
5640 if (Num.post)
5641 {
5642 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
5643 strcpy(numstr, orgnum);
5644 *(numstr + numstr_pre_len) = '.';
5645 memset(numstr + numstr_pre_len + 1, '0', Num.post);
5646 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
5647 }
5648 else
5649 numstr = orgnum;
5650
5651 /* needs padding? */
5652 if (numstr_pre_len < Num.pre)
5653 out_pre_spaces = Num.pre - numstr_pre_len;
5654 /* overflowed prefix digit format? */
5655 else if (numstr_pre_len > Num.pre)
5656 {
5657 numstr = (char *) palloc(Num.pre + Num.post + 2);
5658 fill_str(numstr, '#', Num.pre + Num.post + 1);
5659 *(numstr + Num.pre) = '.';
5660 }
5661 }
5662
5663 NUM_TOCHAR_finish;
5664 PG_RETURN_TEXT_P(result);
5665 }
5666
5667 /* ---------------
5668 * INT8 to_char()
5669 * ---------------
5670 */
5671 Datum
int8_to_char(PG_FUNCTION_ARGS)5672 int8_to_char(PG_FUNCTION_ARGS)
5673 {
5674 int64 value = PG_GETARG_INT64(0);
5675 text *fmt = PG_GETARG_TEXT_PP(1);
5676 NUMDesc Num;
5677 FormatNode *format;
5678 text *result;
5679 bool shouldFree;
5680 int out_pre_spaces = 0,
5681 sign = 0;
5682 char *numstr,
5683 *orgnum;
5684
5685 NUM_TOCHAR_prepare;
5686
5687 /*
5688 * On DateType depend part (int32)
5689 */
5690 if (IS_ROMAN(&Num))
5691 {
5692 /* Currently don't support int8 conversion to roman... */
5693 numstr = orgnum = int_to_roman(DatumGetInt32(
5694 DirectFunctionCall1(int84, Int64GetDatum(value))));
5695 }
5696 else if (IS_EEEE(&Num))
5697 {
5698 /* to avoid loss of precision, must go via numeric not float8 */
5699 Numeric val;
5700
5701 val = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
5702 Int64GetDatum(value)));
5703 orgnum = numeric_out_sci(val, Num.post);
5704
5705 /*
5706 * numeric_out_sci() does not emit a sign for positive numbers. We
5707 * need to add a space in this case so that positive and negative
5708 * numbers are aligned. We don't have to worry about NaN here.
5709 */
5710 if (*orgnum != '-')
5711 {
5712 numstr = (char *) palloc(strlen(orgnum) + 2);
5713 *numstr = ' ';
5714 strcpy(numstr + 1, orgnum);
5715 }
5716 else
5717 {
5718 numstr = orgnum;
5719 }
5720 }
5721 else
5722 {
5723 int numstr_pre_len;
5724
5725 if (IS_MULTI(&Num))
5726 {
5727 double multi = pow((double) 10, (double) Num.multi);
5728
5729 value = DatumGetInt64(DirectFunctionCall2(int8mul,
5730 Int64GetDatum(value),
5731 DirectFunctionCall1(dtoi8,
5732 Float8GetDatum(multi))));
5733 Num.pre += Num.multi;
5734 }
5735
5736 orgnum = DatumGetCString(DirectFunctionCall1(int8out,
5737 Int64GetDatum(value)));
5738
5739 if (*orgnum == '-')
5740 {
5741 sign = '-';
5742 orgnum++;
5743 }
5744 else
5745 sign = '+';
5746
5747 numstr_pre_len = strlen(orgnum);
5748
5749 /* post-decimal digits? Pad out with zeros. */
5750 if (Num.post)
5751 {
5752 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
5753 strcpy(numstr, orgnum);
5754 *(numstr + numstr_pre_len) = '.';
5755 memset(numstr + numstr_pre_len + 1, '0', Num.post);
5756 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
5757 }
5758 else
5759 numstr = orgnum;
5760
5761 /* needs padding? */
5762 if (numstr_pre_len < Num.pre)
5763 out_pre_spaces = Num.pre - numstr_pre_len;
5764 /* overflowed prefix digit format? */
5765 else if (numstr_pre_len > Num.pre)
5766 {
5767 numstr = (char *) palloc(Num.pre + Num.post + 2);
5768 fill_str(numstr, '#', Num.pre + Num.post + 1);
5769 *(numstr + Num.pre) = '.';
5770 }
5771 }
5772
5773 NUM_TOCHAR_finish;
5774 PG_RETURN_TEXT_P(result);
5775 }
5776
5777 /* -----------------
5778 * FLOAT4 to_char()
5779 * -----------------
5780 */
5781 Datum
float4_to_char(PG_FUNCTION_ARGS)5782 float4_to_char(PG_FUNCTION_ARGS)
5783 {
5784 float4 value = PG_GETARG_FLOAT4(0);
5785 text *fmt = PG_GETARG_TEXT_PP(1);
5786 NUMDesc Num;
5787 FormatNode *format;
5788 text *result;
5789 bool shouldFree;
5790 int out_pre_spaces = 0,
5791 sign = 0;
5792 char *numstr,
5793 *orgnum,
5794 *p;
5795
5796 NUM_TOCHAR_prepare;
5797
5798 if (IS_ROMAN(&Num))
5799 numstr = orgnum = int_to_roman((int) rint(value));
5800 else if (IS_EEEE(&Num))
5801 {
5802 if (isnan(value) || isinf(value))
5803 {
5804 /*
5805 * Allow 6 characters for the leading sign, the decimal point,
5806 * "e", the exponent's sign and two exponent digits.
5807 */
5808 numstr = (char *) palloc(Num.pre + Num.post + 7);
5809 fill_str(numstr, '#', Num.pre + Num.post + 6);
5810 *numstr = ' ';
5811 *(numstr + Num.pre + 1) = '.';
5812 }
5813 else
5814 {
5815 numstr = orgnum = psprintf("%+.*e", Num.post, value);
5816
5817 /*
5818 * Swap a leading positive sign for a space.
5819 */
5820 if (*orgnum == '+')
5821 *orgnum = ' ';
5822
5823 numstr = orgnum;
5824 }
5825 }
5826 else
5827 {
5828 float4 val = value;
5829 int numstr_pre_len;
5830
5831 if (IS_MULTI(&Num))
5832 {
5833 float multi = pow((double) 10, (double) Num.multi);
5834
5835 val = value * multi;
5836 Num.pre += Num.multi;
5837 }
5838
5839 orgnum = (char *) psprintf("%.0f", fabs(val));
5840 numstr_pre_len = strlen(orgnum);
5841
5842 /* adjust post digits to fit max float digits */
5843 if (numstr_pre_len >= FLT_DIG)
5844 Num.post = 0;
5845 else if (numstr_pre_len + Num.post > FLT_DIG)
5846 Num.post = FLT_DIG - numstr_pre_len;
5847 orgnum = psprintf("%.*f", Num.post, val);
5848
5849 if (*orgnum == '-')
5850 { /* < 0 */
5851 sign = '-';
5852 numstr = orgnum + 1;
5853 }
5854 else
5855 {
5856 sign = '+';
5857 numstr = orgnum;
5858 }
5859
5860 if ((p = strchr(numstr, '.')))
5861 numstr_pre_len = p - numstr;
5862 else
5863 numstr_pre_len = strlen(numstr);
5864
5865 /* needs padding? */
5866 if (numstr_pre_len < Num.pre)
5867 out_pre_spaces = Num.pre - numstr_pre_len;
5868 /* overflowed prefix digit format? */
5869 else if (numstr_pre_len > Num.pre)
5870 {
5871 numstr = (char *) palloc(Num.pre + Num.post + 2);
5872 fill_str(numstr, '#', Num.pre + Num.post + 1);
5873 *(numstr + Num.pre) = '.';
5874 }
5875 }
5876
5877 NUM_TOCHAR_finish;
5878 PG_RETURN_TEXT_P(result);
5879 }
5880
5881 /* -----------------
5882 * FLOAT8 to_char()
5883 * -----------------
5884 */
5885 Datum
float8_to_char(PG_FUNCTION_ARGS)5886 float8_to_char(PG_FUNCTION_ARGS)
5887 {
5888 float8 value = PG_GETARG_FLOAT8(0);
5889 text *fmt = PG_GETARG_TEXT_PP(1);
5890 NUMDesc Num;
5891 FormatNode *format;
5892 text *result;
5893 bool shouldFree;
5894 int out_pre_spaces = 0,
5895 sign = 0;
5896 char *numstr,
5897 *orgnum,
5898 *p;
5899
5900 NUM_TOCHAR_prepare;
5901
5902 if (IS_ROMAN(&Num))
5903 numstr = orgnum = int_to_roman((int) rint(value));
5904 else if (IS_EEEE(&Num))
5905 {
5906 if (isnan(value) || isinf(value))
5907 {
5908 /*
5909 * Allow 6 characters for the leading sign, the decimal point,
5910 * "e", the exponent's sign and two exponent digits.
5911 */
5912 numstr = (char *) palloc(Num.pre + Num.post + 7);
5913 fill_str(numstr, '#', Num.pre + Num.post + 6);
5914 *numstr = ' ';
5915 *(numstr + Num.pre + 1) = '.';
5916 }
5917 else
5918 {
5919 numstr = orgnum = (char *) psprintf("%+.*e", Num.post, value);
5920
5921 /*
5922 * Swap a leading positive sign for a space.
5923 */
5924 if (*orgnum == '+')
5925 *orgnum = ' ';
5926
5927 numstr = orgnum;
5928 }
5929 }
5930 else
5931 {
5932 float8 val = value;
5933 int numstr_pre_len;
5934
5935 if (IS_MULTI(&Num))
5936 {
5937 double multi = pow((double) 10, (double) Num.multi);
5938
5939 val = value * multi;
5940 Num.pre += Num.multi;
5941 }
5942 orgnum = psprintf("%.0f", fabs(val));
5943 numstr_pre_len = strlen(orgnum);
5944
5945 /* adjust post digits to fit max double digits */
5946 if (numstr_pre_len >= DBL_DIG)
5947 Num.post = 0;
5948 else if (numstr_pre_len + Num.post > DBL_DIG)
5949 Num.post = DBL_DIG - numstr_pre_len;
5950 orgnum = psprintf("%.*f", Num.post, val);
5951
5952 if (*orgnum == '-')
5953 { /* < 0 */
5954 sign = '-';
5955 numstr = orgnum + 1;
5956 }
5957 else
5958 {
5959 sign = '+';
5960 numstr = orgnum;
5961 }
5962
5963 if ((p = strchr(numstr, '.')))
5964 numstr_pre_len = p - numstr;
5965 else
5966 numstr_pre_len = strlen(numstr);
5967
5968 /* needs padding? */
5969 if (numstr_pre_len < Num.pre)
5970 out_pre_spaces = Num.pre - numstr_pre_len;
5971 /* overflowed prefix digit format? */
5972 else if (numstr_pre_len > Num.pre)
5973 {
5974 numstr = (char *) palloc(Num.pre + Num.post + 2);
5975 fill_str(numstr, '#', Num.pre + Num.post + 1);
5976 *(numstr + Num.pre) = '.';
5977 }
5978 }
5979
5980 NUM_TOCHAR_finish;
5981 PG_RETURN_TEXT_P(result);
5982 }
5983