1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  *	 Portions Copyright (c) 1999-2021, PostgreSQL Global Development Group
8  *
9  *
10  *	 TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  *	 The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  *	 inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  *	 Cache & Memory:
 *	The routines use their own internal cache for format pictures.
 *
 *	The cache uses a static buffer and is persistent across transactions.  If
 *	the format picture is bigger than the cache buffer, the parser is always
 *	called.
22  *
 *	 NOTE for Number version:
 *	Everything in this version is implemented as keywords (suffixes are
 *	not used), because a format picture describes only *one* item (a
 *	number).  This differs from the timestamp version, where each keyword
 *	can have a suffix.
28  *
 *	 NOTE for Timestamp routines:
 *	This module does *not* use the POSIX 'struct tm' type, but rather the
 *	PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
 *	year is the full year number, *not* an offset from 1900.
 *	The module supports AD / BC / AM / PM.
34  *
35  *	Supported types for to_char():
36  *
37  *		Timestamp, Numeric, int4, int8, float4, float8
38  *
39  *	Supported types for reverse conversion:
40  *
41  *		Timestamp	- to_timestamp()
42  *		Date		- to_date()
43  *		Numeric		- to_number()
44  *
45  *
46  *	Karel Zak
47  *
48  * TODO
49  *	- better number building (formatting) / parsing, now it isn't
50  *		  ideal code
51  *	- use Assert()
52  *	- add support for roman number to standard number conversion
53  *	- add support for number spelling
54  *	- add support for string to string formatting (we must be better
55  *	  than Oracle :-),
56  *		to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
/*
 * When DEBUG_TO_FROM_CHAR is defined, DEBUG_elog_output names the elog level
 * (DEBUG3) used by the DEBUG_TMFC() and DEBUG_TM() dump macros in this file.
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_elog_output	DEBUG3
#endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 
73 /*
74  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75  * declare them in <wchar.h>, so include that too.
76  */
77 #include <wchar.h>
78 #ifdef HAVE_WCTYPE_H
79 #include <wctype.h>
80 #endif
81 
82 #ifdef USE_ICU
83 #include <unicode/ustring.h>
84 #endif
85 
86 #include "catalog/pg_collation.h"
87 #include "catalog/pg_type.h"
88 #include "mb/pg_wchar.h"
89 #include "parser/scansup.h"
90 #include "utils/builtins.h"
91 #include "utils/date.h"
92 #include "utils/datetime.h"
93 #include "utils/float.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/memutils.h"
97 #include "utils/numeric.h"
98 #include "utils/pg_locale.h"
99 
100 /* ----------
101  * Convenience macros for error handling
102  * ----------
103  *
 * The two macros below help to handle errors in functions that take a
 * 'bool *have_error' argument.  When this argument is not NULL, the function
 * is expected to suppress ereports when possible.  Instead it should
 * return some default value and set the *have_error flag.
 *
 * The RETURN_ERROR() macro is intended to wrap ereport() calls.  When the
 * have_error function argument is not NULL, then instead of ereport'ing we
 * set the *have_error flag and go to the on_error label.  It's expected that
 * at that label resources will be freed and some 'default' value returned.
 *
 * CHECK_ERROR() jumps to the on_error label when the *have_error flag is
 * defined and set.  It's intended to be used for immediate exit from the
 * function on error after calling another function that takes a
 * 'bool *have_error' argument.
117  */
/*
 * RETURN_ERROR(throw_error)
 *
 * Needs a 'bool *have_error' variable and an 'on_error' label in the
 * enclosing function.  If have_error is NULL, the throw_error statement is
 * executed; otherwise *have_error is set to true and control jumps to
 * on_error.
 */
#define RETURN_ERROR(throw_error) \
do { \
	if (!have_error) \
	{ \
		throw_error; \
	} \
	else \
	{ \
		*have_error = true; \
		goto on_error; \
	} \
} while (0)
130 
/*
 * CHECK_ERROR
 *
 * Needs a 'bool *have_error' variable and an 'on_error' label in the
 * enclosing function.  Jumps to on_error only when have_error is non-NULL
 * and the flag it points to is set; otherwise falls through.
 */
#define CHECK_ERROR \
do { \
	if (!have_error || !*have_error) \
		break; \
	goto on_error; \
} while (0)
136 
137 /* ----------
138  * Routines flags
139  * ----------
140  */
/* One bit per family of format pictures. */
#define DCH_FLAG		(1 << 0)	/* DATE-TIME flag */
#define NUM_FLAG		(1 << 1)	/* NUMBER flag */
#define STD_FLAG		(1 << 2)	/* STANDARD flag */
144 
145 /* ----------
146  * KeyWord Index (ascii from position 32 (' ') to 126 (~))
147  * ----------
148  */
/* Number of slots in a keyword index array. */
#define KeyWord_INDEX_SIZE		('~' - ' ')
/* 1 if _c lies strictly between ' ' and '~', else 0. */
#define KeyWord_INDEX_FILTER(_c)	(((_c) > ' ' && (_c) < '~') ? 1 : 0)
151 
152 /* ----------
153  * Maximal length of one node
154  * ----------
155  */
#define DCH_MAX_ITEM_SIZ	   12	/* max localized day name		*/
/*
 * NOTE(review): the comment below mentions 15 chars for RN while the limit
 * is 8 -- confirm how roman-numeral output is sized by the users of this
 * constant.
 */
#define NUM_MAX_ITEM_SIZ		8	/* roman number (RN has 15 chars)	*/
158 
159 
160 /* ----------
161  * Format parser structs
162  * ----------
163  */
/* Description of one keyword prefix/suffix (see DCH_suff[]). */
typedef struct
{
	const char *name;			/* suffix string */
	int			len;			/* suffix length */
	int			id;				/* used in node->suffix */
	int			type;			/* prefix / postfix */
} KeySuffix;
171 
172 /* ----------
173  * FromCharDateMode
174  * ----------
175  *
176  * This value is used to nominate one of several distinct (and mutually
177  * exclusive) date conventions that a keyword can belong to.
178  */
typedef enum
{
	/* Value does not affect date mode. */
	FROM_CHAR_DATE_NONE = 0,
	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_GREGORIAN = 1,
	/* ISO 8601 week date */
	FROM_CHAR_DATE_ISOWEEK = 2
} FromCharDateMode;
185 
/*
 * One format-picture keyword, as stored in DCH_keywords[] and NUM_keywords[].
 */
typedef struct
{
	const char *name;			/* keyword text as written in the picture */
	int			len;			/* length of name */
	int			id;				/* DCH_poz or NUM_poz value */
	bool		is_digit;		/* presumably: keyword stands for a numeric
								 * field -- confirm against the users */
	FromCharDateMode date_mode; /* date convention this keyword belongs to */
} KeyWord;
194 
/*
 * One node of a parsed format picture.  Which of the members below are
 * meaningful depends on 'type' (a NODE_TYPE_XXX code).
 */
typedef struct
{
	uint8		type;			/* NODE_TYPE_XXX, see below */
	char		character[MAX_MULTIBYTE_CHAR_LEN + 1];	/* if type is CHAR */
	uint8		suffix;			/* keyword prefix/suffix code, if any */
	const KeyWord *key;			/* if type is ACTION */
} FormatNode;
202 
/* Values for FormatNode.type */
#define NODE_TYPE_END		1
#define NODE_TYPE_ACTION	2
#define NODE_TYPE_CHAR		3
#define NODE_TYPE_SEPARATOR	4
#define NODE_TYPE_SPACE		5

/* Values for KeySuffix.type */
#define SUFFTYPE_PREFIX		1
#define SUFFTYPE_POSTFIX	2

/* Clock styles; presumably the values stored in TmFromChar.clock -- confirm */
#define CLOCK_24_HOUR		0
#define CLOCK_12_HOUR		1
214 
215 
/* ----------
 * Full English month names, January first, NULL-terminated
 * ----------
 */
static const char *const months_full[] = {
	"January",
	"February",
	"March",
	"April",
	"May",
	"June",
	"July",
	"August",
	"September",
	"October",
	"November",
	"December",
	NULL
};
224 
/* Abbreviated English day names, Sunday first, NULL-terminated */
static const char *const days_short[] = {
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat",
	NULL
};
228 
229 /* ----------
230  * AD / BC
231  * ----------
232  *	There is no 0 AD.  Years go from 1 BC to 1 AD, so we make it
233  *	positive and map year == -1 to year zero, and shift all negative
234  *	years up one.  For interval years, we just return the year.
235  */
/*
 * Interval years are returned unchanged.  Otherwise positive years are kept
 * and a year <= 0 becomes 1 - year (0 -> 1, -1 -> 2, ...).
 */
#define ADJUST_YEAR(year, is_interval)	((is_interval) ? (year) : ((year) > 0 ? (year) : 1 - (year)))
237 
/* Era markers: long form (with periods) */
#define A_D_STR		"A.D."
#define B_C_STR		"B.C."
#define a_d_STR		"a.d."
#define b_c_STR		"b.c."

/* Era markers: standard form (without periods) */
#define AD_STR		"AD"
#define BC_STR		"BC"
#define ad_STR		"ad"
#define bc_STR		"bc"

/*
 * AD / BC strings for seq_search.
 *
 * Two NULL-terminated arrays are provided: the standard form without
 * periods and the long form with periods.
 *
 * In both arrays every AD spelling sits at an even index and every BC
 * spelling at an odd index, so (index of the match) % 2 is the boolean
 * value for BC.
 */
static const char *const adbc_strings[] = {
	ad_STR,						/* 0 */
	bc_STR,						/* 1 */
	AD_STR,						/* 2 */
	BC_STR,						/* 3 */
	NULL
};

static const char *const adbc_strings_long[] = {
	a_d_STR,					/* 0 */
	b_c_STR,					/* 1 */
	A_D_STR,					/* 2 */
	B_C_STR,					/* 3 */
	NULL
};
260 
261 /* ----------
262  * AM / PM
263  * ----------
264  */
/* Meridiem markers: long form (with periods) */
#define A_M_STR		"A.M."
#define P_M_STR		"P.M."
#define a_m_STR		"a.m."
#define p_m_STR		"p.m."

/* Meridiem markers: standard form (without periods) */
#define AM_STR		"AM"
#define PM_STR		"PM"
#define am_STR		"am"
#define pm_STR		"pm"

/*
 * AM / PM strings for seq_search.
 *
 * Two NULL-terminated arrays are provided: the standard form without
 * periods and the long form with periods.
 *
 * In both arrays every AM spelling sits at an even index and every PM
 * spelling at an odd index, so (index of the match) % 2 is the boolean
 * value for PM.
 */
static const char *const ampm_strings[] = {
	am_STR,						/* 0 */
	pm_STR,						/* 1 */
	AM_STR,						/* 2 */
	PM_STR,						/* 3 */
	NULL
};

static const char *const ampm_strings_long[] = {
	a_m_STR,					/* 0 */
	p_m_STR,					/* 1 */
	A_M_STR,					/* 2 */
	P_M_STR,					/* 3 */
	NULL
};
287 
288 /* ----------
289  * Months in roman-numeral
290  * (Must be in reverse order for seq_search (in FROM_CHAR), because
291  *	'VIII' must have higher precedence than 'V')
292  * ----------
293  */
/* Roman-numeral months, December (index 0) down to January (index 11). */
static const char *const rm_months_upper[] = {
	"XII", "XI", "X",
	"IX", "VIII", "VII",
	"VI", "V", "IV",
	"III", "II", "I",
	NULL
};

static const char *const rm_months_lower[] = {
	"xii", "xi", "x",
	"ix", "viii", "vii",
	"vi", "v", "iv",
	"iii", "ii", "i",
	NULL
};
299 
300 /* ----------
301  * Roman numbers
302  * ----------
303  */
/* Roman numerals for 1..9; element [k - 1] spells k */
static const char *const rm1[] = {
	"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX",
	NULL
};

/* Roman numerals for 10..90; element [k - 1] spells k * 10 */
static const char *const rm10[] = {
	"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC",
	NULL
};

/* Roman numerals for 100..900; element [k - 1] spells k * 100 */
static const char *const rm100[] = {
	"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM",
	NULL
};
307 
308 /* ----------
309  * Ordinal postfixes
310  * ----------
311  */
/* Upper-case ordinal postfixes */
static const char *const numTH[] = {
	"ST",
	"ND",
	"RD",
	"TH",
	NULL
};

/* Lower-case ordinal postfixes */
static const char *const numth[] = {
	"st",
	"nd",
	"rd",
	"th",
	NULL
};
314 
315 /* ----------
316  * Flags & Options:
317  * ----------
318  */
/* Case selectors for ordinal postfixes; S_TH_TYPE() yields one of these. */
#define TH_UPPER		1
#define TH_LOWER		2
321 
322 /* ----------
323  * Number description struct
324  * ----------
325  */
typedef struct
{
	int			pre;			/* (count) numbers before decimal */
	int			post;			/* (count) numbers after decimal */
	int			lsign;			/* want locales sign */
	int			flag;			/* number parameters (NUM_F_xxx bits) */
	int			pre_lsign_num;	/* tmp value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of first zero */
	int			zero_end;		/* position of last zero */
	int			need_locale;	/* needs it locale */
} NUMDesc;
338 
339 /* ----------
340  * Flags for NUMBER version
341  * ----------
342  */
/* Bits of NUMDesc.flag (bit 0 is not used) */
#define NUM_F_DECIMAL		0x0002
#define NUM_F_LDECIMAL		0x0004
#define NUM_F_ZERO			0x0008
#define NUM_F_BLANK			0x0010
#define NUM_F_FILLMODE		0x0020
#define NUM_F_LSIGN			0x0040
#define NUM_F_BRACKET		0x0080
#define NUM_F_MINUS			0x0100
#define NUM_F_PLUS			0x0200
#define NUM_F_ROMAN			0x0400
#define NUM_F_MULTI			0x0800
#define NUM_F_PLUS_POST		0x1000
#define NUM_F_MINUS_POST	0x2000
#define NUM_F_EEEE			0x4000
357 
/*
 * Locale-sign placement codes; presumably the values stored in
 * NUMDesc.lsign -- confirm against the users.
 */
#define NUM_LSIGN_PRE	(-1)
#define NUM_LSIGN_POST	1
#define NUM_LSIGN_NONE	0
361 
362 /* ----------
363  * Tests
364  * ----------
365  */
/*
 * Each IS_xxx(_f) yields the NUM_F_xxx bit of _f->flag: nonzero (the bit
 * value itself) when the flag is set, zero otherwise.
 */
#define NUM_FLAG_ISSET(_f, _bit)	((_f)->flag & (_bit))

#define IS_DECIMAL(_f)	NUM_FLAG_ISSET(_f, NUM_F_DECIMAL)
#define IS_LDECIMAL(_f) NUM_FLAG_ISSET(_f, NUM_F_LDECIMAL)
#define IS_ZERO(_f)		NUM_FLAG_ISSET(_f, NUM_F_ZERO)
#define IS_BLANK(_f)	NUM_FLAG_ISSET(_f, NUM_F_BLANK)
#define IS_FILLMODE(_f) NUM_FLAG_ISSET(_f, NUM_F_FILLMODE)
#define IS_BRACKET(_f)	NUM_FLAG_ISSET(_f, NUM_F_BRACKET)
#define IS_MINUS(_f)	NUM_FLAG_ISSET(_f, NUM_F_MINUS)
#define IS_LSIGN(_f)	NUM_FLAG_ISSET(_f, NUM_F_LSIGN)
#define IS_PLUS(_f)		NUM_FLAG_ISSET(_f, NUM_F_PLUS)
#define IS_ROMAN(_f)	NUM_FLAG_ISSET(_f, NUM_F_ROMAN)
#define IS_MULTI(_f)	NUM_FLAG_ISSET(_f, NUM_F_MULTI)
#define IS_EEEE(_f)		NUM_FLAG_ISSET(_f, NUM_F_EEEE)
378 
379 /* ----------
380  * Format picture cache
381  *
382  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
383  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
384  *
385  * For simplicity, the cache entries are fixed-size, so they allow for the
386  * worst case of a FormatNode for each byte in the picture string.
387  *
388  * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
389  * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
390  * we don't waste too much space by palloc'ing them individually.  Be sure
391  * to adjust those macros if you add fields to those structs.
392  *
393  * The max number of entries in each cache is DCH_CACHE_ENTRIES
394  * resp. NUM_CACHE_ENTRIES.
395  * ----------
396  */
/*
 * Space reserved for the non-array members of a cache entry.
 *
 * NOTE(review): DCH_CACHE_OVERHEAD counts a single bool although
 * DCHCacheEntry has two (std and valid); presumably the second one fits in
 * the alignment padding -- confirm when adding fields.
 */
#define DCH_CACHE_OVERHEAD \
	MAXALIGN(sizeof(bool) + sizeof(int))
#define NUM_CACHE_OVERHEAD \
	MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))

/*
 * Longest cacheable picture string: one FormatNode plus one char is budgeted
 * per picture byte, within a 2048-byte (datetime) or 1024-byte (number)
 * target; the "- 1" pays for the extra "+ 1" array element in the entry
 * structs.
 */
#define DCH_CACHE_SIZE \
	((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
#define NUM_CACHE_SIZE \
	((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)

/* Capacity of the DCHCache[] and NUMCache[] pointer arrays */
#define DCH_CACHE_ENTRIES	20
#define NUM_CACHE_ENTRIES	20
409 
/* One cached datetime format picture */
typedef struct
{
	FormatNode	format[DCH_CACHE_SIZE + 1];	/* parsed picture */
	char		str[DCH_CACHE_SIZE + 1];	/* picture string */
	bool		std;			/* presumably: parsed with STD_FLAG -- confirm */
	bool		valid;			/* NOTE(review): looks like "entry is usable"
								 * -- confirm */
	int			age;			/* presumably compared against DCHCounter to
								 * pick a victim -- confirm */
} DCHCacheEntry;
418 
/* One cached number format picture */
typedef struct
{
	FormatNode	format[NUM_CACHE_SIZE + 1];	/* parsed picture */
	char		str[NUM_CACHE_SIZE + 1];	/* picture string */
	bool		valid;			/* NOTE(review): looks like "entry is usable"
								 * -- confirm */
	int			age;			/* presumably compared against NUMCounter to
								 * pick a victim -- confirm */
	NUMDesc		Num;			/* number description stored with the picture */
} NUMCacheEntry;
427 
/*
 * global cache for date/time format pictures: an array of pointers to
 * separately allocated entries, with room for DCH_CACHE_ENTRIES of them
 */
static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES];
static int	n_DCHCache = 0;		/* current number of entries */
static int	DCHCounter = 0;		/* aging-event counter */
432 
/*
 * global cache for number format pictures: an array of pointers to
 * separately allocated entries, with room for NUM_CACHE_ENTRIES of them
 */
static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES];
static int	n_NUMCache = 0;		/* current number of entries */
static int	NUMCounter = 0;		/* aging-event counter */
437 
438 /* ----------
439  * For char->date/time conversion
440  * ----------
441  */
442 typedef struct
443 {
444 	FromCharDateMode mode;
445 	int			hh,
446 				pm,
447 				mi,
448 				ss,
449 				ssss,
450 				d,				/* stored as 1-7, Sunday = 1, 0 means missing */
451 				dd,
452 				ddd,
453 				mm,
454 				ms,
455 				year,
456 				bc,
457 				ww,
458 				w,
459 				cc,
460 				j,
461 				us,
462 				yysz,			/* is it YY or YYYY ? */
463 				clock,			/* 12 or 24 hour clock? */
464 				tzsign,			/* +1, -1 or 0 if timezone info is absent */
465 				tzh,
466 				tzm,
467 				ff;				/* fractional precision */
468 } TmFromChar;
469 
470 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
471 
/* ----------
 * Debug
 *
 * With DEBUG_TO_FROM_CHAR defined, DEBUG_TMFC(_X) logs all fields of the
 * TmFromChar that _X points to (including the timezone and fractional
 * precision fields), and DEBUG_TM(_X) logs the tm_xxx fields of the struct
 * that _X points to, both at level DEBUG_elog_output.  Without it, both
 * macros expand to nothing and do not evaluate their argument.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d\ntzsign: %d\ntzh: %d\ntzm: %d\nff: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock, (_X)->tzsign, (_X)->tzh, (_X)->tzm, \
			(_X)->ff)
/* The isdst label used to be printed as "nisdst" because of a stray 'n'. */
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
492 
493 /* ----------
494  * Datetime to char conversion
495  * ----------
496  */
/*
 * Broken-down time plus fractional seconds and a timezone string; this is
 * the 'in' argument of DCH_to_char().
 */
typedef struct TmToChar
{
	struct pg_tm tm;			/* classic 'tm' struct */
	fsec_t		fsec;			/* fractional seconds */
	const char *tzn;			/* timezone */
} TmToChar;

/* Accessors taking a pointer to a TmToChar */
#define tmtcTm(_X)	(&(_X)->tm)		/* address of the embedded pg_tm */
#define tmtcTzn(_X) ((_X)->tzn)		/* timezone string (lvalue) */
#define tmtcFsec(_X)	((_X)->fsec)	/* fractional seconds (lvalue) */
507 
/*
 * Reset the tm-style struct that _X points to: all listed fields become 0,
 * except tm_mday and tm_mon which become 1, and tm_zone which becomes NULL.
 */
#define ZERO_tm(_X) \
do {	\
	(_X)->tm_sec = 0; \
	(_X)->tm_min = 0; \
	(_X)->tm_hour = 0; \
	(_X)->tm_mday = 1; \
	(_X)->tm_mon = 1; \
	(_X)->tm_year = 0; \
	(_X)->tm_wday = 0; \
	(_X)->tm_yday = 0; \
	(_X)->tm_isdst = 0; \
	(_X)->tm_zone = NULL; \
} while(0)
515 
/*
 * Reset the TmToChar that _X points to: ZERO_tm() on the embedded tm, zero
 * fractional seconds, no timezone string.
 */
#define ZERO_tmtc(_X) \
do { \
	ZERO_tm(&(_X)->tm); \
	(_X)->fsec = 0; \
	(_X)->tzn = NULL; \
} while(0)
522 
523 /*
524  *	to_char(time) appears to to_char() as an interval, so this check
525  *	is really for interval and time data types.
526  */
/*
 * Raise an error if the 'is_interval' variable of the enclosing function is
 * true; do nothing otherwise.
 */
#define INVALID_FOR_INTERVAL  \
do { \
	if (!is_interval) \
		break; \
	ereport(ERROR, \
			(errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
			 errmsg("invalid format specification for an interval value"), \
			 errhint("Intervals are not tied to specific calendar dates."))); \
} while(0)
535 
536 /*****************************************************************************
537  *			KeyWord definitions
538  *****************************************************************************/
539 
540 /* ----------
541  * Suffixes (FormatNode.suffix is an OR of these codes)
542  * ----------
543  */
/* One bit per suffix; FormatNode.suffix ORs them together. */
#define DCH_S_FM	(1 << 0)
#define DCH_S_TH	(1 << 1)
#define DCH_S_th	(1 << 2)
#define DCH_S_SP	(1 << 3)
#define DCH_S_TM	(1 << 4)
549 
550 /* ----------
551  * Suffix tests
552  * ----------
553  */
/* 1 if either ordinal suffix (TH or th) is present, else 0 */
#define S_THth(_s)	(((_s) & DCH_S_TH) != 0 || ((_s) & DCH_S_th) != 0)
#define S_TH(_s)	(((_s) & DCH_S_TH) != 0)
#define S_th(_s)	(((_s) & DCH_S_th) != 0)
/* TH_UPPER when the TH bit is set, TH_LOWER in every other case */
#define S_TH_TYPE(_s)	(((_s) & DCH_S_TH) == 0 ? TH_LOWER : TH_UPPER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)	(((_s) & DCH_S_FM) != 0)
#define S_SP(_s)	(((_s) & DCH_S_SP) != 0)
#define S_TM(_s)	(((_s) & DCH_S_TM) != 0)
563 
564 /* ----------
565  * Suffixes definition for DATE-TIME TO/FROM CHAR
566  * ----------
567  */
568 #define TM_SUFFIX_LEN	2
569 
570 static const KeySuffix DCH_suff[] = {
571 	{"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
572 	{"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
573 	{"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
574 	{"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
575 	{"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
576 	{"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
577 	{"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
578 	/* last */
579 	{NULL, 0, 0, 0}
580 };
581 
582 
583 /* ----------
584  * Format-pictures (KeyWord).
585  *
 * The KeyWord table is sorted alphabetically, *BUT* keywords that begin
 * alike are ordered from the most complicated to the simplest:
 *
 *	(example: "DDD","DD","Day","D" )
 *
 * (This ordering is required by the sequential search, because a keyword
 * inside a format string has no explicit end.  Which keyword is in
 * "HH12blabla": "HH" or "HH12"?  "HH12" must be tried first, because "HH"
 * also matches the string but is not the right answer.)
 *
 * (!)
 *	 - The position of a keyword in the table is the same as its position in
 *	   the enum DCH/NUM_poz.
 * (!)
 *
 * For fast searching the 'int index[]' array is used.  The index covers the
 * ASCII table from position 32 (' ') to 126 (~); for each ASCII position it
 * holds a DCH_ / NUM_ enum value, or -1 if the char is not used in any
 * KeyWord.  Search example for string "MM":
 *	1)	look up index['M' - 32],
 *	2)	take the keyword's position (enum DCH_MI) from the index
 *	3)	run a sequential search in keywords[] from this position
607  *
608  * ----------
609  */
610 
/*
 * Positions of the datetime keywords.
 *
 * The enumerators are listed in exactly the same order as the entries of
 * DCH_keywords[], so each value is also the array position of the
 * corresponding entry.  DCH_index[] stores these values as search start
 * positions.  Do not reorder one without the other.
 */
typedef enum
{
	DCH_A_D,
	DCH_A_M,
	DCH_AD,
	DCH_AM,
	DCH_B_C,
	DCH_BC,
	DCH_CC,
	DCH_DAY,
	DCH_DDD,
	DCH_DD,
	DCH_DY,
	DCH_Day,
	DCH_Dy,
	DCH_D,
	DCH_FF1,
	DCH_FF2,
	DCH_FF3,
	DCH_FF4,
	DCH_FF5,
	DCH_FF6,
	DCH_FX,						/* global suffix */
	DCH_HH24,
	DCH_HH12,
	DCH_HH,
	DCH_IDDD,
	DCH_ID,
	DCH_IW,
	DCH_IYYY,
	DCH_IYY,
	DCH_IY,
	DCH_I,
	DCH_J,
	DCH_MI,
	DCH_MM,
	DCH_MONTH,
	DCH_MON,
	DCH_MS,
	DCH_Month,
	DCH_Mon,
	DCH_OF,
	DCH_P_M,
	DCH_PM,
	DCH_Q,
	DCH_RM,
	DCH_SSSSS,
	DCH_SSSS,
	DCH_SS,
	DCH_TZH,
	DCH_TZM,
	DCH_TZ,
	DCH_US,
	DCH_WW,
	DCH_W,
	DCH_Y_YYY,
	DCH_YYYY,
	DCH_YYY,
	DCH_YY,
	DCH_Y,
	DCH_a_d,
	DCH_a_m,
	DCH_ad,
	DCH_am,
	DCH_b_c,
	DCH_bc,
	DCH_cc,
	DCH_day,
	DCH_ddd,
	DCH_dd,
	DCH_dy,
	DCH_d,
	DCH_ff1,
	DCH_ff2,
	DCH_ff3,
	DCH_ff4,
	DCH_ff5,
	DCH_ff6,
	DCH_fx,
	DCH_hh24,
	DCH_hh12,
	DCH_hh,
	DCH_iddd,
	DCH_id,
	DCH_iw,
	DCH_iyyy,
	DCH_iyy,
	DCH_iy,
	DCH_i,
	DCH_j,
	DCH_mi,
	DCH_mm,
	DCH_month,
	DCH_mon,
	DCH_ms,
	DCH_p_m,
	DCH_pm,
	DCH_q,
	DCH_rm,
	DCH_sssss,
	DCH_ssss,
	DCH_ss,
	DCH_tz,
	DCH_us,
	DCH_ww,
	DCH_w,
	DCH_y_yyy,
	DCH_yyyy,
	DCH_yyy,
	DCH_yy,
	DCH_y,

	/* last */
	_DCH_last_
}			DCH_poz;
726 
/*
 * Positions of the number keywords.
 *
 * The enumerators are listed in exactly the same order as the entries of
 * NUM_keywords[], so each value is also the array position of the
 * corresponding entry.  NUM_index[] stores these values as search start
 * positions.  Do not reorder one without the other.
 */
typedef enum
{
	NUM_COMMA,
	NUM_DEC,
	NUM_0,
	NUM_9,
	NUM_B,
	NUM_C,
	NUM_D,
	NUM_E,
	NUM_FM,
	NUM_G,
	NUM_L,
	NUM_MI,
	NUM_PL,
	NUM_PR,
	NUM_RN,
	NUM_SG,
	NUM_SP,
	NUM_S,
	NUM_TH,
	NUM_V,
	NUM_b,
	NUM_c,
	NUM_d,
	NUM_e,
	NUM_fm,
	NUM_g,
	NUM_l,
	NUM_mi,
	NUM_pl,
	NUM_pr,
	NUM_rn,
	NUM_sg,
	NUM_sp,
	NUM_s,
	NUM_th,
	NUM_v,

	/* last */
	_NUM_last_
}			NUM_poz;
769 
/* ----------
 * KeyWords for DATE-TIME version
 *
 * The entry at array position N corresponds to the DCH_poz enumerator with
 * value N.  The 'id' field is a separate thing: most lower-case spellings
 * carry the id of their upper-case twin (e.g. "cc" has id DCH_CC), and both
 * "SSSSS" and "SSSS" carry id DCH_SSSS.  Only spellings whose id differs by
 * case (day, dy, month, mon, p.m., pm, rm, tz, ...) have their own id.
 *
 * Keywords starting with the same character are listed longest/most specific
 * first.  The table ends with an entry whose name is NULL.
 * ----------
 */
static const KeyWord DCH_keywords[] = {
/*	name, len, id, is_digit, date_mode */
	{"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE},	/* A */
	{"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
	{"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
	{"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
	{"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE},	/* B */
	{"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
	{"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},	/* C */
	{"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE},	/* D */
	{"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
	{"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
	{"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
	{"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
	{"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
	{"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
	{"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},	/* F */
	{"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
	{"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
	{"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
	{"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
	{"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
	{"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
	{"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},	/* H */
	{"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
	{"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
	{"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},	/* I */
	{"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
	{"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
	{"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
	{"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
	{"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
	{"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},	/* M */
	{"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
	{"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
	{"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
	{"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
	{"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
	{"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
	{"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE},	/* O */
	{"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE},	/* P */
	{"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
	{"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
	{"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
	{"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},	/* S */
	{"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
	{"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
	{"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE},	/* T */
	{"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
	{"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
	{"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE},	/* U */
	{"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},	/* W */
	{"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
	{"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},	/* Y */
	{"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
	{"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
	{"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE},	/* a */
	{"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
	{"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
	{"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
	{"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE},	/* b */
	{"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
	{"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},	/* c */
	{"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE},	/* d */
	{"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
	{"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
	{"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
	{"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
	{"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},	/* f */
	{"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
	{"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
	{"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
	{"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
	{"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
	{"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
	{"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},	/* h */
	{"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
	{"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
	{"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},	/* i */
	{"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
	{"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
	{"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
	{"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
	{"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
	{"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},	/* m */
	{"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
	{"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
	{"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
	{"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
	{"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE},	/* p */
	{"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
	{"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
	{"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
	{"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},	/* s */
	{"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
	{"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
	{"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},	/* t */
	{"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE},	/* u */
	{"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},	/* w */
	{"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
	{"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},	/* y */
	{"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
	{"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},

	/* last */
	{NULL, 0, 0, 0, 0}
};
889 
/* ----------
 * KeyWords for NUMBER version
 *
 * The is_digit and date_mode fields are not relevant here, so the
 * initializers omit them (they are zero-initialized).
 *
 * The entry at array position N corresponds to the NUM_poz enumerator with
 * value N.  Most lower-case spellings carry the id of their upper-case twin;
 * only "rn" and "th" have ids of their own (NUM_rn, NUM_th).  The table ends
 * with an entry whose name is NULL.
 * ----------
 */
static const KeyWord NUM_keywords[] = {
/*	name, len, id			is in Index */
	{",", 1, NUM_COMMA},		/* , */
	{".", 1, NUM_DEC},			/* . */
	{"0", 1, NUM_0},			/* 0 */
	{"9", 1, NUM_9},			/* 9 */
	{"B", 1, NUM_B},			/* B */
	{"C", 1, NUM_C},			/* C */
	{"D", 1, NUM_D},			/* D */
	{"EEEE", 4, NUM_E},			/* E */
	{"FM", 2, NUM_FM},			/* F */
	{"G", 1, NUM_G},			/* G */
	{"L", 1, NUM_L},			/* L */
	{"MI", 2, NUM_MI},			/* M */
	{"PL", 2, NUM_PL},			/* P */
	{"PR", 2, NUM_PR},
	{"RN", 2, NUM_RN},			/* R */
	{"SG", 2, NUM_SG},			/* S */
	{"SP", 2, NUM_SP},
	{"S", 1, NUM_S},
	{"TH", 2, NUM_TH},			/* T */
	{"V", 1, NUM_V},			/* V */
	{"b", 1, NUM_B},			/* b */
	{"c", 1, NUM_C},			/* c */
	{"d", 1, NUM_D},			/* d */
	{"eeee", 4, NUM_E},			/* e */
	{"fm", 2, NUM_FM},			/* f */
	{"g", 1, NUM_G},			/* g */
	{"l", 1, NUM_L},			/* l */
	{"mi", 2, NUM_MI},			/* m */
	{"pl", 2, NUM_PL},			/* p */
	{"pr", 2, NUM_PR},
	{"rn", 2, NUM_rn},			/* r */
	{"sg", 2, NUM_SG},			/* s */
	{"sp", 2, NUM_SP},
	{"s", 1, NUM_S},
	{"th", 2, NUM_th},			/* t */
	{"v", 1, NUM_V},			/* v */

	/* last */
	{NULL, 0, 0}
};
938 
939 
/* ----------
 * KeyWords index for DATE-TIME version
 *
 * Slot (c - ' ') belongs to ASCII character c.  It holds the DCH_poz
 * position of the first DCH_keywords[] entry that starts with c, or -1 when
 * no datetime keyword starts with c.
 * ----------
 */
static const int DCH_index[KeyWord_INDEX_SIZE] = {
/*
0	1	2	3	4	5	6	7	8	9
*/
	/*---- first 0..31 chars are skipped ----*/

	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
	DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
	DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,
	-1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
	DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
	-1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww,
	-1, DCH_y_yyy, -1, -1, -1, -1

	/*---- chars over 126 are skipped ----*/
};
963 
/* ----------
 * KeyWords index for NUMBER version
 *
 * Slot (c - ' ') belongs to ASCII character c.  It holds the NUM_poz
 * position of the first NUM_keywords[] entry that starts with c, or -1 when
 * no number keyword starts with c.
 * ----------
 */
static const int NUM_index[KeyWord_INDEX_SIZE] = {
/*
0	1	2	3	4	5	6	7	8	9
*/
	/*---- first 0..31 chars are skipped ----*/

	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
	-1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
	-1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
	NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
	NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
	NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
	-1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
	-1, -1, -1, -1, -1, -1

	/*---- chars over 126 are skipped ----*/
};
987 
988 /* ----------
989  * Number processor struct
990  * ----------
991  */
992 typedef struct NUMProc
993 {
994 	bool		is_to_char;
995 	NUMDesc    *Num;			/* number description		*/
996 
997 	int			sign,			/* '-' or '+'			*/
998 				sign_wrote,		/* was sign write		*/
999 				num_count,		/* number of write digits	*/
1000 				num_in,			/* is inside number		*/
1001 				num_curr,		/* current position in number	*/
1002 				out_pre_spaces, /* spaces before first digit	*/
1003 
1004 				read_dec,		/* to_number - was read dec. point	*/
1005 				read_post,		/* to_number - number of dec. digit */
1006 				read_pre;		/* to_number - number non-dec. digit */
1007 
1008 	char	   *number,			/* string with number	*/
1009 			   *number_p,		/* pointer to current number position */
1010 			   *inout,			/* in / out buffer	*/
1011 			   *inout_p,		/* pointer to current inout position */
1012 			   *last_relevant,	/* last relevant number after decimal point */
1013 
1014 			   *L_negative_sign,	/* Locale */
1015 			   *L_positive_sign,
1016 			   *decimal,
1017 			   *L_thousands_sep,
1018 			   *L_currency_symbol;
1019 } NUMProc;
1020 
/*
 * Return flags for DCH_from_char().  Each flag is a distinct bit, so the
 * values can be OR'ed together.
 */
#define DCH_DATED	(1 << 0)
#define DCH_TIMED	(1 << 1)
#define DCH_ZONED	(1 << 2)
1025 
1026 /* ----------
1027  * Functions
1028  * ----------
1029  */
1030 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1031 									   const int *index);
1032 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1033 static bool is_separator_char(const char *str);
1034 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
1035 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1036 						 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1037 
1038 static void DCH_to_char(FormatNode *node, bool is_interval,
1039 						TmToChar *in, char *out, Oid collid);
1040 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
1041 						  Oid collid, bool std, bool *have_error);
1042 
1043 #ifdef DEBUG_TO_FROM_CHAR
1044 static void dump_index(const KeyWord *k, const int *index);
1045 static void dump_node(FormatNode *node, int max);
1046 #endif
1047 
1048 static const char *get_th(char *num, int type);
1049 static char *str_numth(char *dest, char *num, int type);
1050 static int	adjust_partial_year_to_2020(int year);
1051 static int	strspace_len(const char *str);
1052 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1053 							   bool *have_error);
1054 static void from_char_set_int(int *dest, const int value, const FormatNode *node,
1055 							  bool *have_error);
1056 static int	from_char_parse_int_len(int *dest, const char **src, const int len,
1057 									FormatNode *node, bool *have_error);
1058 static int	from_char_parse_int(int *dest, const char **src, FormatNode *node,
1059 								bool *have_error);
1060 static int	seq_search_ascii(const char *name, const char *const *array, int *len);
1061 static int	seq_search_localized(const char *name, char **array, int *len,
1062 								 Oid collid);
1063 static int	from_char_seq_search(int *dest, const char **src,
1064 								 const char *const *array,
1065 								 char **localized_array, Oid collid,
1066 								 FormatNode *node, bool *have_error);
1067 static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
1068 							struct pg_tm *tm, fsec_t *fsec, int *fprec,
1069 							uint32 *flags, bool *have_error);
1070 static char *fill_str(char *str, int c, int max);
1071 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1072 static char *int_to_roman(int number);
1073 static void NUM_prepare_locale(NUMProc *Np);
1074 static char *get_last_relevant_decnum(char *num);
1075 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1076 static void NUM_numpart_to_char(NUMProc *Np, int id);
1077 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1078 						   char *number, int input_len, int to_char_out_pre_spaces,
1079 						   int sign, bool is_to_char, Oid collid);
1080 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1081 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1082 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1083 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1084 static NUMCacheEntry *NUM_cache_search(const char *str);
1085 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1086 
1087 
1088 /* ----------
1089  * Fast sequential search, use index for data selection which
1090  * go to seq. cycle (it is very fast for unwanted strings)
1091  * (can't be used binary search in format parsing)
1092  * ----------
1093  */
1094 static const KeyWord *
index_seq_search(const char * str,const KeyWord * kw,const int * index)1095 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1096 {
1097 	int			poz;
1098 
1099 	if (!KeyWord_INDEX_FILTER(*str))
1100 		return NULL;
1101 
1102 	if ((poz = *(index + (*str - ' '))) > -1)
1103 	{
1104 		const KeyWord *k = kw + poz;
1105 
1106 		do
1107 		{
1108 			if (strncmp(str, k->name, k->len) == 0)
1109 				return k;
1110 			k++;
1111 			if (!k->name)
1112 				return NULL;
1113 		} while (*str == *k->name);
1114 	}
1115 	return NULL;
1116 }
1117 
1118 static const KeySuffix *
suff_search(const char * str,const KeySuffix * suf,int type)1119 suff_search(const char *str, const KeySuffix *suf, int type)
1120 {
1121 	const KeySuffix *s;
1122 
1123 	for (s = suf; s->name != NULL; s++)
1124 	{
1125 		if (s->type != type)
1126 			continue;
1127 
1128 		if (strncmp(str, s->name, s->len) == 0)
1129 			return s;
1130 	}
1131 	return NULL;
1132 }
1133 
/*
 * Is the first byte of 'str' an ASCII printable character other than a
 * space, a letter or a digit?
 */
static bool
is_separator_char(const char *str)
{
	const char	ch = *str;

	/* Must lie strictly between space (0x20) and DEL (0x7F) */
	if (ch <= 0x20 || ch >= 0x7F)
		return false;

	/* Letters and digits are not separators */
	if (ch >= 'A' && ch <= 'Z')
		return false;
	if (ch >= 'a' && ch <= 'z')
		return false;
	if (ch >= '0' && ch <= '9')
		return false;

	return true;
}
1143 
1144 /* ----------
1145  * Prepare NUMDesc (number description struct) via FormatNode struct
1146  * ----------
1147  */
1148 static void
NUMDesc_prepare(NUMDesc * num,FormatNode * n)1149 NUMDesc_prepare(NUMDesc *num, FormatNode *n)
1150 {
1151 	if (n->type != NODE_TYPE_ACTION)
1152 		return;
1153 
1154 	if (IS_EEEE(num) && n->key->id != NUM_E)
1155 		ereport(ERROR,
1156 				(errcode(ERRCODE_SYNTAX_ERROR),
1157 				 errmsg("\"EEEE\" must be the last pattern used")));
1158 
1159 	switch (n->key->id)
1160 	{
1161 		case NUM_9:
1162 			if (IS_BRACKET(num))
1163 				ereport(ERROR,
1164 						(errcode(ERRCODE_SYNTAX_ERROR),
1165 						 errmsg("\"9\" must be ahead of \"PR\"")));
1166 			if (IS_MULTI(num))
1167 			{
1168 				++num->multi;
1169 				break;
1170 			}
1171 			if (IS_DECIMAL(num))
1172 				++num->post;
1173 			else
1174 				++num->pre;
1175 			break;
1176 
1177 		case NUM_0:
1178 			if (IS_BRACKET(num))
1179 				ereport(ERROR,
1180 						(errcode(ERRCODE_SYNTAX_ERROR),
1181 						 errmsg("\"0\" must be ahead of \"PR\"")));
1182 			if (!IS_ZERO(num) && !IS_DECIMAL(num))
1183 			{
1184 				num->flag |= NUM_F_ZERO;
1185 				num->zero_start = num->pre + 1;
1186 			}
1187 			if (!IS_DECIMAL(num))
1188 				++num->pre;
1189 			else
1190 				++num->post;
1191 
1192 			num->zero_end = num->pre + num->post;
1193 			break;
1194 
1195 		case NUM_B:
1196 			if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1197 				num->flag |= NUM_F_BLANK;
1198 			break;
1199 
1200 		case NUM_D:
1201 			num->flag |= NUM_F_LDECIMAL;
1202 			num->need_locale = true;
1203 			/* FALLTHROUGH */
1204 		case NUM_DEC:
1205 			if (IS_DECIMAL(num))
1206 				ereport(ERROR,
1207 						(errcode(ERRCODE_SYNTAX_ERROR),
1208 						 errmsg("multiple decimal points")));
1209 			if (IS_MULTI(num))
1210 				ereport(ERROR,
1211 						(errcode(ERRCODE_SYNTAX_ERROR),
1212 						 errmsg("cannot use \"V\" and decimal point together")));
1213 			num->flag |= NUM_F_DECIMAL;
1214 			break;
1215 
1216 		case NUM_FM:
1217 			num->flag |= NUM_F_FILLMODE;
1218 			break;
1219 
1220 		case NUM_S:
1221 			if (IS_LSIGN(num))
1222 				ereport(ERROR,
1223 						(errcode(ERRCODE_SYNTAX_ERROR),
1224 						 errmsg("cannot use \"S\" twice")));
1225 			if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1226 				ereport(ERROR,
1227 						(errcode(ERRCODE_SYNTAX_ERROR),
1228 						 errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1229 			if (!IS_DECIMAL(num))
1230 			{
1231 				num->lsign = NUM_LSIGN_PRE;
1232 				num->pre_lsign_num = num->pre;
1233 				num->need_locale = true;
1234 				num->flag |= NUM_F_LSIGN;
1235 			}
1236 			else if (num->lsign == NUM_LSIGN_NONE)
1237 			{
1238 				num->lsign = NUM_LSIGN_POST;
1239 				num->need_locale = true;
1240 				num->flag |= NUM_F_LSIGN;
1241 			}
1242 			break;
1243 
1244 		case NUM_MI:
1245 			if (IS_LSIGN(num))
1246 				ereport(ERROR,
1247 						(errcode(ERRCODE_SYNTAX_ERROR),
1248 						 errmsg("cannot use \"S\" and \"MI\" together")));
1249 			num->flag |= NUM_F_MINUS;
1250 			if (IS_DECIMAL(num))
1251 				num->flag |= NUM_F_MINUS_POST;
1252 			break;
1253 
1254 		case NUM_PL:
1255 			if (IS_LSIGN(num))
1256 				ereport(ERROR,
1257 						(errcode(ERRCODE_SYNTAX_ERROR),
1258 						 errmsg("cannot use \"S\" and \"PL\" together")));
1259 			num->flag |= NUM_F_PLUS;
1260 			if (IS_DECIMAL(num))
1261 				num->flag |= NUM_F_PLUS_POST;
1262 			break;
1263 
1264 		case NUM_SG:
1265 			if (IS_LSIGN(num))
1266 				ereport(ERROR,
1267 						(errcode(ERRCODE_SYNTAX_ERROR),
1268 						 errmsg("cannot use \"S\" and \"SG\" together")));
1269 			num->flag |= NUM_F_MINUS;
1270 			num->flag |= NUM_F_PLUS;
1271 			break;
1272 
1273 		case NUM_PR:
1274 			if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1275 				ereport(ERROR,
1276 						(errcode(ERRCODE_SYNTAX_ERROR),
1277 						 errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1278 			num->flag |= NUM_F_BRACKET;
1279 			break;
1280 
1281 		case NUM_rn:
1282 		case NUM_RN:
1283 			num->flag |= NUM_F_ROMAN;
1284 			break;
1285 
1286 		case NUM_L:
1287 		case NUM_G:
1288 			num->need_locale = true;
1289 			break;
1290 
1291 		case NUM_V:
1292 			if (IS_DECIMAL(num))
1293 				ereport(ERROR,
1294 						(errcode(ERRCODE_SYNTAX_ERROR),
1295 						 errmsg("cannot use \"V\" and decimal point together")));
1296 			num->flag |= NUM_F_MULTI;
1297 			break;
1298 
1299 		case NUM_E:
1300 			if (IS_EEEE(num))
1301 				ereport(ERROR,
1302 						(errcode(ERRCODE_SYNTAX_ERROR),
1303 						 errmsg("cannot use \"EEEE\" twice")));
1304 			if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1305 				IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1306 				IS_ROMAN(num) || IS_MULTI(num))
1307 				ereport(ERROR,
1308 						(errcode(ERRCODE_SYNTAX_ERROR),
1309 						 errmsg("\"EEEE\" is incompatible with other formats"),
1310 						 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1311 			num->flag |= NUM_F_EEEE;
1312 			break;
1313 	}
1314 }
1315 
1316 /* ----------
1317  * Format parser, search small keywords and keyword's suffixes, and make
1318  * format-node tree.
1319  *
1320  * for DATE-TIME & NUMBER version
1321  * ----------
1322  */
1323 static void
parse_format(FormatNode * node,const char * str,const KeyWord * kw,const KeySuffix * suf,const int * index,uint32 flags,NUMDesc * Num)1324 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1325 			 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1326 {
1327 	FormatNode *n;
1328 
1329 #ifdef DEBUG_TO_FROM_CHAR
1330 	elog(DEBUG_elog_output, "to_char/number(): run parser");
1331 #endif
1332 
1333 	n = node;
1334 
1335 	while (*str)
1336 	{
1337 		int			suffix = 0;
1338 		const KeySuffix *s;
1339 
1340 		/*
1341 		 * Prefix
1342 		 */
1343 		if ((flags & DCH_FLAG) &&
1344 			(s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1345 		{
1346 			suffix |= s->id;
1347 			if (s->len)
1348 				str += s->len;
1349 		}
1350 
1351 		/*
1352 		 * Keyword
1353 		 */
1354 		if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1355 		{
1356 			n->type = NODE_TYPE_ACTION;
1357 			n->suffix = suffix;
1358 			if (n->key->len)
1359 				str += n->key->len;
1360 
1361 			/*
1362 			 * NUM version: Prepare global NUMDesc struct
1363 			 */
1364 			if (flags & NUM_FLAG)
1365 				NUMDesc_prepare(Num, n);
1366 
1367 			/*
1368 			 * Postfix
1369 			 */
1370 			if ((flags & DCH_FLAG) && *str &&
1371 				(s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1372 			{
1373 				n->suffix |= s->id;
1374 				if (s->len)
1375 					str += s->len;
1376 			}
1377 
1378 			n++;
1379 		}
1380 		else if (*str)
1381 		{
1382 			int			chlen;
1383 
1384 			if ((flags & STD_FLAG) && *str != '"')
1385 			{
1386 				/*
1387 				 * Standard mode, allow only following separators: "-./,':; ".
1388 				 * However, we support double quotes even in standard mode
1389 				 * (see below).  This is our extension of standard mode.
1390 				 */
1391 				if (strchr("-./,':; ", *str) == NULL)
1392 					ereport(ERROR,
1393 							(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1394 							 errmsg("invalid datetime format separator: \"%s\"",
1395 									pnstrdup(str, pg_mblen(str)))));
1396 
1397 				if (*str == ' ')
1398 					n->type = NODE_TYPE_SPACE;
1399 				else
1400 					n->type = NODE_TYPE_SEPARATOR;
1401 
1402 				n->character[0] = *str;
1403 				n->character[1] = '\0';
1404 				n->key = NULL;
1405 				n->suffix = 0;
1406 				n++;
1407 				str++;
1408 			}
1409 			else if (*str == '"')
1410 			{
1411 				/*
1412 				 * Process double-quoted literal string, if any
1413 				 */
1414 				str++;
1415 				while (*str)
1416 				{
1417 					if (*str == '"')
1418 					{
1419 						str++;
1420 						break;
1421 					}
1422 					/* backslash quotes the next character, if any */
1423 					if (*str == '\\' && *(str + 1))
1424 						str++;
1425 					chlen = pg_mblen(str);
1426 					n->type = NODE_TYPE_CHAR;
1427 					memcpy(n->character, str, chlen);
1428 					n->character[chlen] = '\0';
1429 					n->key = NULL;
1430 					n->suffix = 0;
1431 					n++;
1432 					str += chlen;
1433 				}
1434 			}
1435 			else
1436 			{
1437 				/*
1438 				 * Outside double-quoted strings, backslash is only special if
1439 				 * it immediately precedes a double quote.
1440 				 */
1441 				if (*str == '\\' && *(str + 1) == '"')
1442 					str++;
1443 				chlen = pg_mblen(str);
1444 
1445 				if ((flags & DCH_FLAG) && is_separator_char(str))
1446 					n->type = NODE_TYPE_SEPARATOR;
1447 				else if (isspace((unsigned char) *str))
1448 					n->type = NODE_TYPE_SPACE;
1449 				else
1450 					n->type = NODE_TYPE_CHAR;
1451 
1452 				memcpy(n->character, str, chlen);
1453 				n->character[chlen] = '\0';
1454 				n->key = NULL;
1455 				n->suffix = 0;
1456 				n++;
1457 				str += chlen;
1458 			}
1459 		}
1460 	}
1461 
1462 	n->type = NODE_TYPE_END;
1463 	n->suffix = 0;
1464 }
1465 
/* ----------
 * DEBUG: Dump the FormatNode Tree (debug)
 *
 * Logs one line per node, starting at 'node', and stops at the first
 * NODE_TYPE_END node or after node number 'max', whichever comes first.
 * Keyword nodes are shown with their TH/th and FM suffix state; character,
 * separator and space nodes are shown with the text they carry.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	FormatNode *n;
	int			a;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (a = 0, n = node; a <= max; n++, a++)
	{
		switch (n->type)
		{
			case NODE_TYPE_ACTION:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
					 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
				break;

			case NODE_TYPE_CHAR:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
					 a, n->character);
				break;

				/*
				 * The parser also produces separator and space nodes; report
				 * them by name instead of as unknown nodes.
				 */
			case NODE_TYPE_SEPARATOR:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SEPARATOR '%s'",
					 a, n->character);
				break;

			case NODE_TYPE_SPACE:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SPACE '%s'",
					 a, n->character);
				break;

			case NODE_TYPE_END:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
				return;

			default:
				elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
				break;
		}
	}
}
#endif							/* DEBUG */
1501 
1502 /*****************************************************************************
1503  *			Private utils
1504  *****************************************************************************/
1505 
1506 /* ----------
1507  * Return ST/ND/RD/TH for simple (1..9) numbers
1508  * type --> 0 upper, 1 lower
1509  * ----------
1510  */
1511 static const char *
get_th(char * num,int type)1512 get_th(char *num, int type)
1513 {
1514 	int			len = strlen(num),
1515 				last;
1516 
1517 	last = *(num + (len - 1));
1518 	if (!isdigit((unsigned char) last))
1519 		ereport(ERROR,
1520 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1521 				 errmsg("\"%s\" is not a number", num)));
1522 
1523 	/*
1524 	 * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1525 	 * 'ST/st', 'ND/nd', 'RD/rd', respectively
1526 	 */
1527 	if ((len > 1) && (num[len - 2] == '1'))
1528 		last = 0;
1529 
1530 	switch (last)
1531 	{
1532 		case '1':
1533 			if (type == TH_UPPER)
1534 				return numTH[0];
1535 			return numth[0];
1536 		case '2':
1537 			if (type == TH_UPPER)
1538 				return numTH[1];
1539 			return numth[1];
1540 		case '3':
1541 			if (type == TH_UPPER)
1542 				return numTH[2];
1543 			return numth[2];
1544 		default:
1545 			if (type == TH_UPPER)
1546 				return numTH[3];
1547 			return numth[3];
1548 	}
1549 }
1550 
/* ----------
 * Convert string-number to ordinal string-number
 *
 * Copies 'num' into 'dest' (skipped when both are the same buffer) and
 * appends the suffix chosen by get_th(num, type).  'dest' must have room
 * for the digits plus the suffix.  Returns 'dest'.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *ordinal;

	if (dest != num)
		strcpy(dest, num);

	ordinal = get_th(num, type);
	strcat(dest, ordinal);

	return dest;
}
1564 
1565 /*****************************************************************************
1566  *			upper/lower/initcap functions
1567  *****************************************************************************/
1568 
1569 #ifdef USE_ICU
1570 
1571 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1572 									 const UChar *src, int32_t srcLength,
1573 									 const char *locale,
1574 									 UErrorCode *pErrorCode);
1575 
1576 static int32_t
icu_convert_case(ICU_Convert_Func func,pg_locale_t mylocale,UChar ** buff_dest,UChar * buff_source,int32_t len_source)1577 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1578 				 UChar **buff_dest, UChar *buff_source, int32_t len_source)
1579 {
1580 	UErrorCode	status;
1581 	int32_t		len_dest;
1582 
1583 	len_dest = len_source;		/* try first with same length */
1584 	*buff_dest = palloc(len_dest * sizeof(**buff_dest));
1585 	status = U_ZERO_ERROR;
1586 	len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1587 					mylocale->info.icu.locale, &status);
1588 	if (status == U_BUFFER_OVERFLOW_ERROR)
1589 	{
1590 		/* try again with adjusted length */
1591 		pfree(*buff_dest);
1592 		*buff_dest = palloc(len_dest * sizeof(**buff_dest));
1593 		status = U_ZERO_ERROR;
1594 		len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1595 						mylocale->info.icu.locale, &status);
1596 	}
1597 	if (U_FAILURE(status))
1598 		ereport(ERROR,
1599 				(errmsg("case conversion failed: %s", u_errorName(status))));
1600 	return len_dest;
1601 }
1602 
1603 static int32_t
u_strToTitle_default_BI(UChar * dest,int32_t destCapacity,const UChar * src,int32_t srcLength,const char * locale,UErrorCode * pErrorCode)1604 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1605 						const UChar *src, int32_t srcLength,
1606 						const char *locale,
1607 						UErrorCode *pErrorCode)
1608 {
1609 	return u_strToTitle(dest, destCapacity, src, srcLength,
1610 						NULL, locale, pErrorCode);
1611 }
1612 
1613 #endif							/* USE_ICU */
1614 
1615 /*
1616  * If the system provides the needed functions for wide-character manipulation
1617  * (which are all standardized by C99), then we implement upper/lower/initcap
1618  * using wide-character functions, if necessary.  Otherwise we use the
1619  * traditional <ctype.h> functions, which of course will not work as desired
1620  * in multibyte character sets.  Note that in either case we are effectively
1621  * assuming that the database character encoding matches the encoding implied
1622  * by LC_CTYPE.
1623  *
1624  * If the system provides locale_t and associated functions (which are
1625  * standardized by Open Group's XBD), we can support collations that are
1626  * neither default nor C.  The code is written to handle both combinations
1627  * of have-wide-characters and have-locale_t, though it's rather unlikely
1628  * a platform would have the latter without the former.
1629  */
1630 
1631 /*
1632  * collation-aware, wide-character-aware lower function
1633  *
1634  * We pass the number of bytes so we can pass varlena and char*
1635  * to this function.  The result is a palloc'd, null-terminated string.
1636  */
1637 char *
str_tolower(const char * buff,size_t nbytes,Oid collid)1638 str_tolower(const char *buff, size_t nbytes, Oid collid)
1639 {
1640 	char	   *result;
1641 
1642 	if (!buff)
1643 		return NULL;
1644 
1645 	/* C/POSIX collations use this path regardless of database encoding */
1646 	if (lc_ctype_is_c(collid))
1647 	{
1648 		result = asc_tolower(buff, nbytes);
1649 	}
1650 	else
1651 	{
1652 		pg_locale_t mylocale = 0;
1653 
1654 		if (collid != DEFAULT_COLLATION_OID)
1655 		{
1656 			if (!OidIsValid(collid))
1657 			{
1658 				/*
1659 				 * This typically means that the parser could not resolve a
1660 				 * conflict of implicit collations, so report it that way.
1661 				 */
1662 				ereport(ERROR,
1663 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
1664 						 errmsg("could not determine which collation to use for %s function",
1665 								"lower()"),
1666 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
1667 			}
1668 			mylocale = pg_newlocale_from_collation(collid);
1669 		}
1670 
1671 #ifdef USE_ICU
1672 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1673 		{
1674 			int32_t		len_uchar;
1675 			int32_t		len_conv;
1676 			UChar	   *buff_uchar;
1677 			UChar	   *buff_conv;
1678 
1679 			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1680 			len_conv = icu_convert_case(u_strToLower, mylocale,
1681 										&buff_conv, buff_uchar, len_uchar);
1682 			icu_from_uchar(&result, buff_conv, len_conv);
1683 			pfree(buff_uchar);
1684 			pfree(buff_conv);
1685 		}
1686 		else
1687 #endif
1688 		{
1689 			if (pg_database_encoding_max_length() > 1)
1690 			{
1691 				wchar_t    *workspace;
1692 				size_t		curr_char;
1693 				size_t		result_size;
1694 
1695 				/* Overflow paranoia */
1696 				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1697 					ereport(ERROR,
1698 							(errcode(ERRCODE_OUT_OF_MEMORY),
1699 							 errmsg("out of memory")));
1700 
1701 				/* Output workspace cannot have more codes than input bytes */
1702 				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1703 
1704 				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1705 
1706 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1707 				{
1708 #ifdef HAVE_LOCALE_T
1709 					if (mylocale)
1710 						workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1711 					else
1712 #endif
1713 						workspace[curr_char] = towlower(workspace[curr_char]);
1714 				}
1715 
1716 				/*
1717 				 * Make result large enough; case change might change number
1718 				 * of bytes
1719 				 */
1720 				result_size = curr_char * pg_database_encoding_max_length() + 1;
1721 				result = palloc(result_size);
1722 
1723 				wchar2char(result, workspace, result_size, mylocale);
1724 				pfree(workspace);
1725 			}
1726 			else
1727 			{
1728 				char	   *p;
1729 
1730 				result = pnstrdup(buff, nbytes);
1731 
1732 				/*
1733 				 * Note: we assume that tolower_l() will not be so broken as
1734 				 * to need an isupper_l() guard test.  When using the default
1735 				 * collation, we apply the traditional Postgres behavior that
1736 				 * forces ASCII-style treatment of I/i, but in non-default
1737 				 * collations you get exactly what the collation says.
1738 				 */
1739 				for (p = result; *p; p++)
1740 				{
1741 #ifdef HAVE_LOCALE_T
1742 					if (mylocale)
1743 						*p = tolower_l((unsigned char) *p, mylocale->info.lt);
1744 					else
1745 #endif
1746 						*p = pg_tolower((unsigned char) *p);
1747 				}
1748 			}
1749 		}
1750 	}
1751 
1752 	return result;
1753 }
1754 
1755 /*
1756  * collation-aware, wide-character-aware upper function
1757  *
1758  * We pass the number of bytes so we can pass varlena and char*
1759  * to this function.  The result is a palloc'd, null-terminated string.
1760  */
1761 char *
str_toupper(const char * buff,size_t nbytes,Oid collid)1762 str_toupper(const char *buff, size_t nbytes, Oid collid)
1763 {
1764 	char	   *result;
1765 
1766 	if (!buff)
1767 		return NULL;
1768 
1769 	/* C/POSIX collations use this path regardless of database encoding */
1770 	if (lc_ctype_is_c(collid))
1771 	{
1772 		result = asc_toupper(buff, nbytes);
1773 	}
1774 	else
1775 	{
1776 		pg_locale_t mylocale = 0;
1777 
1778 		if (collid != DEFAULT_COLLATION_OID)
1779 		{
1780 			if (!OidIsValid(collid))
1781 			{
1782 				/*
1783 				 * This typically means that the parser could not resolve a
1784 				 * conflict of implicit collations, so report it that way.
1785 				 */
1786 				ereport(ERROR,
1787 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
1788 						 errmsg("could not determine which collation to use for %s function",
1789 								"upper()"),
1790 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
1791 			}
1792 			mylocale = pg_newlocale_from_collation(collid);
1793 		}
1794 
1795 #ifdef USE_ICU
1796 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1797 		{
1798 			int32_t		len_uchar,
1799 						len_conv;
1800 			UChar	   *buff_uchar;
1801 			UChar	   *buff_conv;
1802 
1803 			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1804 			len_conv = icu_convert_case(u_strToUpper, mylocale,
1805 										&buff_conv, buff_uchar, len_uchar);
1806 			icu_from_uchar(&result, buff_conv, len_conv);
1807 			pfree(buff_uchar);
1808 			pfree(buff_conv);
1809 		}
1810 		else
1811 #endif
1812 		{
1813 			if (pg_database_encoding_max_length() > 1)
1814 			{
1815 				wchar_t    *workspace;
1816 				size_t		curr_char;
1817 				size_t		result_size;
1818 
1819 				/* Overflow paranoia */
1820 				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1821 					ereport(ERROR,
1822 							(errcode(ERRCODE_OUT_OF_MEMORY),
1823 							 errmsg("out of memory")));
1824 
1825 				/* Output workspace cannot have more codes than input bytes */
1826 				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1827 
1828 				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1829 
1830 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1831 				{
1832 #ifdef HAVE_LOCALE_T
1833 					if (mylocale)
1834 						workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1835 					else
1836 #endif
1837 						workspace[curr_char] = towupper(workspace[curr_char]);
1838 				}
1839 
1840 				/*
1841 				 * Make result large enough; case change might change number
1842 				 * of bytes
1843 				 */
1844 				result_size = curr_char * pg_database_encoding_max_length() + 1;
1845 				result = palloc(result_size);
1846 
1847 				wchar2char(result, workspace, result_size, mylocale);
1848 				pfree(workspace);
1849 			}
1850 			else
1851 			{
1852 				char	   *p;
1853 
1854 				result = pnstrdup(buff, nbytes);
1855 
1856 				/*
1857 				 * Note: we assume that toupper_l() will not be so broken as
1858 				 * to need an islower_l() guard test.  When using the default
1859 				 * collation, we apply the traditional Postgres behavior that
1860 				 * forces ASCII-style treatment of I/i, but in non-default
1861 				 * collations you get exactly what the collation says.
1862 				 */
1863 				for (p = result; *p; p++)
1864 				{
1865 #ifdef HAVE_LOCALE_T
1866 					if (mylocale)
1867 						*p = toupper_l((unsigned char) *p, mylocale->info.lt);
1868 					else
1869 #endif
1870 						*p = pg_toupper((unsigned char) *p);
1871 				}
1872 			}
1873 		}
1874 	}
1875 
1876 	return result;
1877 }
1878 
1879 /*
1880  * collation-aware, wide-character-aware initcap function
1881  *
1882  * We pass the number of bytes so we can pass varlena and char*
1883  * to this function.  The result is a palloc'd, null-terminated string.
1884  */
1885 char *
str_initcap(const char * buff,size_t nbytes,Oid collid)1886 str_initcap(const char *buff, size_t nbytes, Oid collid)
1887 {
1888 	char	   *result;
1889 	int			wasalnum = false;
1890 
1891 	if (!buff)
1892 		return NULL;
1893 
1894 	/* C/POSIX collations use this path regardless of database encoding */
1895 	if (lc_ctype_is_c(collid))
1896 	{
1897 		result = asc_initcap(buff, nbytes);
1898 	}
1899 	else
1900 	{
1901 		pg_locale_t mylocale = 0;
1902 
1903 		if (collid != DEFAULT_COLLATION_OID)
1904 		{
1905 			if (!OidIsValid(collid))
1906 			{
1907 				/*
1908 				 * This typically means that the parser could not resolve a
1909 				 * conflict of implicit collations, so report it that way.
1910 				 */
1911 				ereport(ERROR,
1912 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
1913 						 errmsg("could not determine which collation to use for %s function",
1914 								"initcap()"),
1915 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
1916 			}
1917 			mylocale = pg_newlocale_from_collation(collid);
1918 		}
1919 
1920 #ifdef USE_ICU
1921 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1922 		{
1923 			int32_t		len_uchar,
1924 						len_conv;
1925 			UChar	   *buff_uchar;
1926 			UChar	   *buff_conv;
1927 
1928 			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1929 			len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1930 										&buff_conv, buff_uchar, len_uchar);
1931 			icu_from_uchar(&result, buff_conv, len_conv);
1932 			pfree(buff_uchar);
1933 			pfree(buff_conv);
1934 		}
1935 		else
1936 #endif
1937 		{
1938 			if (pg_database_encoding_max_length() > 1)
1939 			{
1940 				wchar_t    *workspace;
1941 				size_t		curr_char;
1942 				size_t		result_size;
1943 
1944 				/* Overflow paranoia */
1945 				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1946 					ereport(ERROR,
1947 							(errcode(ERRCODE_OUT_OF_MEMORY),
1948 							 errmsg("out of memory")));
1949 
1950 				/* Output workspace cannot have more codes than input bytes */
1951 				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1952 
1953 				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1954 
1955 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1956 				{
1957 #ifdef HAVE_LOCALE_T
1958 					if (mylocale)
1959 					{
1960 						if (wasalnum)
1961 							workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1962 						else
1963 							workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1964 						wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1965 					}
1966 					else
1967 #endif
1968 					{
1969 						if (wasalnum)
1970 							workspace[curr_char] = towlower(workspace[curr_char]);
1971 						else
1972 							workspace[curr_char] = towupper(workspace[curr_char]);
1973 						wasalnum = iswalnum(workspace[curr_char]);
1974 					}
1975 				}
1976 
1977 				/*
1978 				 * Make result large enough; case change might change number
1979 				 * of bytes
1980 				 */
1981 				result_size = curr_char * pg_database_encoding_max_length() + 1;
1982 				result = palloc(result_size);
1983 
1984 				wchar2char(result, workspace, result_size, mylocale);
1985 				pfree(workspace);
1986 			}
1987 			else
1988 			{
1989 				char	   *p;
1990 
1991 				result = pnstrdup(buff, nbytes);
1992 
1993 				/*
1994 				 * Note: we assume that toupper_l()/tolower_l() will not be so
1995 				 * broken as to need guard tests.  When using the default
1996 				 * collation, we apply the traditional Postgres behavior that
1997 				 * forces ASCII-style treatment of I/i, but in non-default
1998 				 * collations you get exactly what the collation says.
1999 				 */
2000 				for (p = result; *p; p++)
2001 				{
2002 #ifdef HAVE_LOCALE_T
2003 					if (mylocale)
2004 					{
2005 						if (wasalnum)
2006 							*p = tolower_l((unsigned char) *p, mylocale->info.lt);
2007 						else
2008 							*p = toupper_l((unsigned char) *p, mylocale->info.lt);
2009 						wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2010 					}
2011 					else
2012 #endif
2013 					{
2014 						if (wasalnum)
2015 							*p = pg_tolower((unsigned char) *p);
2016 						else
2017 							*p = pg_toupper((unsigned char) *p);
2018 						wasalnum = isalnum((unsigned char) *p);
2019 					}
2020 				}
2021 			}
2022 		}
2023 	}
2024 
2025 	return result;
2026 }
2027 
/*
 * asc_tolower - lower-case a string using ASCII-only rules
 *
 * The input is described by a pointer plus a byte count so that callers can
 * hand in either a varlena payload or an ordinary C string.  The result is
 * a palloc'd, null-terminated copy; a NULL input yields NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *p;

	if (buff == NULL)
		return NULL;

	/* work on a private, null-terminated copy of the input */
	result = pnstrdup(buff, nbytes);

	p = result;
	while (*p != '\0')
	{
		*p = pg_ascii_tolower((unsigned char) *p);
		p++;
	}

	return result;
}
2050 
/*
 * asc_toupper - upper-case a string using ASCII-only rules
 *
 * The input is described by a pointer plus a byte count so that callers can
 * hand in either a varlena payload or an ordinary C string.  The result is
 * a palloc'd, null-terminated copy; a NULL input yields NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	/* work on a private, null-terminated copy of the input */
	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_toupper((unsigned char) result[i]);

	return result;
}
2073 
/*
 * asc_initcap - capitalize each word of a string using ASCII-only rules
 *
 * A character that follows an ASCII letter or digit is lower-cased; any
 * other character (including the very first one) is upper-cased.
 *
 * The input is described by a pointer plus a byte count so that callers can
 * hand in either a varlena payload or an ordinary C string.  The result is
 * a palloc'd, null-terminated copy; a NULL input yields NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *p;
	int			in_word = false;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (p = result; *p != '\0'; p++)
	{
		unsigned char raw = (unsigned char) *p;
		char		c;

		c = in_word ? pg_ascii_tolower(raw) : pg_ascii_toupper(raw);
		*p = c;

		/*
		 * Classify the converted character by explicit ASCII ranges rather
		 * than isalnum(), which we don't trust here.
		 */
		in_word = ((c >= '0' && c <= '9') ||
				   (c >= 'A' && c <= 'Z') ||
				   (c >= 'a' && c <= 'z'));
	}

	return result;
}
2108 
2109 /* convenience routines for when the input is null-terminated */
2110 
2111 static char *
str_tolower_z(const char * buff,Oid collid)2112 str_tolower_z(const char *buff, Oid collid)
2113 {
2114 	return str_tolower(buff, strlen(buff), collid);
2115 }
2116 
2117 static char *
str_toupper_z(const char * buff,Oid collid)2118 str_toupper_z(const char *buff, Oid collid)
2119 {
2120 	return str_toupper(buff, strlen(buff), collid);
2121 }
2122 
2123 static char *
str_initcap_z(const char * buff,Oid collid)2124 str_initcap_z(const char *buff, Oid collid)
2125 {
2126 	return str_initcap(buff, strlen(buff), collid);
2127 }
2128 
/* asc_tolower() for a null-terminated input; length is taken via strlen() */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2134 
/* asc_toupper() for a null-terminated input; length is taken via strlen() */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2140 
2141 /* asc_initcap_z is not currently needed */
2142 
2143 
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * When S_THth(_suf) is set, advance 'ptr' over up to two characters, each
 * measured with pg_mblen(); stop early if the end of the string is reached.
 * Note that 'ptr' is evaluated more than once.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			int			_nskip; \
			for (_nskip = 0; _nskip < 2 && *(ptr); _nskip++) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2158 
2159 
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: dump the keyword index for inspection.
 *
 * For every index slot, log either the character it stands for (slot
 * number + 32) together with the keyword name it maps to, or the fact that
 * the slot is unused (-1).  A summary of used/free slot counts follows.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			slot;
	int			used_slots = 0;
	int			free_slots = 0;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (slot = 0; slot < KeyWord_INDEX_SIZE; slot++)
	{
		if (index[slot] == -1)
		{
			/* unused slot */
			free_slots++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", slot, slot + 32, index[slot]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", slot + 32, k[index[slot]].name);
		used_slots++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used_slots, free_slots);
}
#endif							/* DEBUG */
2192 
2193 /* ----------
2194  * Return true if next format picture is not digit value
2195  * ----------
2196  */
2197 static bool
is_next_separator(FormatNode * n)2198 is_next_separator(FormatNode *n)
2199 {
2200 	if (n->type == NODE_TYPE_END)
2201 		return false;
2202 
2203 	if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2204 		return true;
2205 
2206 	/*
2207 	 * Next node
2208 	 */
2209 	n++;
2210 
2211 	/* end of format string is treated like a non-digit separator */
2212 	if (n->type == NODE_TYPE_END)
2213 		return true;
2214 
2215 	if (n->type == NODE_TYPE_ACTION)
2216 	{
2217 		if (n->key->is_digit)
2218 			return false;
2219 
2220 		return true;
2221 	}
2222 	else if (n->character[1] == '\0' &&
2223 			 isdigit((unsigned char) n->character[0]))
2224 		return false;
2225 
2226 	return true;				/* some non-digit input (separator) */
2227 }
2228 
2229 
/*
 * Expand an abbreviated year so that it lands near 2020.
 *
 * This is the generalization of treating '70' as 1970 and '69' as 2069:
 *		below 70		-> 2000's
 *		70 .. 99		-> 1900's
 *		100 .. 519		-> 2000's
 *		520 .. 999		-> 1000's
 * Years of 1000 and up are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	int			offset;

	/* already a full year: nothing to adjust */
	if (year >= 1000)
		return year;

	if (year < 70)
		offset = 2000;
	else if (year < 100)
		offset = 1900;
	else if (year < 520)
		offset = 2000;
	else
		offset = 1000;

	return year + offset;
}
2252 
2253 
/*
 * Return the number of leading whitespace characters (per isspace()) at the
 * start of the null-terminated string 'str'.
 */
static int
strspace_len(const char *str)
{
	int			n;

	for (n = 0; str[n] != '\0' && isspace((unsigned char) str[n]); n++)
		;						/* just counting */

	return n;
}
2266 
2267 /*
2268  * Set the date mode of a from-char conversion.
2269  *
2270  * Puke if the date mode has already been set, and the caller attempts to set
2271  * it to a conflicting mode.
2272  *
2273  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2274  */
2275 static void
from_char_set_mode(TmFromChar * tmfc,const FromCharDateMode mode,bool * have_error)2276 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
2277 {
2278 	if (mode != FROM_CHAR_DATE_NONE)
2279 	{
2280 		if (tmfc->mode == FROM_CHAR_DATE_NONE)
2281 			tmfc->mode = mode;
2282 		else if (tmfc->mode != mode)
2283 			RETURN_ERROR(ereport(ERROR,
2284 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2285 								  errmsg("invalid combination of date conventions"),
2286 								  errhint("Do not mix Gregorian and ISO week date "
2287 										  "conventions in a formatting template."))));
2288 	}
2289 
2290 on_error:
2291 	return;
2292 }
2293 
2294 /*
2295  * Set the integer pointed to by 'dest' to the given value.
2296  *
2297  * Puke if the destination integer has previously been set to some other
2298  * non-zero value.
2299  *
2300  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2301  */
2302 static void
from_char_set_int(int * dest,const int value,const FormatNode * node,bool * have_error)2303 from_char_set_int(int *dest, const int value, const FormatNode *node,
2304 				  bool *have_error)
2305 {
2306 	if (*dest != 0 && *dest != value)
2307 		RETURN_ERROR(ereport(ERROR,
2308 							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2309 							  errmsg("conflicting values for \"%s\" field in "
2310 									 "formatting string",
2311 									 node->key->name),
2312 							  errdetail("This value contradicts a previous setting "
2313 										"for the same field type."))));
2314 	*dest = value;
2315 
2316 on_error:
2317 	return;
2318 }
2319 
2320 /*
2321  * Read a single integer from the source string, into the int pointed to by
2322  * 'dest'. If 'dest' is NULL, the result is discarded.
2323  *
2324  * In fixed-width mode (the node does not have the FM suffix), consume at most
2325  * 'len' characters.  However, any leading whitespace isn't counted in 'len'.
2326  *
2327  * We use strtol() to recover the integer value from the source string, in
2328  * accordance with the given FormatNode.
2329  *
2330  * If the conversion completes successfully, src will have been advanced to
2331  * point at the character immediately following the last character used in the
2332  * conversion.
2333  *
2334  * Return the number of characters consumed.
2335  *
2336  * Note that from_char_parse_int() provides a more convenient wrapper where
2337  * the length of the field is the same as the length of the format keyword (as
2338  * with DD and MI).
2339  *
2340  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
2341  * and -1 is returned.
2342  */
2343 static int
from_char_parse_int_len(int * dest,const char ** src,const int len,FormatNode * node,bool * have_error)2344 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2345 						bool *have_error)
2346 {
2347 	long		result;
2348 	char		copy[DCH_MAX_ITEM_SIZ + 1];
2349 	const char *init = *src;
2350 	int			used;
2351 
2352 	/*
2353 	 * Skip any whitespace before parsing the integer.
2354 	 */
2355 	*src += strspace_len(*src);
2356 
2357 	Assert(len <= DCH_MAX_ITEM_SIZ);
2358 	used = (int) strlcpy(copy, *src, len + 1);
2359 
2360 	if (S_FM(node->suffix) || is_next_separator(node))
2361 	{
2362 		/*
2363 		 * This node is in Fill Mode, or the next node is known to be a
2364 		 * non-digit value, so we just slurp as many characters as we can get.
2365 		 */
2366 		char	   *endptr;
2367 
2368 		errno = 0;
2369 		result = strtol(init, &endptr, 10);
2370 		*src = endptr;
2371 	}
2372 	else
2373 	{
2374 		/*
2375 		 * We need to pull exactly the number of characters given in 'len' out
2376 		 * of the string, and convert those.
2377 		 */
2378 		char	   *last;
2379 
2380 		if (used < len)
2381 			RETURN_ERROR(ereport(ERROR,
2382 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2383 								  errmsg("source string too short for \"%s\" "
2384 										 "formatting field",
2385 										 node->key->name),
2386 								  errdetail("Field requires %d characters, "
2387 											"but only %d remain.",
2388 											len, used),
2389 								  errhint("If your source string is not fixed-width, "
2390 										  "try using the \"FM\" modifier."))));
2391 
2392 		errno = 0;
2393 		result = strtol(copy, &last, 10);
2394 		used = last - copy;
2395 
2396 		if (used > 0 && used < len)
2397 			RETURN_ERROR(ereport(ERROR,
2398 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2399 								  errmsg("invalid value \"%s\" for \"%s\"",
2400 										 copy, node->key->name),
2401 								  errdetail("Field requires %d characters, "
2402 											"but only %d could be parsed.",
2403 											len, used),
2404 								  errhint("If your source string is not fixed-width, "
2405 										  "try using the \"FM\" modifier."))));
2406 
2407 		*src += used;
2408 	}
2409 
2410 	if (*src == init)
2411 		RETURN_ERROR(ereport(ERROR,
2412 							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2413 							  errmsg("invalid value \"%s\" for \"%s\"",
2414 									 copy, node->key->name),
2415 							  errdetail("Value must be an integer."))));
2416 
2417 	if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2418 		RETURN_ERROR(ereport(ERROR,
2419 							 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2420 							  errmsg("value for \"%s\" in source string is out of range",
2421 									 node->key->name),
2422 							  errdetail("Value must be in the range %d to %d.",
2423 										INT_MIN, INT_MAX))));
2424 
2425 	if (dest != NULL)
2426 	{
2427 		from_char_set_int(dest, (int) result, node, have_error);
2428 		CHECK_ERROR;
2429 	}
2430 
2431 	return *src - init;
2432 
2433 on_error:
2434 	return -1;
2435 }
2436 
2437 /*
2438  * Call from_char_parse_int_len(), using the length of the format keyword as
2439  * the expected length of the field.
2440  *
2441  * Don't call this function if the field differs in length from the format
2442  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2443  * In such cases, call from_char_parse_int_len() instead to specify the
2444  * required length explicitly.
2445  */
2446 static int
from_char_parse_int(int * dest,const char ** src,FormatNode * node,bool * have_error)2447 from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
2448 {
2449 	return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
2450 }
2451 
/*
 * Scan the NULL-terminated "array" in order, looking for the first entry
 * that is a case-insensitive prefix of "name".
 *
 * Returns the array index of the match, or -1 if nothing matches.
 *
 * *len receives the length of the match, or 0 if there is none.
 *
 * Case is folded with pg_ascii_tolower, so this is only suitable for
 * comparisons to ASCII strings.
 */
static int
seq_search_ascii(const char *name, const char *const *array, int *len)
{
	unsigned char firstc;
	int			idx;

	*len = 0;

	/* empty string can't match anything */
	if (*name == '\0')
		return -1;

	/* fold the first character once, so non-candidates are rejected fast */
	firstc = pg_ascii_tolower((unsigned char) *name);

	for (idx = 0; array[idx] != NULL; idx++)
	{
		const char *entry = array[idx];
		int			pos;

		if (pg_ascii_tolower((unsigned char) entry[0]) != firstc)
			continue;

		/* walk forward while both strings continue and agree */
		pos = 1;
		while (entry[pos] != '\0' &&
			   name[pos] != '\0' &&
			   pg_ascii_tolower((unsigned char) entry[pos]) ==
			   pg_ascii_tolower((unsigned char) name[pos]))
			pos++;

		/* success only if the whole array entry was matched */
		if (entry[pos] == '\0')
		{
			*len = pos;
			return idx;
		}
	}

	return -1;
}
2508 
2509 /*
2510  * Sequentially search an array of possibly non-English words for
2511  * a case-insensitive match to the initial character(s) of "name".
2512  *
2513  * This has the same API as seq_search_ascii(), but we use a more general
2514  * case-folding transformation to achieve case-insensitivity.  Case folding
2515  * is done per the rules of the collation identified by "collid".
2516  *
2517  * The array is treated as const, but we don't declare it that way because
2518  * the arrays exported by pg_locale.c aren't const.
2519  */
2520 static int
seq_search_localized(const char * name,char ** array,int * len,Oid collid)2521 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2522 {
2523 	char	  **a;
2524 	char	   *upper_name;
2525 	char	   *lower_name;
2526 
2527 	*len = 0;
2528 
2529 	/* empty string can't match anything */
2530 	if (!*name)
2531 		return -1;
2532 
2533 	/*
2534 	 * The case-folding processing done below is fairly expensive, so before
2535 	 * doing that, make a quick pass to see if there is an exact match.
2536 	 */
2537 	for (a = array; *a != NULL; a++)
2538 	{
2539 		int			element_len = strlen(*a);
2540 
2541 		if (strncmp(name, *a, element_len) == 0)
2542 		{
2543 			*len = element_len;
2544 			return a - array;
2545 		}
2546 	}
2547 
2548 	/*
2549 	 * Fold to upper case, then to lower case, so that we can match reliably
2550 	 * even in languages in which case conversions are not injective.
2551 	 */
2552 	upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
2553 	lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2554 	pfree(upper_name);
2555 
2556 	for (a = array; *a != NULL; a++)
2557 	{
2558 		char	   *upper_element;
2559 		char	   *lower_element;
2560 		int			element_len;
2561 
2562 		/* Likewise upper/lower-case array element */
2563 		upper_element = str_toupper(*a, strlen(*a), collid);
2564 		lower_element = str_tolower(upper_element, strlen(upper_element),
2565 									collid);
2566 		pfree(upper_element);
2567 		element_len = strlen(lower_element);
2568 
2569 		/* Match? */
2570 		if (strncmp(lower_name, lower_element, element_len) == 0)
2571 		{
2572 			*len = element_len;
2573 			pfree(lower_element);
2574 			pfree(lower_name);
2575 			return a - array;
2576 		}
2577 		pfree(lower_element);
2578 	}
2579 
2580 	pfree(lower_name);
2581 	return -1;
2582 }
2583 
2584 /*
2585  * Perform a sequential search in 'array' (or 'localized_array', if that's
2586  * not NULL) for an entry matching the first character(s) of the 'src'
2587  * string case-insensitively.
2588  *
2589  * The 'array' is presumed to be English words (all-ASCII), but
2590  * if 'localized_array' is supplied, that might be non-English
2591  * so we need a more expensive case-folding transformation
2592  * (which will follow the rules of the collation 'collid').
2593  *
2594  * If a match is found, copy the array index of the match into the integer
2595  * pointed to by 'dest', advance 'src' to the end of the part of the string
2596  * which matched, and return the number of characters consumed.
2597  *
2598  * If the string doesn't match, throw an error if 'have_error' is NULL,
2599  * otherwise set '*have_error' and return -1.
2600  *
2601  * 'node' is used only for error reports: node->key->name identifies the
2602  * field type we were searching for.
2603  */
2604 static int
from_char_seq_search(int * dest,const char ** src,const char * const * array,char ** localized_array,Oid collid,FormatNode * node,bool * have_error)2605 from_char_seq_search(int *dest, const char **src, const char *const *array,
2606 					 char **localized_array, Oid collid,
2607 					 FormatNode *node, bool *have_error)
2608 {
2609 	int			len;
2610 
2611 	if (localized_array == NULL)
2612 		*dest = seq_search_ascii(*src, array, &len);
2613 	else
2614 		*dest = seq_search_localized(*src, localized_array, &len, collid);
2615 
2616 	if (len <= 0)
2617 	{
2618 		/*
2619 		 * In the error report, truncate the string at the next whitespace (if
2620 		 * any) to avoid including irrelevant data.
2621 		 */
2622 		char	   *copy = pstrdup(*src);
2623 		char	   *c;
2624 
2625 		for (c = copy; *c; c++)
2626 		{
2627 			if (scanner_isspace(*c))
2628 			{
2629 				*c = '\0';
2630 				break;
2631 			}
2632 		}
2633 
2634 		RETURN_ERROR(ereport(ERROR,
2635 							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2636 							  errmsg("invalid value \"%s\" for \"%s\"",
2637 									 copy, node->key->name),
2638 							  errdetail("The given value did not match any of "
2639 										"the allowed values for this field."))));
2640 	}
2641 	*src += len;
2642 	return len;
2643 
2644 on_error:
2645 	return -1;
2646 }
2647 
2648 /* ----------
2649  * Process a TmToChar struct as denoted by a list of FormatNodes.
2650  * The formatted data is written to the string pointed to by 'out'.
2651  * ----------
2652  */
2653 static void
DCH_to_char(FormatNode * node,bool is_interval,TmToChar * in,char * out,Oid collid)2654 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2655 {
2656 	FormatNode *n;
2657 	char	   *s;
2658 	struct pg_tm *tm = &in->tm;
2659 	int			i;
2660 
2661 	/* cache localized days and months */
2662 	cache_locale_time();
2663 
2664 	s = out;
2665 	for (n = node; n->type != NODE_TYPE_END; n++)
2666 	{
2667 		if (n->type != NODE_TYPE_ACTION)
2668 		{
2669 			strcpy(s, n->character);
2670 			s += strlen(s);
2671 			continue;
2672 		}
2673 
2674 		switch (n->key->id)
2675 		{
2676 			case DCH_A_M:
2677 			case DCH_P_M:
2678 				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2679 					   ? P_M_STR : A_M_STR);
2680 				s += strlen(s);
2681 				break;
2682 			case DCH_AM:
2683 			case DCH_PM:
2684 				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2685 					   ? PM_STR : AM_STR);
2686 				s += strlen(s);
2687 				break;
2688 			case DCH_a_m:
2689 			case DCH_p_m:
2690 				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2691 					   ? p_m_STR : a_m_STR);
2692 				s += strlen(s);
2693 				break;
2694 			case DCH_am:
2695 			case DCH_pm:
2696 				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2697 					   ? pm_STR : am_STR);
2698 				s += strlen(s);
2699 				break;
2700 			case DCH_HH:
2701 			case DCH_HH12:
2702 
2703 				/*
2704 				 * display time as shown on a 12-hour clock, even for
2705 				 * intervals
2706 				 */
2707 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2708 						tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2709 						tm->tm_hour % (HOURS_PER_DAY / 2));
2710 				if (S_THth(n->suffix))
2711 					str_numth(s, s, S_TH_TYPE(n->suffix));
2712 				s += strlen(s);
2713 				break;
2714 			case DCH_HH24:
2715 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2716 						tm->tm_hour);
2717 				if (S_THth(n->suffix))
2718 					str_numth(s, s, S_TH_TYPE(n->suffix));
2719 				s += strlen(s);
2720 				break;
2721 			case DCH_MI:
2722 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2723 						tm->tm_min);
2724 				if (S_THth(n->suffix))
2725 					str_numth(s, s, S_TH_TYPE(n->suffix));
2726 				s += strlen(s);
2727 				break;
2728 			case DCH_SS:
2729 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2730 						tm->tm_sec);
2731 				if (S_THth(n->suffix))
2732 					str_numth(s, s, S_TH_TYPE(n->suffix));
2733 				s += strlen(s);
2734 				break;
2735 
2736 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2737 				sprintf(s, frac_fmt, (int) (frac_val)); \
2738 				if (S_THth(n->suffix)) \
2739 					str_numth(s, s, S_TH_TYPE(n->suffix)); \
2740 				s += strlen(s)
2741 
2742 			case DCH_FF1:		/* tenth of second */
2743 				DCH_to_char_fsec("%01d", in->fsec / 100000);
2744 				break;
2745 			case DCH_FF2:		/* hundredth of second */
2746 				DCH_to_char_fsec("%02d", in->fsec / 10000);
2747 				break;
2748 			case DCH_FF3:
2749 			case DCH_MS:		/* millisecond */
2750 				DCH_to_char_fsec("%03d", in->fsec / 1000);
2751 				break;
2752 			case DCH_FF4:		/* tenth of a millisecond */
2753 				DCH_to_char_fsec("%04d", in->fsec / 100);
2754 				break;
2755 			case DCH_FF5:		/* hundredth of a millisecond */
2756 				DCH_to_char_fsec("%05d", in->fsec / 10);
2757 				break;
2758 			case DCH_FF6:
2759 			case DCH_US:		/* microsecond */
2760 				DCH_to_char_fsec("%06d", in->fsec);
2761 				break;
2762 #undef DCH_to_char_fsec
2763 			case DCH_SSSS:
2764 				sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2765 						tm->tm_min * SECS_PER_MINUTE +
2766 						tm->tm_sec);
2767 				if (S_THth(n->suffix))
2768 					str_numth(s, s, S_TH_TYPE(n->suffix));
2769 				s += strlen(s);
2770 				break;
2771 			case DCH_tz:
2772 				INVALID_FOR_INTERVAL;
2773 				if (tmtcTzn(in))
2774 				{
2775 					/* We assume here that timezone names aren't localized */
2776 					char	   *p = asc_tolower_z(tmtcTzn(in));
2777 
2778 					strcpy(s, p);
2779 					pfree(p);
2780 					s += strlen(s);
2781 				}
2782 				break;
2783 			case DCH_TZ:
2784 				INVALID_FOR_INTERVAL;
2785 				if (tmtcTzn(in))
2786 				{
2787 					strcpy(s, tmtcTzn(in));
2788 					s += strlen(s);
2789 				}
2790 				break;
2791 			case DCH_TZH:
2792 				INVALID_FOR_INTERVAL;
2793 				sprintf(s, "%c%02d",
2794 						(tm->tm_gmtoff >= 0) ? '+' : '-',
2795 						abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2796 				s += strlen(s);
2797 				break;
2798 			case DCH_TZM:
2799 				INVALID_FOR_INTERVAL;
2800 				sprintf(s, "%02d",
2801 						(abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2802 				s += strlen(s);
2803 				break;
2804 			case DCH_OF:
2805 				INVALID_FOR_INTERVAL;
2806 				sprintf(s, "%c%0*d",
2807 						(tm->tm_gmtoff >= 0) ? '+' : '-',
2808 						S_FM(n->suffix) ? 0 : 2,
2809 						abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2810 				s += strlen(s);
2811 				if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2812 				{
2813 					sprintf(s, ":%02d",
2814 							(abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2815 					s += strlen(s);
2816 				}
2817 				break;
2818 			case DCH_A_D:
2819 			case DCH_B_C:
2820 				INVALID_FOR_INTERVAL;
2821 				strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2822 				s += strlen(s);
2823 				break;
2824 			case DCH_AD:
2825 			case DCH_BC:
2826 				INVALID_FOR_INTERVAL;
2827 				strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2828 				s += strlen(s);
2829 				break;
2830 			case DCH_a_d:
2831 			case DCH_b_c:
2832 				INVALID_FOR_INTERVAL;
2833 				strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2834 				s += strlen(s);
2835 				break;
2836 			case DCH_ad:
2837 			case DCH_bc:
2838 				INVALID_FOR_INTERVAL;
2839 				strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2840 				s += strlen(s);
2841 				break;
2842 			case DCH_MONTH:
2843 				INVALID_FOR_INTERVAL;
2844 				if (!tm->tm_mon)
2845 					break;
2846 				if (S_TM(n->suffix))
2847 				{
2848 					char	   *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2849 
2850 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2851 						strcpy(s, str);
2852 					else
2853 						ereport(ERROR,
2854 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2855 								 errmsg("localized string format value too long")));
2856 				}
2857 				else
2858 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2859 							asc_toupper_z(months_full[tm->tm_mon - 1]));
2860 				s += strlen(s);
2861 				break;
2862 			case DCH_Month:
2863 				INVALID_FOR_INTERVAL;
2864 				if (!tm->tm_mon)
2865 					break;
2866 				if (S_TM(n->suffix))
2867 				{
2868 					char	   *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2869 
2870 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2871 						strcpy(s, str);
2872 					else
2873 						ereport(ERROR,
2874 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2875 								 errmsg("localized string format value too long")));
2876 				}
2877 				else
2878 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2879 							months_full[tm->tm_mon - 1]);
2880 				s += strlen(s);
2881 				break;
2882 			case DCH_month:
2883 				INVALID_FOR_INTERVAL;
2884 				if (!tm->tm_mon)
2885 					break;
2886 				if (S_TM(n->suffix))
2887 				{
2888 					char	   *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2889 
2890 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2891 						strcpy(s, str);
2892 					else
2893 						ereport(ERROR,
2894 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2895 								 errmsg("localized string format value too long")));
2896 				}
2897 				else
2898 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2899 							asc_tolower_z(months_full[tm->tm_mon - 1]));
2900 				s += strlen(s);
2901 				break;
2902 			case DCH_MON:
2903 				INVALID_FOR_INTERVAL;
2904 				if (!tm->tm_mon)
2905 					break;
2906 				if (S_TM(n->suffix))
2907 				{
2908 					char	   *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2909 
2910 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2911 						strcpy(s, str);
2912 					else
2913 						ereport(ERROR,
2914 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2915 								 errmsg("localized string format value too long")));
2916 				}
2917 				else
2918 					strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2919 				s += strlen(s);
2920 				break;
2921 			case DCH_Mon:
2922 				INVALID_FOR_INTERVAL;
2923 				if (!tm->tm_mon)
2924 					break;
2925 				if (S_TM(n->suffix))
2926 				{
2927 					char	   *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2928 
2929 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2930 						strcpy(s, str);
2931 					else
2932 						ereport(ERROR,
2933 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2934 								 errmsg("localized string format value too long")));
2935 				}
2936 				else
2937 					strcpy(s, months[tm->tm_mon - 1]);
2938 				s += strlen(s);
2939 				break;
2940 			case DCH_mon:
2941 				INVALID_FOR_INTERVAL;
2942 				if (!tm->tm_mon)
2943 					break;
2944 				if (S_TM(n->suffix))
2945 				{
2946 					char	   *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2947 
2948 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2949 						strcpy(s, str);
2950 					else
2951 						ereport(ERROR,
2952 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2953 								 errmsg("localized string format value too long")));
2954 				}
2955 				else
2956 					strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2957 				s += strlen(s);
2958 				break;
2959 			case DCH_MM:
2960 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2961 						tm->tm_mon);
2962 				if (S_THth(n->suffix))
2963 					str_numth(s, s, S_TH_TYPE(n->suffix));
2964 				s += strlen(s);
2965 				break;
2966 			case DCH_DAY:
2967 				INVALID_FOR_INTERVAL;
2968 				if (S_TM(n->suffix))
2969 				{
2970 					char	   *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2971 
2972 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2973 						strcpy(s, str);
2974 					else
2975 						ereport(ERROR,
2976 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2977 								 errmsg("localized string format value too long")));
2978 				}
2979 				else
2980 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2981 							asc_toupper_z(days[tm->tm_wday]));
2982 				s += strlen(s);
2983 				break;
2984 			case DCH_Day:
2985 				INVALID_FOR_INTERVAL;
2986 				if (S_TM(n->suffix))
2987 				{
2988 					char	   *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2989 
2990 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2991 						strcpy(s, str);
2992 					else
2993 						ereport(ERROR,
2994 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2995 								 errmsg("localized string format value too long")));
2996 				}
2997 				else
2998 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2999 							days[tm->tm_wday]);
3000 				s += strlen(s);
3001 				break;
3002 			case DCH_day:
3003 				INVALID_FOR_INTERVAL;
3004 				if (S_TM(n->suffix))
3005 				{
3006 					char	   *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
3007 
3008 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3009 						strcpy(s, str);
3010 					else
3011 						ereport(ERROR,
3012 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3013 								 errmsg("localized string format value too long")));
3014 				}
3015 				else
3016 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3017 							asc_tolower_z(days[tm->tm_wday]));
3018 				s += strlen(s);
3019 				break;
3020 			case DCH_DY:
3021 				INVALID_FOR_INTERVAL;
3022 				if (S_TM(n->suffix))
3023 				{
3024 					char	   *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
3025 
3026 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3027 						strcpy(s, str);
3028 					else
3029 						ereport(ERROR,
3030 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3031 								 errmsg("localized string format value too long")));
3032 				}
3033 				else
3034 					strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3035 				s += strlen(s);
3036 				break;
3037 			case DCH_Dy:
3038 				INVALID_FOR_INTERVAL;
3039 				if (S_TM(n->suffix))
3040 				{
3041 					char	   *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
3042 
3043 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3044 						strcpy(s, str);
3045 					else
3046 						ereport(ERROR,
3047 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3048 								 errmsg("localized string format value too long")));
3049 				}
3050 				else
3051 					strcpy(s, days_short[tm->tm_wday]);
3052 				s += strlen(s);
3053 				break;
3054 			case DCH_dy:
3055 				INVALID_FOR_INTERVAL;
3056 				if (S_TM(n->suffix))
3057 				{
3058 					char	   *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
3059 
3060 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3061 						strcpy(s, str);
3062 					else
3063 						ereport(ERROR,
3064 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3065 								 errmsg("localized string format value too long")));
3066 				}
3067 				else
3068 					strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3069 				s += strlen(s);
3070 				break;
3071 			case DCH_DDD:
3072 			case DCH_IDDD:
3073 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3074 						(n->key->id == DCH_DDD) ?
3075 						tm->tm_yday :
3076 						date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
3077 				if (S_THth(n->suffix))
3078 					str_numth(s, s, S_TH_TYPE(n->suffix));
3079 				s += strlen(s);
3080 				break;
3081 			case DCH_DD:
3082 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3083 				if (S_THth(n->suffix))
3084 					str_numth(s, s, S_TH_TYPE(n->suffix));
3085 				s += strlen(s);
3086 				break;
3087 			case DCH_D:
3088 				INVALID_FOR_INTERVAL;
3089 				sprintf(s, "%d", tm->tm_wday + 1);
3090 				if (S_THth(n->suffix))
3091 					str_numth(s, s, S_TH_TYPE(n->suffix));
3092 				s += strlen(s);
3093 				break;
3094 			case DCH_ID:
3095 				INVALID_FOR_INTERVAL;
3096 				sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3097 				if (S_THth(n->suffix))
3098 					str_numth(s, s, S_TH_TYPE(n->suffix));
3099 				s += strlen(s);
3100 				break;
3101 			case DCH_WW:
3102 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3103 						(tm->tm_yday - 1) / 7 + 1);
3104 				if (S_THth(n->suffix))
3105 					str_numth(s, s, S_TH_TYPE(n->suffix));
3106 				s += strlen(s);
3107 				break;
3108 			case DCH_IW:
3109 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3110 						date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3111 				if (S_THth(n->suffix))
3112 					str_numth(s, s, S_TH_TYPE(n->suffix));
3113 				s += strlen(s);
3114 				break;
3115 			case DCH_Q:
3116 				if (!tm->tm_mon)
3117 					break;
3118 				sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3119 				if (S_THth(n->suffix))
3120 					str_numth(s, s, S_TH_TYPE(n->suffix));
3121 				s += strlen(s);
3122 				break;
3123 			case DCH_CC:
3124 				if (is_interval)	/* straight calculation */
3125 					i = tm->tm_year / 100;
3126 				else
3127 				{
3128 					if (tm->tm_year > 0)
3129 						/* Century 20 == 1901 - 2000 */
3130 						i = (tm->tm_year - 1) / 100 + 1;
3131 					else
3132 						/* Century 6BC == 600BC - 501BC */
3133 						i = tm->tm_year / 100 - 1;
3134 				}
3135 				if (i <= 99 && i >= -99)
3136 					sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3137 				else
3138 					sprintf(s, "%d", i);
3139 				if (S_THth(n->suffix))
3140 					str_numth(s, s, S_TH_TYPE(n->suffix));
3141 				s += strlen(s);
3142 				break;
3143 			case DCH_Y_YYY:
3144 				i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3145 				sprintf(s, "%d,%03d", i,
3146 						ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3147 				if (S_THth(n->suffix))
3148 					str_numth(s, s, S_TH_TYPE(n->suffix));
3149 				s += strlen(s);
3150 				break;
3151 			case DCH_YYYY:
3152 			case DCH_IYYY:
3153 				sprintf(s, "%0*d",
3154 						S_FM(n->suffix) ? 0 :
3155 						(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3156 						(n->key->id == DCH_YYYY ?
3157 						 ADJUST_YEAR(tm->tm_year, is_interval) :
3158 						 ADJUST_YEAR(date2isoyear(tm->tm_year,
3159 												  tm->tm_mon,
3160 												  tm->tm_mday),
3161 									 is_interval)));
3162 				if (S_THth(n->suffix))
3163 					str_numth(s, s, S_TH_TYPE(n->suffix));
3164 				s += strlen(s);
3165 				break;
3166 			case DCH_YYY:
3167 			case DCH_IYY:
3168 				sprintf(s, "%0*d",
3169 						S_FM(n->suffix) ? 0 :
3170 						(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3171 						(n->key->id == DCH_YYY ?
3172 						 ADJUST_YEAR(tm->tm_year, is_interval) :
3173 						 ADJUST_YEAR(date2isoyear(tm->tm_year,
3174 												  tm->tm_mon,
3175 												  tm->tm_mday),
3176 									 is_interval)) % 1000);
3177 				if (S_THth(n->suffix))
3178 					str_numth(s, s, S_TH_TYPE(n->suffix));
3179 				s += strlen(s);
3180 				break;
3181 			case DCH_YY:
3182 			case DCH_IY:
3183 				sprintf(s, "%0*d",
3184 						S_FM(n->suffix) ? 0 :
3185 						(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3186 						(n->key->id == DCH_YY ?
3187 						 ADJUST_YEAR(tm->tm_year, is_interval) :
3188 						 ADJUST_YEAR(date2isoyear(tm->tm_year,
3189 												  tm->tm_mon,
3190 												  tm->tm_mday),
3191 									 is_interval)) % 100);
3192 				if (S_THth(n->suffix))
3193 					str_numth(s, s, S_TH_TYPE(n->suffix));
3194 				s += strlen(s);
3195 				break;
3196 			case DCH_Y:
3197 			case DCH_I:
3198 				sprintf(s, "%1d",
3199 						(n->key->id == DCH_Y ?
3200 						 ADJUST_YEAR(tm->tm_year, is_interval) :
3201 						 ADJUST_YEAR(date2isoyear(tm->tm_year,
3202 												  tm->tm_mon,
3203 												  tm->tm_mday),
3204 									 is_interval)) % 10);
3205 				if (S_THth(n->suffix))
3206 					str_numth(s, s, S_TH_TYPE(n->suffix));
3207 				s += strlen(s);
3208 				break;
3209 			case DCH_RM:
3210 				/* FALLTHROUGH */
3211 			case DCH_rm:
3212 
3213 				/*
3214 				 * For intervals, values like '12 month' will be reduced to 0
3215 				 * month and some years.  These should be processed.
3216 				 */
3217 				if (!tm->tm_mon && !tm->tm_year)
3218 					break;
3219 				else
3220 				{
3221 					int			mon = 0;
3222 					const char *const *months;
3223 
3224 					if (n->key->id == DCH_RM)
3225 						months = rm_months_upper;
3226 					else
3227 						months = rm_months_lower;
3228 
3229 					/*
3230 					 * Compute the position in the roman-numeral array.  Note
3231 					 * that the contents of the array are reversed, December
3232 					 * being first and January last.
3233 					 */
3234 					if (tm->tm_mon == 0)
3235 					{
3236 						/*
3237 						 * This case is special, and tracks the case of full
3238 						 * interval years.
3239 						 */
3240 						mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3241 					}
3242 					else if (tm->tm_mon < 0)
3243 					{
3244 						/*
3245 						 * Negative case.  In this case, the calculation is
3246 						 * reversed, where -1 means December, -2 November,
3247 						 * etc.
3248 						 */
3249 						mon = -1 * (tm->tm_mon + 1);
3250 					}
3251 					else
3252 					{
3253 						/*
3254 						 * Common case, with a strictly positive value.  The
3255 						 * position in the array matches with the value of
3256 						 * tm_mon.
3257 						 */
3258 						mon = MONTHS_PER_YEAR - tm->tm_mon;
3259 					}
3260 
3261 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3262 							months[mon]);
3263 					s += strlen(s);
3264 				}
3265 				break;
3266 			case DCH_W:
3267 				sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3268 				if (S_THth(n->suffix))
3269 					str_numth(s, s, S_TH_TYPE(n->suffix));
3270 				s += strlen(s);
3271 				break;
3272 			case DCH_J:
3273 				sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3274 				if (S_THth(n->suffix))
3275 					str_numth(s, s, S_TH_TYPE(n->suffix));
3276 				s += strlen(s);
3277 				break;
3278 		}
3279 	}
3280 
3281 	*s = '\0';
3282 }
3283 
3284 /*
3285  * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3286  * The TmFromChar struct pointed to by 'out' is populated with the results.
3287  *
3288  * 'collid' identifies the collation to use, if needed.
3289  * 'std' specifies standard parsing mode.
3290  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3291  *
3292  * Note: we currently don't have any to_interval() function, so there
3293  * is no need here for INVALID_FOR_INTERVAL checks.
3294  */
3295 static void
DCH_from_char(FormatNode * node,const char * in,TmFromChar * out,Oid collid,bool std,bool * have_error)3296 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3297 			  Oid collid, bool std, bool *have_error)
3298 {
3299 	FormatNode *n;
3300 	const char *s;
3301 	int			len,
3302 				value;
3303 	bool		fx_mode = std;
3304 
3305 	/* number of extra skipped characters (more than given in format string) */
3306 	int			extra_skip = 0;
3307 
3308 	/* cache localized days and months */
3309 	cache_locale_time();
3310 
3311 	for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3312 	{
3313 		/*
3314 		 * Ignore spaces at the beginning of the string and before fields when
3315 		 * not in FX (fixed width) mode.
3316 		 */
3317 		if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3318 			(n->type == NODE_TYPE_ACTION || n == node))
3319 		{
3320 			while (*s != '\0' && isspace((unsigned char) *s))
3321 			{
3322 				s++;
3323 				extra_skip++;
3324 			}
3325 		}
3326 
3327 		if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3328 		{
3329 			if (std)
3330 			{
3331 				/*
3332 				 * Standard mode requires strict matching between format
3333 				 * string separators/spaces and input string.
3334 				 */
3335 				Assert(n->character[0] && !n->character[1]);
3336 
3337 				if (*s == n->character[0])
3338 					s++;
3339 				else
3340 					RETURN_ERROR(ereport(ERROR,
3341 										 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3342 										  errmsg("unmatched format separator \"%c\"",
3343 												 n->character[0]))));
3344 			}
3345 			else if (!fx_mode)
3346 			{
3347 				/*
3348 				 * In non FX (fixed format) mode one format string space or
3349 				 * separator match to one space or separator in input string.
3350 				 * Or match nothing if there is no space or separator in the
3351 				 * current position of input string.
3352 				 */
3353 				extra_skip--;
3354 				if (isspace((unsigned char) *s) || is_separator_char(s))
3355 				{
3356 					s++;
3357 					extra_skip++;
3358 				}
3359 			}
3360 			else
3361 			{
3362 				/*
3363 				 * In FX mode, on format string space or separator we consume
3364 				 * exactly one character from input string.  Notice we don't
3365 				 * insist that the consumed character match the format's
3366 				 * character.
3367 				 */
3368 				s += pg_mblen(s);
3369 			}
3370 			continue;
3371 		}
3372 		else if (n->type != NODE_TYPE_ACTION)
3373 		{
3374 			/*
3375 			 * Text character, so consume one character from input string.
3376 			 * Notice we don't insist that the consumed character match the
3377 			 * format's character.
3378 			 */
3379 			if (!fx_mode)
3380 			{
3381 				/*
3382 				 * In non FX mode we might have skipped some extra characters
3383 				 * (more than specified in format string) before.  In this
3384 				 * case we don't skip input string character, because it might
3385 				 * be part of field.
3386 				 */
3387 				if (extra_skip > 0)
3388 					extra_skip--;
3389 				else
3390 					s += pg_mblen(s);
3391 			}
3392 			else
3393 			{
3394 				int			chlen = pg_mblen(s);
3395 
3396 				/*
3397 				 * Standard mode requires strict match of format characters.
3398 				 */
3399 				if (std && n->type == NODE_TYPE_CHAR &&
3400 					strncmp(s, n->character, chlen) != 0)
3401 					RETURN_ERROR(ereport(ERROR,
3402 										 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3403 										  errmsg("unmatched format character \"%s\"",
3404 												 n->character))));
3405 
3406 				s += chlen;
3407 			}
3408 			continue;
3409 		}
3410 
3411 		from_char_set_mode(out, n->key->date_mode, have_error);
3412 		CHECK_ERROR;
3413 
3414 		switch (n->key->id)
3415 		{
3416 			case DCH_FX:
3417 				fx_mode = true;
3418 				break;
3419 			case DCH_A_M:
3420 			case DCH_P_M:
3421 			case DCH_a_m:
3422 			case DCH_p_m:
3423 				from_char_seq_search(&value, &s, ampm_strings_long,
3424 									 NULL, InvalidOid,
3425 									 n, have_error);
3426 				CHECK_ERROR;
3427 				from_char_set_int(&out->pm, value % 2, n, have_error);
3428 				CHECK_ERROR;
3429 				out->clock = CLOCK_12_HOUR;
3430 				break;
3431 			case DCH_AM:
3432 			case DCH_PM:
3433 			case DCH_am:
3434 			case DCH_pm:
3435 				from_char_seq_search(&value, &s, ampm_strings,
3436 									 NULL, InvalidOid,
3437 									 n, have_error);
3438 				CHECK_ERROR;
3439 				from_char_set_int(&out->pm, value % 2, n, have_error);
3440 				CHECK_ERROR;
3441 				out->clock = CLOCK_12_HOUR;
3442 				break;
3443 			case DCH_HH:
3444 			case DCH_HH12:
3445 				from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3446 				CHECK_ERROR;
3447 				out->clock = CLOCK_12_HOUR;
3448 				SKIP_THth(s, n->suffix);
3449 				break;
3450 			case DCH_HH24:
3451 				from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3452 				CHECK_ERROR;
3453 				SKIP_THth(s, n->suffix);
3454 				break;
3455 			case DCH_MI:
3456 				from_char_parse_int(&out->mi, &s, n, have_error);
3457 				CHECK_ERROR;
3458 				SKIP_THth(s, n->suffix);
3459 				break;
3460 			case DCH_SS:
3461 				from_char_parse_int(&out->ss, &s, n, have_error);
3462 				CHECK_ERROR;
3463 				SKIP_THth(s, n->suffix);
3464 				break;
3465 			case DCH_MS:		/* millisecond */
3466 				len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
3467 				CHECK_ERROR;
3468 
3469 				/*
3470 				 * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3471 				 */
3472 				out->ms *= len == 1 ? 100 :
3473 					len == 2 ? 10 : 1;
3474 
3475 				SKIP_THth(s, n->suffix);
3476 				break;
3477 			case DCH_FF1:
3478 			case DCH_FF2:
3479 			case DCH_FF3:
3480 			case DCH_FF4:
3481 			case DCH_FF5:
3482 			case DCH_FF6:
3483 				out->ff = n->key->id - DCH_FF1 + 1;
3484 				/* fall through */
3485 			case DCH_US:		/* microsecond */
3486 				len = from_char_parse_int_len(&out->us, &s,
3487 											  n->key->id == DCH_US ? 6 :
3488 											  out->ff, n, have_error);
3489 				CHECK_ERROR;
3490 
3491 				out->us *= len == 1 ? 100000 :
3492 					len == 2 ? 10000 :
3493 					len == 3 ? 1000 :
3494 					len == 4 ? 100 :
3495 					len == 5 ? 10 : 1;
3496 
3497 				SKIP_THth(s, n->suffix);
3498 				break;
3499 			case DCH_SSSS:
3500 				from_char_parse_int(&out->ssss, &s, n, have_error);
3501 				CHECK_ERROR;
3502 				SKIP_THth(s, n->suffix);
3503 				break;
3504 			case DCH_tz:
3505 			case DCH_TZ:
3506 			case DCH_OF:
3507 				RETURN_ERROR(ereport(ERROR,
3508 									 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3509 									  errmsg("formatting field \"%s\" is only supported in to_char",
3510 											 n->key->name))));
3511 				CHECK_ERROR;
3512 				break;
3513 			case DCH_TZH:
3514 
3515 				/*
3516 				 * Value of TZH might be negative.  And the issue is that we
3517 				 * might swallow minus sign as the separator.  So, if we have
3518 				 * skipped more characters than specified in the format
3519 				 * string, then we consider prepending last skipped minus to
3520 				 * TZH.
3521 				 */
3522 				if (*s == '+' || *s == '-' || *s == ' ')
3523 				{
3524 					out->tzsign = *s == '-' ? -1 : +1;
3525 					s++;
3526 				}
3527 				else
3528 				{
3529 					if (extra_skip > 0 && *(s - 1) == '-')
3530 						out->tzsign = -1;
3531 					else
3532 						out->tzsign = +1;
3533 				}
3534 
3535 				from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
3536 				CHECK_ERROR;
3537 				break;
3538 			case DCH_TZM:
3539 				/* assign positive timezone sign if TZH was not seen before */
3540 				if (!out->tzsign)
3541 					out->tzsign = +1;
3542 				from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
3543 				CHECK_ERROR;
3544 				break;
3545 			case DCH_A_D:
3546 			case DCH_B_C:
3547 			case DCH_a_d:
3548 			case DCH_b_c:
3549 				from_char_seq_search(&value, &s, adbc_strings_long,
3550 									 NULL, InvalidOid,
3551 									 n, have_error);
3552 				CHECK_ERROR;
3553 				from_char_set_int(&out->bc, value % 2, n, have_error);
3554 				CHECK_ERROR;
3555 				break;
3556 			case DCH_AD:
3557 			case DCH_BC:
3558 			case DCH_ad:
3559 			case DCH_bc:
3560 				from_char_seq_search(&value, &s, adbc_strings,
3561 									 NULL, InvalidOid,
3562 									 n, have_error);
3563 				CHECK_ERROR;
3564 				from_char_set_int(&out->bc, value % 2, n, have_error);
3565 				CHECK_ERROR;
3566 				break;
3567 			case DCH_MONTH:
3568 			case DCH_Month:
3569 			case DCH_month:
3570 				from_char_seq_search(&value, &s, months_full,
3571 									 S_TM(n->suffix) ? localized_full_months : NULL,
3572 									 collid,
3573 									 n, have_error);
3574 				CHECK_ERROR;
3575 				from_char_set_int(&out->mm, value + 1, n, have_error);
3576 				CHECK_ERROR;
3577 				break;
3578 			case DCH_MON:
3579 			case DCH_Mon:
3580 			case DCH_mon:
3581 				from_char_seq_search(&value, &s, months,
3582 									 S_TM(n->suffix) ? localized_abbrev_months : NULL,
3583 									 collid,
3584 									 n, have_error);
3585 				CHECK_ERROR;
3586 				from_char_set_int(&out->mm, value + 1, n, have_error);
3587 				CHECK_ERROR;
3588 				break;
3589 			case DCH_MM:
3590 				from_char_parse_int(&out->mm, &s, n, have_error);
3591 				CHECK_ERROR;
3592 				SKIP_THth(s, n->suffix);
3593 				break;
3594 			case DCH_DAY:
3595 			case DCH_Day:
3596 			case DCH_day:
3597 				from_char_seq_search(&value, &s, days,
3598 									 S_TM(n->suffix) ? localized_full_days : NULL,
3599 									 collid,
3600 									 n, have_error);
3601 				CHECK_ERROR;
3602 				from_char_set_int(&out->d, value, n, have_error);
3603 				CHECK_ERROR;
3604 				out->d++;
3605 				break;
3606 			case DCH_DY:
3607 			case DCH_Dy:
3608 			case DCH_dy:
3609 				from_char_seq_search(&value, &s, days_short,
3610 									 S_TM(n->suffix) ? localized_abbrev_days : NULL,
3611 									 collid,
3612 									 n, have_error);
3613 				CHECK_ERROR;
3614 				from_char_set_int(&out->d, value, n, have_error);
3615 				CHECK_ERROR;
3616 				out->d++;
3617 				break;
3618 			case DCH_DDD:
3619 				from_char_parse_int(&out->ddd, &s, n, have_error);
3620 				CHECK_ERROR;
3621 				SKIP_THth(s, n->suffix);
3622 				break;
3623 			case DCH_IDDD:
3624 				from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
3625 				CHECK_ERROR;
3626 				SKIP_THth(s, n->suffix);
3627 				break;
3628 			case DCH_DD:
3629 				from_char_parse_int(&out->dd, &s, n, have_error);
3630 				CHECK_ERROR;
3631 				SKIP_THth(s, n->suffix);
3632 				break;
3633 			case DCH_D:
3634 				from_char_parse_int(&out->d, &s, n, have_error);
3635 				CHECK_ERROR;
3636 				SKIP_THth(s, n->suffix);
3637 				break;
3638 			case DCH_ID:
3639 				from_char_parse_int_len(&out->d, &s, 1, n, have_error);
3640 				CHECK_ERROR;
3641 				/* Shift numbering to match Gregorian where Sunday = 1 */
3642 				if (++out->d > 7)
3643 					out->d = 1;
3644 				SKIP_THth(s, n->suffix);
3645 				break;
3646 			case DCH_WW:
3647 			case DCH_IW:
3648 				from_char_parse_int(&out->ww, &s, n, have_error);
3649 				CHECK_ERROR;
3650 				SKIP_THth(s, n->suffix);
3651 				break;
3652 			case DCH_Q:
3653 
3654 				/*
3655 				 * We ignore 'Q' when converting to date because it is unclear
3656 				 * which date in the quarter to use, and some people specify
3657 				 * both quarter and month, so if it was honored it might
3658 				 * conflict with the supplied month. That is also why we don't
3659 				 * throw an error.
3660 				 *
3661 				 * We still parse the source string for an integer, but it
3662 				 * isn't stored anywhere in 'out'.
3663 				 */
3664 				from_char_parse_int((int *) NULL, &s, n, have_error);
3665 				CHECK_ERROR;
3666 				SKIP_THth(s, n->suffix);
3667 				break;
3668 			case DCH_CC:
3669 				from_char_parse_int(&out->cc, &s, n, have_error);
3670 				CHECK_ERROR;
3671 				SKIP_THth(s, n->suffix);
3672 				break;
3673 			case DCH_Y_YYY:
3674 				{
3675 					int			matched,
3676 								years,
3677 								millennia,
3678 								nch;
3679 
3680 					matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3681 					if (matched < 2)
3682 						RETURN_ERROR(ereport(ERROR,
3683 											 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3684 											  errmsg("invalid input string for \"Y,YYY\""))));
3685 					years += (millennia * 1000);
3686 					from_char_set_int(&out->year, years, n, have_error);
3687 					CHECK_ERROR;
3688 					out->yysz = 4;
3689 					s += nch;
3690 					SKIP_THth(s, n->suffix);
3691 				}
3692 				break;
3693 			case DCH_YYYY:
3694 			case DCH_IYYY:
3695 				from_char_parse_int(&out->year, &s, n, have_error);
3696 				CHECK_ERROR;
3697 				out->yysz = 4;
3698 				SKIP_THth(s, n->suffix);
3699 				break;
3700 			case DCH_YYY:
3701 			case DCH_IYY:
3702 				len = from_char_parse_int(&out->year, &s, n, have_error);
3703 				CHECK_ERROR;
3704 				if (len < 4)
3705 					out->year = adjust_partial_year_to_2020(out->year);
3706 				out->yysz = 3;
3707 				SKIP_THth(s, n->suffix);
3708 				break;
3709 			case DCH_YY:
3710 			case DCH_IY:
3711 				len = from_char_parse_int(&out->year, &s, n, have_error);
3712 				CHECK_ERROR;
3713 				if (len < 4)
3714 					out->year = adjust_partial_year_to_2020(out->year);
3715 				out->yysz = 2;
3716 				SKIP_THth(s, n->suffix);
3717 				break;
3718 			case DCH_Y:
3719 			case DCH_I:
3720 				len = from_char_parse_int(&out->year, &s, n, have_error);
3721 				CHECK_ERROR;
3722 				if (len < 4)
3723 					out->year = adjust_partial_year_to_2020(out->year);
3724 				out->yysz = 1;
3725 				SKIP_THth(s, n->suffix);
3726 				break;
3727 			case DCH_RM:
3728 			case DCH_rm:
3729 				from_char_seq_search(&value, &s, rm_months_lower,
3730 									 NULL, InvalidOid,
3731 									 n, have_error);
3732 				CHECK_ERROR;
3733 				from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
3734 								  n, have_error);
3735 				CHECK_ERROR;
3736 				break;
3737 			case DCH_W:
3738 				from_char_parse_int(&out->w, &s, n, have_error);
3739 				CHECK_ERROR;
3740 				SKIP_THth(s, n->suffix);
3741 				break;
3742 			case DCH_J:
3743 				from_char_parse_int(&out->j, &s, n, have_error);
3744 				CHECK_ERROR;
3745 				SKIP_THth(s, n->suffix);
3746 				break;
3747 		}
3748 
3749 		/* Ignore all spaces after fields */
3750 		if (!fx_mode)
3751 		{
3752 			extra_skip = 0;
3753 			while (*s != '\0' && isspace((unsigned char) *s))
3754 			{
3755 				s++;
3756 				extra_skip++;
3757 			}
3758 		}
3759 	}
3760 
3761 	/*
3762 	 * Standard parsing mode doesn't allow unmatched format patterns or
3763 	 * trailing characters in the input string.
3764 	 */
3765 	if (std)
3766 	{
3767 		if (n->type != NODE_TYPE_END)
3768 			RETURN_ERROR(ereport(ERROR,
3769 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3770 								  errmsg("input string is too short for datetime format"))));
3771 
3772 		while (*s != '\0' && isspace((unsigned char) *s))
3773 			s++;
3774 
3775 		if (*s != '\0')
3776 			RETURN_ERROR(ereport(ERROR,
3777 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3778 								  errmsg("trailing characters remain in input string "
3779 										 "after datetime format"))));
3780 	}
3781 
3782 on_error:
3783 	return;
3784 }
3785 
3786 /*
3787  * The invariant for DCH cache entry management is that DCHCounter is equal
3788  * to the maximum age value among the existing entries, and we increment it
3789  * whenever an access occurs.  If we approach overflow, deal with that by
3790  * halving all the age values, so that we retain a fairly accurate idea of
3791  * which entries are oldest.
3792  */
3793 static inline void
DCH_prevent_counter_overflow(void)3794 DCH_prevent_counter_overflow(void)
3795 {
3796 	if (DCHCounter >= (INT_MAX - 1))
3797 	{
3798 		for (int i = 0; i < n_DCHCache; i++)
3799 			DCHCache[i]->age >>= 1;
3800 		DCHCounter >>= 1;
3801 	}
3802 }
3803 
3804 /*
3805  * Get mask of date/time/zone components present in format nodes.
3806  *
3807  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3808  */
3809 static int
DCH_datetime_type(FormatNode * node,bool * have_error)3810 DCH_datetime_type(FormatNode *node, bool *have_error)
3811 {
3812 	FormatNode *n;
3813 	int			flags = 0;
3814 
3815 	for (n = node; n->type != NODE_TYPE_END; n++)
3816 	{
3817 		if (n->type != NODE_TYPE_ACTION)
3818 			continue;
3819 
3820 		switch (n->key->id)
3821 		{
3822 			case DCH_FX:
3823 				break;
3824 			case DCH_A_M:
3825 			case DCH_P_M:
3826 			case DCH_a_m:
3827 			case DCH_p_m:
3828 			case DCH_AM:
3829 			case DCH_PM:
3830 			case DCH_am:
3831 			case DCH_pm:
3832 			case DCH_HH:
3833 			case DCH_HH12:
3834 			case DCH_HH24:
3835 			case DCH_MI:
3836 			case DCH_SS:
3837 			case DCH_MS:		/* millisecond */
3838 			case DCH_US:		/* microsecond */
3839 			case DCH_FF1:
3840 			case DCH_FF2:
3841 			case DCH_FF3:
3842 			case DCH_FF4:
3843 			case DCH_FF5:
3844 			case DCH_FF6:
3845 			case DCH_SSSS:
3846 				flags |= DCH_TIMED;
3847 				break;
3848 			case DCH_tz:
3849 			case DCH_TZ:
3850 			case DCH_OF:
3851 				RETURN_ERROR(ereport(ERROR,
3852 									 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3853 									  errmsg("formatting field \"%s\" is only supported in to_char",
3854 											 n->key->name))));
3855 				flags |= DCH_ZONED;
3856 				break;
3857 			case DCH_TZH:
3858 			case DCH_TZM:
3859 				flags |= DCH_ZONED;
3860 				break;
3861 			case DCH_A_D:
3862 			case DCH_B_C:
3863 			case DCH_a_d:
3864 			case DCH_b_c:
3865 			case DCH_AD:
3866 			case DCH_BC:
3867 			case DCH_ad:
3868 			case DCH_bc:
3869 			case DCH_MONTH:
3870 			case DCH_Month:
3871 			case DCH_month:
3872 			case DCH_MON:
3873 			case DCH_Mon:
3874 			case DCH_mon:
3875 			case DCH_MM:
3876 			case DCH_DAY:
3877 			case DCH_Day:
3878 			case DCH_day:
3879 			case DCH_DY:
3880 			case DCH_Dy:
3881 			case DCH_dy:
3882 			case DCH_DDD:
3883 			case DCH_IDDD:
3884 			case DCH_DD:
3885 			case DCH_D:
3886 			case DCH_ID:
3887 			case DCH_WW:
3888 			case DCH_Q:
3889 			case DCH_CC:
3890 			case DCH_Y_YYY:
3891 			case DCH_YYYY:
3892 			case DCH_IYYY:
3893 			case DCH_YYY:
3894 			case DCH_IYY:
3895 			case DCH_YY:
3896 			case DCH_IY:
3897 			case DCH_Y:
3898 			case DCH_I:
3899 			case DCH_RM:
3900 			case DCH_rm:
3901 			case DCH_W:
3902 			case DCH_J:
3903 				flags |= DCH_DATED;
3904 				break;
3905 		}
3906 	}
3907 
3908 on_error:
3909 	return flags;
3910 }
3911 
3912 /* select a DCHCacheEntry to hold the given format picture */
3913 static DCHCacheEntry *
DCH_cache_getnew(const char * str,bool std)3914 DCH_cache_getnew(const char *str, bool std)
3915 {
3916 	DCHCacheEntry *ent;
3917 
3918 	/* Ensure we can advance DCHCounter below */
3919 	DCH_prevent_counter_overflow();
3920 
3921 	/*
3922 	 * If cache is full, remove oldest entry (or recycle first not-valid one)
3923 	 */
3924 	if (n_DCHCache >= DCH_CACHE_ENTRIES)
3925 	{
3926 		DCHCacheEntry *old = DCHCache[0];
3927 
3928 #ifdef DEBUG_TO_FROM_CHAR
3929 		elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3930 #endif
3931 		if (old->valid)
3932 		{
3933 			for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3934 			{
3935 				ent = DCHCache[i];
3936 				if (!ent->valid)
3937 				{
3938 					old = ent;
3939 					break;
3940 				}
3941 				if (ent->age < old->age)
3942 					old = ent;
3943 			}
3944 		}
3945 #ifdef DEBUG_TO_FROM_CHAR
3946 		elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3947 #endif
3948 		old->valid = false;
3949 		strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
3950 		old->age = (++DCHCounter);
3951 		/* caller is expected to fill format, then set valid */
3952 		return old;
3953 	}
3954 	else
3955 	{
3956 #ifdef DEBUG_TO_FROM_CHAR
3957 		elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3958 #endif
3959 		Assert(DCHCache[n_DCHCache] == NULL);
3960 		DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3961 			MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
3962 		ent->valid = false;
3963 		strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
3964 		ent->std = std;
3965 		ent->age = (++DCHCounter);
3966 		/* caller is expected to fill format, then set valid */
3967 		++n_DCHCache;
3968 		return ent;
3969 	}
3970 }
3971 
3972 /* look for an existing DCHCacheEntry matching the given format picture */
3973 static DCHCacheEntry *
DCH_cache_search(const char * str,bool std)3974 DCH_cache_search(const char *str, bool std)
3975 {
3976 	/* Ensure we can advance DCHCounter below */
3977 	DCH_prevent_counter_overflow();
3978 
3979 	for (int i = 0; i < n_DCHCache; i++)
3980 	{
3981 		DCHCacheEntry *ent = DCHCache[i];
3982 
3983 		if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
3984 		{
3985 			ent->age = (++DCHCounter);
3986 			return ent;
3987 		}
3988 	}
3989 
3990 	return NULL;
3991 }
3992 
3993 /* Find or create a DCHCacheEntry for the given format picture */
3994 static DCHCacheEntry *
DCH_cache_fetch(const char * str,bool std)3995 DCH_cache_fetch(const char *str, bool std)
3996 {
3997 	DCHCacheEntry *ent;
3998 
3999 	if ((ent = DCH_cache_search(str, std)) == NULL)
4000 	{
4001 		/*
4002 		 * Not in the cache, must run parser and save a new format-picture to
4003 		 * the cache.  Do not mark the cache entry valid until parsing
4004 		 * succeeds.
4005 		 */
4006 		ent = DCH_cache_getnew(str, std);
4007 
4008 		parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
4009 					 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4010 
4011 		ent->valid = true;
4012 	}
4013 	return ent;
4014 }
4015 
4016 /*
4017  * Format a date/time or interval into a string according to fmt.
4018  * We parse fmt into a list of FormatNodes.  This is then passed to DCH_to_char
4019  * for formatting.
4020  */
4021 static text *
datetime_to_char_body(TmToChar * tmtc,text * fmt,bool is_interval,Oid collid)4022 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4023 {
4024 	FormatNode *format;
4025 	char	   *fmt_str,
4026 			   *result;
4027 	bool		incache;
4028 	int			fmt_len;
4029 	text	   *res;
4030 
4031 	/*
4032 	 * Convert fmt to C string
4033 	 */
4034 	fmt_str = text_to_cstring(fmt);
4035 	fmt_len = strlen(fmt_str);
4036 
4037 	/*
4038 	 * Allocate workspace for result as C string
4039 	 */
4040 	result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4041 	*result = '\0';
4042 
4043 	if (fmt_len > DCH_CACHE_SIZE)
4044 	{
4045 		/*
4046 		 * Allocate new memory if format picture is bigger than static cache
4047 		 * and do not use cache (call parser always)
4048 		 */
4049 		incache = false;
4050 
4051 		format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4052 
4053 		parse_format(format, fmt_str, DCH_keywords,
4054 					 DCH_suff, DCH_index, DCH_FLAG, NULL);
4055 	}
4056 	else
4057 	{
4058 		/*
4059 		 * Use cache buffers
4060 		 */
4061 		DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4062 
4063 		incache = true;
4064 		format = ent->format;
4065 	}
4066 
4067 	/* The real work is here */
4068 	DCH_to_char(format, is_interval, tmtc, result, collid);
4069 
4070 	if (!incache)
4071 		pfree(format);
4072 
4073 	pfree(fmt_str);
4074 
4075 	/* convert C-string result to TEXT format */
4076 	res = cstring_to_text(result);
4077 
4078 	pfree(result);
4079 	return res;
4080 }
4081 
4082 /****************************************************************************
4083  *				Public routines
4084  ***************************************************************************/
4085 
4086 /* -------------------
4087  * TIMESTAMP to_char()
4088  * -------------------
4089  */
4090 Datum
timestamp_to_char(PG_FUNCTION_ARGS)4091 timestamp_to_char(PG_FUNCTION_ARGS)
4092 {
4093 	Timestamp	dt = PG_GETARG_TIMESTAMP(0);
4094 	text	   *fmt = PG_GETARG_TEXT_PP(1),
4095 			   *res;
4096 	TmToChar	tmtc;
4097 	struct pg_tm *tm;
4098 	int			thisdate;
4099 
4100 	if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4101 		PG_RETURN_NULL();
4102 
4103 	ZERO_tmtc(&tmtc);
4104 	tm = tmtcTm(&tmtc);
4105 
4106 	if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4107 		ereport(ERROR,
4108 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4109 				 errmsg("timestamp out of range")));
4110 
4111 	thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4112 	tm->tm_wday = (thisdate + 1) % 7;
4113 	tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4114 
4115 	if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4116 		PG_RETURN_NULL();
4117 
4118 	PG_RETURN_TEXT_P(res);
4119 }
4120 
4121 Datum
timestamptz_to_char(PG_FUNCTION_ARGS)4122 timestamptz_to_char(PG_FUNCTION_ARGS)
4123 {
4124 	TimestampTz dt = PG_GETARG_TIMESTAMP(0);
4125 	text	   *fmt = PG_GETARG_TEXT_PP(1),
4126 			   *res;
4127 	TmToChar	tmtc;
4128 	int			tz;
4129 	struct pg_tm *tm;
4130 	int			thisdate;
4131 
4132 	if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4133 		PG_RETURN_NULL();
4134 
4135 	ZERO_tmtc(&tmtc);
4136 	tm = tmtcTm(&tmtc);
4137 
4138 	if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4139 		ereport(ERROR,
4140 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4141 				 errmsg("timestamp out of range")));
4142 
4143 	thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4144 	tm->tm_wday = (thisdate + 1) % 7;
4145 	tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4146 
4147 	if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4148 		PG_RETURN_NULL();
4149 
4150 	PG_RETURN_TEXT_P(res);
4151 }
4152 
4153 
4154 /* -------------------
4155  * INTERVAL to_char()
4156  * -------------------
4157  */
4158 Datum
interval_to_char(PG_FUNCTION_ARGS)4159 interval_to_char(PG_FUNCTION_ARGS)
4160 {
4161 	Interval   *it = PG_GETARG_INTERVAL_P(0);
4162 	text	   *fmt = PG_GETARG_TEXT_PP(1),
4163 			   *res;
4164 	TmToChar	tmtc;
4165 	struct pg_tm *tm;
4166 
4167 	if (VARSIZE_ANY_EXHDR(fmt) <= 0)
4168 		PG_RETURN_NULL();
4169 
4170 	ZERO_tmtc(&tmtc);
4171 	tm = tmtcTm(&tmtc);
4172 
4173 	if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
4174 		PG_RETURN_NULL();
4175 
4176 	/* wday is meaningless, yday approximates the total span in days */
4177 	tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4178 
4179 	if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4180 		PG_RETURN_NULL();
4181 
4182 	PG_RETURN_TEXT_P(res);
4183 }
4184 
4185 /* ---------------------
4186  * TO_TIMESTAMP()
4187  *
4188  * Make Timestamp from date_str which is formatted at argument 'fmt'
4189  * ( to_timestamp is reverse to_char() )
4190  * ---------------------
4191  */
4192 Datum
to_timestamp(PG_FUNCTION_ARGS)4193 to_timestamp(PG_FUNCTION_ARGS)
4194 {
4195 	text	   *date_txt = PG_GETARG_TEXT_PP(0);
4196 	text	   *fmt = PG_GETARG_TEXT_PP(1);
4197 	Oid			collid = PG_GET_COLLATION();
4198 	Timestamp	result;
4199 	int			tz;
4200 	struct pg_tm tm;
4201 	fsec_t		fsec;
4202 	int			fprec;
4203 
4204 	do_to_timestamp(date_txt, fmt, collid, false,
4205 					&tm, &fsec, &fprec, NULL, NULL);
4206 
4207 	/* Use the specified time zone, if any. */
4208 	if (tm.tm_zone)
4209 	{
4210 		int			dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
4211 
4212 		if (dterr)
4213 			DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4214 	}
4215 	else
4216 		tz = DetermineTimeZoneOffset(&tm, session_timezone);
4217 
4218 	if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4219 		ereport(ERROR,
4220 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4221 				 errmsg("timestamp out of range")));
4222 
4223 	/* Use the specified fractional precision, if any. */
4224 	if (fprec)
4225 		AdjustTimestampForTypmod(&result, fprec);
4226 
4227 	PG_RETURN_TIMESTAMP(result);
4228 }
4229 
4230 /* ----------
4231  * TO_DATE
4232  *	Make Date from date_str which is formatted at argument 'fmt'
4233  * ----------
4234  */
4235 Datum
to_date(PG_FUNCTION_ARGS)4236 to_date(PG_FUNCTION_ARGS)
4237 {
4238 	text	   *date_txt = PG_GETARG_TEXT_PP(0);
4239 	text	   *fmt = PG_GETARG_TEXT_PP(1);
4240 	Oid			collid = PG_GET_COLLATION();
4241 	DateADT		result;
4242 	struct pg_tm tm;
4243 	fsec_t		fsec;
4244 
4245 	do_to_timestamp(date_txt, fmt, collid, false,
4246 					&tm, &fsec, NULL, NULL, NULL);
4247 
4248 	/* Prevent overflow in Julian-day routines */
4249 	if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4250 		ereport(ERROR,
4251 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4252 				 errmsg("date out of range: \"%s\"",
4253 						text_to_cstring(date_txt))));
4254 
4255 	result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4256 
4257 	/* Now check for just-out-of-range dates */
4258 	if (!IS_VALID_DATE(result))
4259 		ereport(ERROR,
4260 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4261 				 errmsg("date out of range: \"%s\"",
4262 						text_to_cstring(date_txt))));
4263 
4264 	PG_RETURN_DATEADT(result);
4265 }
4266 
4267 /*
4268  * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4269  * as a format string.  The collation 'collid' may be used for case-folding
4270  * rules in some cases.  'strict' specifies standard parsing mode.
4271  *
4272  * The actual data type (returned in 'typid', 'typmod') is determined by
4273  * the presence of date/time/zone components in the format string.
4274  *
4275  * When timezone component is present, the corresponding offset is
4276  * returned in '*tz'.
4277  *
4278  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
4279  * and zero value is returned.
4280  */
4281 Datum
parse_datetime(text * date_txt,text * fmt,Oid collid,bool strict,Oid * typid,int32 * typmod,int * tz,bool * have_error)4282 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4283 			   Oid *typid, int32 *typmod, int *tz,
4284 			   bool *have_error)
4285 {
4286 	struct pg_tm tm;
4287 	fsec_t		fsec;
4288 	int			fprec;
4289 	uint32		flags;
4290 
4291 	do_to_timestamp(date_txt, fmt, collid, strict,
4292 					&tm, &fsec, &fprec, &flags, have_error);
4293 	CHECK_ERROR;
4294 
4295 	*typmod = fprec ? fprec : -1;	/* fractional part precision */
4296 
4297 	if (flags & DCH_DATED)
4298 	{
4299 		if (flags & DCH_TIMED)
4300 		{
4301 			if (flags & DCH_ZONED)
4302 			{
4303 				TimestampTz result;
4304 
4305 				if (tm.tm_zone)
4306 				{
4307 					int			dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4308 
4309 					if (dterr)
4310 						DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4311 				}
4312 				else
4313 				{
4314 					/*
4315 					 * Time zone is present in format string, but not in input
4316 					 * string.  Assuming do_to_timestamp() triggers no error
4317 					 * this should be possible only in non-strict case.
4318 					 */
4319 					Assert(!strict);
4320 
4321 					RETURN_ERROR(ereport(ERROR,
4322 										 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4323 										  errmsg("missing time zone in input string for type timestamptz"))));
4324 				}
4325 
4326 				if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4327 					RETURN_ERROR(ereport(ERROR,
4328 										 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4329 										  errmsg("timestamptz out of range"))));
4330 
4331 				AdjustTimestampForTypmod(&result, *typmod);
4332 
4333 				*typid = TIMESTAMPTZOID;
4334 				return TimestampTzGetDatum(result);
4335 			}
4336 			else
4337 			{
4338 				Timestamp	result;
4339 
4340 				if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4341 					RETURN_ERROR(ereport(ERROR,
4342 										 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4343 										  errmsg("timestamp out of range"))));
4344 
4345 				AdjustTimestampForTypmod(&result, *typmod);
4346 
4347 				*typid = TIMESTAMPOID;
4348 				return TimestampGetDatum(result);
4349 			}
4350 		}
4351 		else
4352 		{
4353 			if (flags & DCH_ZONED)
4354 			{
4355 				RETURN_ERROR(ereport(ERROR,
4356 									 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4357 									  errmsg("datetime format is zoned but not timed"))));
4358 			}
4359 			else
4360 			{
4361 				DateADT		result;
4362 
4363 				/* Prevent overflow in Julian-day routines */
4364 				if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4365 					RETURN_ERROR(ereport(ERROR,
4366 										 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4367 										  errmsg("date out of range: \"%s\"",
4368 												 text_to_cstring(date_txt)))));
4369 
4370 				result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4371 					POSTGRES_EPOCH_JDATE;
4372 
4373 				/* Now check for just-out-of-range dates */
4374 				if (!IS_VALID_DATE(result))
4375 					RETURN_ERROR(ereport(ERROR,
4376 										 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4377 										  errmsg("date out of range: \"%s\"",
4378 												 text_to_cstring(date_txt)))));
4379 
4380 				*typid = DATEOID;
4381 				return DateADTGetDatum(result);
4382 			}
4383 		}
4384 	}
4385 	else if (flags & DCH_TIMED)
4386 	{
4387 		if (flags & DCH_ZONED)
4388 		{
4389 			TimeTzADT  *result = palloc(sizeof(TimeTzADT));
4390 
4391 			if (tm.tm_zone)
4392 			{
4393 				int			dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4394 
4395 				if (dterr)
4396 					RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
4397 			}
4398 			else
4399 			{
4400 				/*
4401 				 * Time zone is present in format string, but not in input
4402 				 * string.  Assuming do_to_timestamp() triggers no error this
4403 				 * should be possible only in non-strict case.
4404 				 */
4405 				Assert(!strict);
4406 
4407 				RETURN_ERROR(ereport(ERROR,
4408 									 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4409 									  errmsg("missing time zone in input string for type timetz"))));
4410 			}
4411 
4412 			if (tm2timetz(&tm, fsec, *tz, result) != 0)
4413 				RETURN_ERROR(ereport(ERROR,
4414 									 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4415 									  errmsg("timetz out of range"))));
4416 
4417 			AdjustTimeForTypmod(&result->time, *typmod);
4418 
4419 			*typid = TIMETZOID;
4420 			return TimeTzADTPGetDatum(result);
4421 		}
4422 		else
4423 		{
4424 			TimeADT		result;
4425 
4426 			if (tm2time(&tm, fsec, &result) != 0)
4427 				RETURN_ERROR(ereport(ERROR,
4428 									 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4429 									  errmsg("time out of range"))));
4430 
4431 			AdjustTimeForTypmod(&result, *typmod);
4432 
4433 			*typid = TIMEOID;
4434 			return TimeADTGetDatum(result);
4435 		}
4436 	}
4437 	else
4438 	{
4439 		RETURN_ERROR(ereport(ERROR,
4440 							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4441 							  errmsg("datetime format is not dated and not timed"))));
4442 	}
4443 
4444 on_error:
4445 	return (Datum) 0;
4446 }
4447 
4448 /*
4449  * do_to_timestamp: shared code for to_timestamp and to_date
4450  *
4451  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4452  * fractional seconds, and fractional precision.
4453  *
4454  * 'collid' identifies the collation to use, if needed.
4455  * 'std' specifies standard parsing mode.
4456  * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4457  * if that is not NULL.
4458  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
4459  *
4460  * We parse 'fmt' into a list of FormatNodes, which is then passed to
4461  * DCH_from_char to populate a TmFromChar with the parsed contents of
4462  * 'date_txt'.
4463  *
4464  * The TmFromChar is then analysed and converted into the final results in
4465  * struct 'tm', 'fsec', and 'fprec'.
4466  */
4467 static void
do_to_timestamp(text * date_txt,text * fmt,Oid collid,bool std,struct pg_tm * tm,fsec_t * fsec,int * fprec,uint32 * flags,bool * have_error)4468 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4469 				struct pg_tm *tm, fsec_t *fsec, int *fprec,
4470 				uint32 *flags, bool *have_error)
4471 {
4472 	FormatNode *format = NULL;
4473 	TmFromChar	tmfc;
4474 	int			fmt_len;
4475 	char	   *date_str;
4476 	int			fmask;
4477 	bool		incache = false;
4478 
4479 	Assert(tm != NULL);
4480 	Assert(fsec != NULL);
4481 
4482 	date_str = text_to_cstring(date_txt);
4483 
4484 	ZERO_tmfc(&tmfc);
4485 	ZERO_tm(tm);
4486 	*fsec = 0;
4487 	if (fprec)
4488 		*fprec = 0;
4489 	if (flags)
4490 		*flags = 0;
4491 	fmask = 0;					/* bit mask for ValidateDate() */
4492 
4493 	fmt_len = VARSIZE_ANY_EXHDR(fmt);
4494 
4495 	if (fmt_len)
4496 	{
4497 		char	   *fmt_str;
4498 
4499 		fmt_str = text_to_cstring(fmt);
4500 
4501 		if (fmt_len > DCH_CACHE_SIZE)
4502 		{
4503 			/*
4504 			 * Allocate new memory if format picture is bigger than static
4505 			 * cache and do not use cache (call parser always)
4506 			 */
4507 			format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4508 
4509 			parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
4510 						 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4511 		}
4512 		else
4513 		{
4514 			/*
4515 			 * Use cache buffers
4516 			 */
4517 			DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4518 
4519 			incache = true;
4520 			format = ent->format;
4521 		}
4522 
4523 #ifdef DEBUG_TO_FROM_CHAR
4524 		/* dump_node(format, fmt_len); */
4525 		/* dump_index(DCH_keywords, DCH_index); */
4526 #endif
4527 
4528 		DCH_from_char(format, date_str, &tmfc, collid, std, have_error);
4529 		CHECK_ERROR;
4530 
4531 		pfree(fmt_str);
4532 
4533 		if (flags)
4534 			*flags = DCH_datetime_type(format, have_error);
4535 
4536 		if (!incache)
4537 		{
4538 			pfree(format);
4539 			format = NULL;
4540 		}
4541 
4542 		CHECK_ERROR;
4543 	}
4544 
4545 	DEBUG_TMFC(&tmfc);
4546 
4547 	/*
4548 	 * Convert to_date/to_timestamp input fields to standard 'tm'
4549 	 */
4550 	if (tmfc.ssss)
4551 	{
4552 		int			x = tmfc.ssss;
4553 
4554 		tm->tm_hour = x / SECS_PER_HOUR;
4555 		x %= SECS_PER_HOUR;
4556 		tm->tm_min = x / SECS_PER_MINUTE;
4557 		x %= SECS_PER_MINUTE;
4558 		tm->tm_sec = x;
4559 	}
4560 
4561 	if (tmfc.ss)
4562 		tm->tm_sec = tmfc.ss;
4563 	if (tmfc.mi)
4564 		tm->tm_min = tmfc.mi;
4565 	if (tmfc.hh)
4566 		tm->tm_hour = tmfc.hh;
4567 
4568 	if (tmfc.clock == CLOCK_12_HOUR)
4569 	{
4570 		if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4571 		{
4572 			RETURN_ERROR(ereport(ERROR,
4573 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4574 								  errmsg("hour \"%d\" is invalid for the 12-hour clock",
4575 										 tm->tm_hour),
4576 								  errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
4577 		}
4578 
4579 		if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4580 			tm->tm_hour += HOURS_PER_DAY / 2;
4581 		else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4582 			tm->tm_hour = 0;
4583 	}
4584 
4585 	if (tmfc.year)
4586 	{
4587 		/*
4588 		 * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4589 		 * the year in the given century.  Keep in mind that the 21st century
4590 		 * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4591 		 * 600BC to 501BC.
4592 		 */
4593 		if (tmfc.cc && tmfc.yysz <= 2)
4594 		{
4595 			if (tmfc.bc)
4596 				tmfc.cc = -tmfc.cc;
4597 			tm->tm_year = tmfc.year % 100;
4598 			if (tm->tm_year)
4599 			{
4600 				if (tmfc.cc >= 0)
4601 					tm->tm_year += (tmfc.cc - 1) * 100;
4602 				else
4603 					tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4604 			}
4605 			else
4606 			{
4607 				/* find century year for dates ending in "00" */
4608 				tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4609 			}
4610 		}
4611 		else
4612 		{
4613 			/* If a 4-digit year is provided, we use that and ignore CC. */
4614 			tm->tm_year = tmfc.year;
4615 			if (tmfc.bc)
4616 				tm->tm_year = -tm->tm_year;
4617 			/* correct for our representation of BC years */
4618 			if (tm->tm_year < 0)
4619 				tm->tm_year++;
4620 		}
4621 		fmask |= DTK_M(YEAR);
4622 	}
4623 	else if (tmfc.cc)
4624 	{
4625 		/* use first year of century */
4626 		if (tmfc.bc)
4627 			tmfc.cc = -tmfc.cc;
4628 		if (tmfc.cc >= 0)
4629 			/* +1 because 21st century started in 2001 */
4630 			tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4631 		else
4632 			/* +1 because year == 599 is 600 BC */
4633 			tm->tm_year = tmfc.cc * 100 + 1;
4634 		fmask |= DTK_M(YEAR);
4635 	}
4636 
4637 	if (tmfc.j)
4638 	{
4639 		j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4640 		fmask |= DTK_DATE_M;
4641 	}
4642 
4643 	if (tmfc.ww)
4644 	{
4645 		if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4646 		{
4647 			/*
4648 			 * If tmfc.d is not set, then the date is left at the beginning of
4649 			 * the ISO week (Monday).
4650 			 */
4651 			if (tmfc.d)
4652 				isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4653 			else
4654 				isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4655 			fmask |= DTK_DATE_M;
4656 		}
4657 		else
4658 			tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4659 	}
4660 
4661 	if (tmfc.w)
4662 		tmfc.dd = (tmfc.w - 1) * 7 + 1;
4663 	if (tmfc.dd)
4664 	{
4665 		tm->tm_mday = tmfc.dd;
4666 		fmask |= DTK_M(DAY);
4667 	}
4668 	if (tmfc.mm)
4669 	{
4670 		tm->tm_mon = tmfc.mm;
4671 		fmask |= DTK_M(MONTH);
4672 	}
4673 
4674 	if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4675 	{
4676 		/*
4677 		 * The month and day field have not been set, so we use the
4678 		 * day-of-year field to populate them.  Depending on the date mode,
4679 		 * this field may be interpreted as a Gregorian day-of-year, or an ISO
4680 		 * week date day-of-year.
4681 		 */
4682 
4683 		if (!tm->tm_year && !tmfc.bc)
4684 		{
4685 			RETURN_ERROR(ereport(ERROR,
4686 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4687 								  errmsg("cannot calculate day of year without year information"))));
4688 		}
4689 
4690 		if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4691 		{
4692 			int			j0;		/* zeroth day of the ISO year, in Julian */
4693 
4694 			j0 = isoweek2j(tm->tm_year, 1) - 1;
4695 
4696 			j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4697 			fmask |= DTK_DATE_M;
4698 		}
4699 		else
4700 		{
4701 			const int  *y;
4702 			int			i;
4703 
4704 			static const int ysum[2][13] = {
4705 				{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4706 			{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4707 
4708 			y = ysum[isleap(tm->tm_year)];
4709 
4710 			for (i = 1; i <= MONTHS_PER_YEAR; i++)
4711 			{
4712 				if (tmfc.ddd <= y[i])
4713 					break;
4714 			}
4715 			if (tm->tm_mon <= 1)
4716 				tm->tm_mon = i;
4717 
4718 			if (tm->tm_mday <= 1)
4719 				tm->tm_mday = tmfc.ddd - y[i - 1];
4720 
4721 			fmask |= DTK_M(MONTH) | DTK_M(DAY);
4722 		}
4723 	}
4724 
4725 	if (tmfc.ms)
4726 		*fsec += tmfc.ms * 1000;
4727 	if (tmfc.us)
4728 		*fsec += tmfc.us;
4729 	if (fprec)
4730 		*fprec = tmfc.ff;		/* fractional precision, if specified */
4731 
4732 	/* Range-check date fields according to bit mask computed above */
4733 	if (fmask != 0)
4734 	{
4735 		/* We already dealt with AD/BC, so pass isjulian = true */
4736 		int			dterr = ValidateDate(fmask, true, false, false, tm);
4737 
4738 		if (dterr != 0)
4739 		{
4740 			/*
4741 			 * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4742 			 * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4743 			 * irrelevant hint about datestyle.
4744 			 */
4745 			RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4746 		}
4747 	}
4748 
4749 	/* Range-check time fields too */
4750 	if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4751 		tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4752 		tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4753 		*fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4754 	{
4755 		RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4756 	}
4757 
4758 	/* Save parsed time-zone into tm->tm_zone if it was specified */
4759 	if (tmfc.tzsign)
4760 	{
4761 		char	   *tz;
4762 
4763 		if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4764 			tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4765 		{
4766 			RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
4767 		}
4768 
4769 		tz = psprintf("%c%02d:%02d",
4770 					  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
4771 
4772 		tm->tm_zone = tz;
4773 	}
4774 
4775 	DEBUG_TM(tm);
4776 
4777 on_error:
4778 
4779 	if (format && !incache)
4780 		pfree(format);
4781 
4782 	pfree(date_str);
4783 }
4784 
4785 
4786 /**********************************************************************
4787  *	the NUMBER version part
4788  *********************************************************************/
4789 
4790 
/*
 * Fill the first 'max' bytes of 'str' with the character 'c' and put a
 * terminating NUL right behind them.  The buffer therefore has to provide
 * room for max + 1 bytes.  Returns 'str'.
 */
static char *
fill_str(char *str, int c, int max)
{
	char	   *terminator = str + max;

	memset(str, c, max);
	*terminator = '\0';

	return str;
}
4798 
/*
 * Reset the listed fields of the number description pointed to by _n to
 * zero.  Note that the argument is evaluated once per field.
 */
#define zeroize_NUM(_n) \
do { \
	(_n)->flag = \
	(_n)->lsign = \
	(_n)->pre = \
	(_n)->post = \
	(_n)->pre_lsign_num = \
	(_n)->need_locale = \
	(_n)->multi = \
	(_n)->zero_start = \
	(_n)->zero_end = 0; \
} while(0)
4811 
4812 /* This works the same as DCH_prevent_counter_overflow */
4813 static inline void
NUM_prevent_counter_overflow(void)4814 NUM_prevent_counter_overflow(void)
4815 {
4816 	if (NUMCounter >= (INT_MAX - 1))
4817 	{
4818 		for (int i = 0; i < n_NUMCache; i++)
4819 			NUMCache[i]->age >>= 1;
4820 		NUMCounter >>= 1;
4821 	}
4822 }
4823 
4824 /* select a NUMCacheEntry to hold the given format picture */
4825 static NUMCacheEntry *
NUM_cache_getnew(const char * str)4826 NUM_cache_getnew(const char *str)
4827 {
4828 	NUMCacheEntry *ent;
4829 
4830 	/* Ensure we can advance NUMCounter below */
4831 	NUM_prevent_counter_overflow();
4832 
4833 	/*
4834 	 * If cache is full, remove oldest entry (or recycle first not-valid one)
4835 	 */
4836 	if (n_NUMCache >= NUM_CACHE_ENTRIES)
4837 	{
4838 		NUMCacheEntry *old = NUMCache[0];
4839 
4840 #ifdef DEBUG_TO_FROM_CHAR
4841 		elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4842 #endif
4843 		if (old->valid)
4844 		{
4845 			for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4846 			{
4847 				ent = NUMCache[i];
4848 				if (!ent->valid)
4849 				{
4850 					old = ent;
4851 					break;
4852 				}
4853 				if (ent->age < old->age)
4854 					old = ent;
4855 			}
4856 		}
4857 #ifdef DEBUG_TO_FROM_CHAR
4858 		elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4859 #endif
4860 		old->valid = false;
4861 		strlcpy(old->str, str, NUM_CACHE_SIZE + 1);
4862 		old->age = (++NUMCounter);
4863 		/* caller is expected to fill format and Num, then set valid */
4864 		return old;
4865 	}
4866 	else
4867 	{
4868 #ifdef DEBUG_TO_FROM_CHAR
4869 		elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4870 #endif
4871 		Assert(NUMCache[n_NUMCache] == NULL);
4872 		NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4873 			MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry));
4874 		ent->valid = false;
4875 		strlcpy(ent->str, str, NUM_CACHE_SIZE + 1);
4876 		ent->age = (++NUMCounter);
4877 		/* caller is expected to fill format and Num, then set valid */
4878 		++n_NUMCache;
4879 		return ent;
4880 	}
4881 }
4882 
4883 /* look for an existing NUMCacheEntry matching the given format picture */
4884 static NUMCacheEntry *
NUM_cache_search(const char * str)4885 NUM_cache_search(const char *str)
4886 {
4887 	/* Ensure we can advance NUMCounter below */
4888 	NUM_prevent_counter_overflow();
4889 
4890 	for (int i = 0; i < n_NUMCache; i++)
4891 	{
4892 		NUMCacheEntry *ent = NUMCache[i];
4893 
4894 		if (ent->valid && strcmp(ent->str, str) == 0)
4895 		{
4896 			ent->age = (++NUMCounter);
4897 			return ent;
4898 		}
4899 	}
4900 
4901 	return NULL;
4902 }
4903 
4904 /* Find or create a NUMCacheEntry for the given format picture */
4905 static NUMCacheEntry *
NUM_cache_fetch(const char * str)4906 NUM_cache_fetch(const char *str)
4907 {
4908 	NUMCacheEntry *ent;
4909 
4910 	if ((ent = NUM_cache_search(str)) == NULL)
4911 	{
4912 		/*
4913 		 * Not in the cache, must run parser and save a new format-picture to
4914 		 * the cache.  Do not mark the cache entry valid until parsing
4915 		 * succeeds.
4916 		 */
4917 		ent = NUM_cache_getnew(str);
4918 
4919 		zeroize_NUM(&ent->Num);
4920 
4921 		parse_format(ent->format, str, NUM_keywords,
4922 					 NULL, NUM_index, NUM_FLAG, &ent->Num);
4923 
4924 		ent->valid = true;
4925 	}
4926 	return ent;
4927 }
4928 
4929 /* ----------
4930  * Cache routine for NUM to_char version
4931  * ----------
4932  */
4933 static FormatNode *
NUM_cache(int len,NUMDesc * Num,text * pars_str,bool * shouldFree)4934 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4935 {
4936 	FormatNode *format = NULL;
4937 	char	   *str;
4938 
4939 	str = text_to_cstring(pars_str);
4940 
4941 	if (len > NUM_CACHE_SIZE)
4942 	{
4943 		/*
4944 		 * Allocate new memory if format picture is bigger than static cache
4945 		 * and do not use cache (call parser always)
4946 		 */
4947 		format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4948 
4949 		*shouldFree = true;
4950 
4951 		zeroize_NUM(Num);
4952 
4953 		parse_format(format, str, NUM_keywords,
4954 					 NULL, NUM_index, NUM_FLAG, Num);
4955 	}
4956 	else
4957 	{
4958 		/*
4959 		 * Use cache buffers
4960 		 */
4961 		NUMCacheEntry *ent = NUM_cache_fetch(str);
4962 
4963 		*shouldFree = false;
4964 
4965 		format = ent->format;
4966 
4967 		/*
4968 		 * Copy cache to used struct
4969 		 */
4970 		Num->flag = ent->Num.flag;
4971 		Num->lsign = ent->Num.lsign;
4972 		Num->pre = ent->Num.pre;
4973 		Num->post = ent->Num.post;
4974 		Num->pre_lsign_num = ent->Num.pre_lsign_num;
4975 		Num->need_locale = ent->Num.need_locale;
4976 		Num->multi = ent->Num.multi;
4977 		Num->zero_start = ent->Num.zero_start;
4978 		Num->zero_end = ent->Num.zero_end;
4979 	}
4980 
4981 #ifdef DEBUG_TO_FROM_CHAR
4982 	/* dump_node(format, len); */
4983 	dump_index(NUM_keywords, NUM_index);
4984 #endif
4985 
4986 	pfree(str);
4987 	return format;
4988 }
4989 
4990 
4991 static char *
int_to_roman(int number)4992 int_to_roman(int number)
4993 {
4994 	int			len,
4995 				num;
4996 	char	   *p,
4997 			   *result,
4998 				numstr[12];
4999 
5000 	result = (char *) palloc(16);
5001 	*result = '\0';
5002 
5003 	if (number > 3999 || number < 1)
5004 	{
5005 		fill_str(result, '#', 15);
5006 		return result;
5007 	}
5008 	len = snprintf(numstr, sizeof(numstr), "%d", number);
5009 
5010 	for (p = numstr; *p != '\0'; p++, --len)
5011 	{
5012 		num = *p - ('0' + 1);
5013 		if (num < 0)
5014 			continue;
5015 
5016 		if (len > 3)
5017 		{
5018 			while (num-- != -1)
5019 				strcat(result, "M");
5020 		}
5021 		else
5022 		{
5023 			if (len == 3)
5024 				strcat(result, rm100[num]);
5025 			else if (len == 2)
5026 				strcat(result, rm10[num]);
5027 			else if (len == 1)
5028 				strcat(result, rm1[num]);
5029 		}
5030 	}
5031 	return result;
5032 }
5033 
5034 
5035 
5036 /* ----------
5037  * Locale
5038  * ----------
5039  */
5040 static void
NUM_prepare_locale(NUMProc * Np)5041 NUM_prepare_locale(NUMProc *Np)
5042 {
5043 	if (Np->Num->need_locale)
5044 	{
5045 		struct lconv *lconv;
5046 
5047 		/*
5048 		 * Get locales
5049 		 */
5050 		lconv = PGLC_localeconv();
5051 
5052 		/*
5053 		 * Positive / Negative number sign
5054 		 */
5055 		if (lconv->negative_sign && *lconv->negative_sign)
5056 			Np->L_negative_sign = lconv->negative_sign;
5057 		else
5058 			Np->L_negative_sign = "-";
5059 
5060 		if (lconv->positive_sign && *lconv->positive_sign)
5061 			Np->L_positive_sign = lconv->positive_sign;
5062 		else
5063 			Np->L_positive_sign = "+";
5064 
5065 		/*
5066 		 * Number decimal point
5067 		 */
5068 		if (lconv->decimal_point && *lconv->decimal_point)
5069 			Np->decimal = lconv->decimal_point;
5070 
5071 		else
5072 			Np->decimal = ".";
5073 
5074 		if (!IS_LDECIMAL(Np->Num))
5075 			Np->decimal = ".";
5076 
5077 		/*
5078 		 * Number thousands separator
5079 		 *
5080 		 * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
5081 		 * but "" for thousands_sep, so we set the thousands_sep too.
5082 		 * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
5083 		 */
5084 		if (lconv->thousands_sep && *lconv->thousands_sep)
5085 			Np->L_thousands_sep = lconv->thousands_sep;
5086 		/* Make sure thousands separator doesn't match decimal point symbol. */
5087 		else if (strcmp(Np->decimal, ",") != 0)
5088 			Np->L_thousands_sep = ",";
5089 		else
5090 			Np->L_thousands_sep = ".";
5091 
5092 		/*
5093 		 * Currency symbol
5094 		 */
5095 		if (lconv->currency_symbol && *lconv->currency_symbol)
5096 			Np->L_currency_symbol = lconv->currency_symbol;
5097 		else
5098 			Np->L_currency_symbol = " ";
5099 	}
5100 	else
5101 	{
5102 		/*
5103 		 * Default values
5104 		 */
5105 		Np->L_negative_sign = "-";
5106 		Np->L_positive_sign = "+";
5107 		Np->decimal = ".";
5108 
5109 		Np->L_thousands_sep = ",";
5110 		Np->L_currency_symbol = " ";
5111 	}
5112 }
5113 
/* ----------
 * Return a pointer to the last relevant character after the decimal point,
 * i.e. the last character behind the '.' that is not '0':
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *dot = strchr(num, '.');
	char	   *last;
	char	   *scan;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (dot == NULL)
		return NULL;

	/* until a non-zero character shows up, the point itself is the answer */
	last = dot;

	for (scan = dot + 1; *scan != '\0'; scan++)
	{
		if (*scan != '0')
			last = scan;
	}

	return last;
}
5145 
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * OVERLOAD_TEST: true if we've reached end of input string
 * AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * Both expand to expressions that refer to a NUMProc pointer named "Np" and
 * to an integer "input_len" (the length of the input string starting at
 * Np->inout), so they can only be used where those two names are in scope.
 */
#define OVERLOAD_TEST	(Np->inout_p >= Np->inout + input_len)
#define AMOUNT_TEST(s)	(Np->inout_p <= Np->inout + (input_len - (s)))
5153 
/* ----------
 * Number extraction for TO_NUMBER()
 *
 * Handles one digit / decimal-point format node while parsing input.  At
 * most one digit or one decimal point is copied from the input position
 * (Np->inout_p) to the number being built (Np->number_p), and a sign found
 * before or after the number is recorded in the first byte of Np->number.
 *
 *	Np			processing state; inout_p, number, number_p, read_pre,
 *				read_post and read_dec are read and updated here
 *	id			id of the format keyword being processed
 *	input_len	length in bytes of the input string (used by OVERLOAD_TEST
 *				and AMOUNT_TEST)
 *
 * When a single-byte item is consumed, inout_p is left pointing at it; the
 * caller's loop in NUM_processor() performs the final increment.
 * ----------
 */
static void
NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
{
	bool		isread = false;	/* did this call consume a digit or point? */

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, " --- scan start --- id=%s",
		 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
#endif

	if (OVERLOAD_TEST)
		return;

	/* skip (at most) one blank in front of the item */
	if (*Np->inout_p == ' ')
		Np->inout_p++;

	if (OVERLOAD_TEST)
		return;

	/*
	 * read sign before number
	 *
	 * Only attempted while no sign has been stored yet (first byte of
	 * "number" is still a space) and no digit has been read so far.
	 */
	if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
		(Np->read_pre + Np->read_post) == 0)
	{
#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
			 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
#endif

		/*
		 * locale sign
		 */
		if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
		{
			int			x = 0;

#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
#endif
			/* x is the sign string's length; an empty sign never matches */
			if ((x = strlen(Np->L_negative_sign)) &&
				AMOUNT_TEST(x) &&
				strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
			{
				Np->inout_p += x;
				*Np->number = '-';
			}
			else if ((x = strlen(Np->L_positive_sign)) &&
					 AMOUNT_TEST(x) &&
					 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
			{
				Np->inout_p += x;
				*Np->number = '+';
			}
		}
		else
		{
#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
#endif

			/*
			 * simple + - < >
			 */
			if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
										*Np->inout_p == '<'))
			{
				*Np->number = '-';	/* set - */
				Np->inout_p++;
			}
			else if (*Np->inout_p == '+')
			{
				*Np->number = '+';	/* set + */
				Np->inout_p++;
			}
		}
	}

	/* consuming a sign may have moved us to the end of the input */
	if (OVERLOAD_TEST)
		return;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
#endif

	/*
	 * read digit or decimal point
	 */
	if (isdigit((unsigned char) *Np->inout_p))
	{
		/* all post-decimal digits allowed by the format were already read */
		if (Np->read_dec && Np->read_post == Np->Num->post)
			return;

		*Np->number_p = *Np->inout_p;
		Np->number_p++;

		if (Np->read_dec)
			Np->read_post++;
		else
			Np->read_pre++;

		isread = true;

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
#endif
	}
	else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
	{
		/*
		 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
		 * Np->decimal is always just "." if we don't have a D format token.
		 * So we just unconditionally match to Np->decimal.
		 */
		int			x = strlen(Np->decimal);

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Try read decimal point (%c)",
			 *Np->inout_p);
#endif
		if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
		{
			/* stop on the last byte; NUM_processor() does the final ++ */
			Np->inout_p += x - 1;
			*Np->number_p = '.';
			Np->number_p++;
			Np->read_dec = true;
			isread = true;
		}
	}

	if (OVERLOAD_TEST)
		return;

	/*
	 * Read sign behind "last" number
	 *
	 * We need sign detection because determining the exact position of a
	 * post-sign is difficult:
	 *
	 *	FM9999.9999999S		-> 123.001-
	 *	9.9S				-> .5-
	 *	FM9.999999MI		-> 5.01-
	 */
	if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
	{
		/*
		 * locale sign (NUM_S) is always anchored behind a last number, if: -
		 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
		 * next char is not digit
		 */
		if (IS_LSIGN(Np->Num) && isread &&
			(Np->inout_p + 1) < Np->inout + input_len &&
			!isdigit((unsigned char) *(Np->inout_p + 1)))
		{
			int			x;
			char	   *tmp = Np->inout_p++;	/* remember position for undo */

#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
#endif
			if ((x = strlen(Np->L_negative_sign)) &&
				AMOUNT_TEST(x) &&
				strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
			{
				Np->inout_p += x - 1;	/* -1 .. NUM_processor() do inout_p++ */
				*Np->number = '-';
			}
			else if ((x = strlen(Np->L_positive_sign)) &&
					 AMOUNT_TEST(x) &&
					 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
			{
				Np->inout_p += x - 1;	/* -1 .. NUM_processor() do inout_p++ */
				*Np->number = '+';
			}
			if (*Np->number == ' ')
				/* no sign read */
				Np->inout_p = tmp;
		}

		/*
		 * Try to read a non-locale sign.  This happens only if the format is
		 * not exact and we cannot determine the sign position of MI/PL/SG,
		 * for example:
		 *
		 * FM9.999999MI			   -> 5.01-
		 *
		 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
		 * like to_number('1 -', '9S') where sign is not anchored to last
		 * number.
		 */
		else if (isread == false && IS_LSIGN(Np->Num) == false &&
				 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
		{
#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
#endif

			/*
			 * simple + -
			 */
			if (*Np->inout_p == '-' || *Np->inout_p == '+')
				/* NUM_processor() do inout_p++ */
				*Np->number = *Np->inout_p;
		}
	}
}
5362 
/*
 * IS_PREDEC_SPACE(_n): true when all of the following hold for NUMProc *_n:
 *	- the format contains no '0' specifier (IS_ZERO is false),
 *	- nothing has been consumed from the number string yet
 *	  (number_p still equals number),
 *	- the number string starts with the digit '0', and
 *	- the format has at least one post-decimal position (Num->post != 0).
 *
 * NUM_numpart_to_char() uses this to print a blank instead of a lone
 * leading zero, e.g. '0.1' with format '9.9' gives ' .1'.
 */
#define IS_PREDEC_SPACE(_n) \
		(IS_ZERO((_n)->Num)==false && \
		 (_n)->number == (_n)->number_p && \
		 *(_n)->number == '0' && \
				 (_n)->Num->post != 0)
5368 
5369 /* ----------
5370  * Add digit or sign to number-string
5371  * ----------
5372  */
5373 static void
NUM_numpart_to_char(NUMProc * Np,int id)5374 NUM_numpart_to_char(NUMProc *Np, int id)
5375 {
5376 	int			end;
5377 
5378 	if (IS_ROMAN(Np->Num))
5379 		return;
5380 
5381 	/* Note: in this elog() output not set '\0' in 'inout' */
5382 
5383 #ifdef DEBUG_TO_FROM_CHAR
5384 
5385 	/*
5386 	 * Np->num_curr is number of current item in format-picture, it is not
5387 	 * current position in inout!
5388 	 */
5389 	elog(DEBUG_elog_output,
5390 		 "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
5391 		 Np->sign_wrote,
5392 		 Np->num_curr,
5393 		 Np->number_p,
5394 		 Np->inout);
5395 #endif
5396 	Np->num_in = false;
5397 
5398 	/*
5399 	 * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
5400 	 * handle "9.9" --> " .1"
5401 	 */
5402 	if (Np->sign_wrote == false &&
5403 		(Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
5404 		(IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
5405 	{
5406 		if (IS_LSIGN(Np->Num))
5407 		{
5408 			if (Np->Num->lsign == NUM_LSIGN_PRE)
5409 			{
5410 				if (Np->sign == '-')
5411 					strcpy(Np->inout_p, Np->L_negative_sign);
5412 				else
5413 					strcpy(Np->inout_p, Np->L_positive_sign);
5414 				Np->inout_p += strlen(Np->inout_p);
5415 				Np->sign_wrote = true;
5416 			}
5417 		}
5418 		else if (IS_BRACKET(Np->Num))
5419 		{
5420 			*Np->inout_p = Np->sign == '+' ? ' ' : '<';
5421 			++Np->inout_p;
5422 			Np->sign_wrote = true;
5423 		}
5424 		else if (Np->sign == '+')
5425 		{
5426 			if (!IS_FILLMODE(Np->Num))
5427 			{
5428 				*Np->inout_p = ' '; /* Write + */
5429 				++Np->inout_p;
5430 			}
5431 			Np->sign_wrote = true;
5432 		}
5433 		else if (Np->sign == '-')
5434 		{						/* Write - */
5435 			*Np->inout_p = '-';
5436 			++Np->inout_p;
5437 			Np->sign_wrote = true;
5438 		}
5439 	}
5440 
5441 
5442 	/*
5443 	 * digits / FM / Zero / Dec. point
5444 	 */
5445 	if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
5446 	{
5447 		if (Np->num_curr < Np->out_pre_spaces &&
5448 			(Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
5449 		{
5450 			/*
5451 			 * Write blank space
5452 			 */
5453 			if (!IS_FILLMODE(Np->Num))
5454 			{
5455 				*Np->inout_p = ' '; /* Write ' ' */
5456 				++Np->inout_p;
5457 			}
5458 		}
5459 		else if (IS_ZERO(Np->Num) &&
5460 				 Np->num_curr < Np->out_pre_spaces &&
5461 				 Np->Num->zero_start <= Np->num_curr)
5462 		{
5463 			/*
5464 			 * Write ZERO
5465 			 */
5466 			*Np->inout_p = '0'; /* Write '0' */
5467 			++Np->inout_p;
5468 			Np->num_in = true;
5469 		}
5470 		else
5471 		{
5472 			/*
5473 			 * Write Decimal point
5474 			 */
5475 			if (*Np->number_p == '.')
5476 			{
5477 				if (!Np->last_relevant || *Np->last_relevant != '.')
5478 				{
5479 					strcpy(Np->inout_p, Np->decimal);	/* Write DEC/D */
5480 					Np->inout_p += strlen(Np->inout_p);
5481 				}
5482 
5483 				/*
5484 				 * Ora 'n' -- FM9.9 --> 'n.'
5485 				 */
5486 				else if (IS_FILLMODE(Np->Num) &&
5487 						 Np->last_relevant && *Np->last_relevant == '.')
5488 				{
5489 					strcpy(Np->inout_p, Np->decimal);	/* Write DEC/D */
5490 					Np->inout_p += strlen(Np->inout_p);
5491 				}
5492 			}
5493 			else
5494 			{
5495 				/*
5496 				 * Write Digits
5497 				 */
5498 				if (Np->last_relevant && Np->number_p > Np->last_relevant &&
5499 					id != NUM_0)
5500 					;
5501 
5502 				/*
5503 				 * '0.1' -- 9.9 --> '  .1'
5504 				 */
5505 				else if (IS_PREDEC_SPACE(Np))
5506 				{
5507 					if (!IS_FILLMODE(Np->Num))
5508 					{
5509 						*Np->inout_p = ' ';
5510 						++Np->inout_p;
5511 					}
5512 
5513 					/*
5514 					 * '0' -- FM9.9 --> '0.'
5515 					 */
5516 					else if (Np->last_relevant && *Np->last_relevant == '.')
5517 					{
5518 						*Np->inout_p = '0';
5519 						++Np->inout_p;
5520 					}
5521 				}
5522 				else
5523 				{
5524 					*Np->inout_p = *Np->number_p;	/* Write DIGIT */
5525 					++Np->inout_p;
5526 					Np->num_in = true;
5527 				}
5528 			}
5529 			/* do no exceed string length */
5530 			if (*Np->number_p)
5531 				++Np->number_p;
5532 		}
5533 
5534 		end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
5535 
5536 		if (Np->last_relevant && Np->last_relevant == Np->number_p)
5537 			end = Np->num_curr;
5538 
5539 		if (Np->num_curr + 1 == end)
5540 		{
5541 			if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
5542 			{
5543 				*Np->inout_p = Np->sign == '+' ? ' ' : '>';
5544 				++Np->inout_p;
5545 			}
5546 			else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
5547 			{
5548 				if (Np->sign == '-')
5549 					strcpy(Np->inout_p, Np->L_negative_sign);
5550 				else
5551 					strcpy(Np->inout_p, Np->L_positive_sign);
5552 				Np->inout_p += strlen(Np->inout_p);
5553 			}
5554 		}
5555 	}
5556 
5557 	++Np->num_curr;
5558 }
5559 
5560 /*
5561  * Skip over "n" input characters, but only if they aren't numeric data
5562  */
5563 static void
NUM_eat_non_data_chars(NUMProc * Np,int n,int input_len)5564 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
5565 {
5566 	while (n-- > 0)
5567 	{
5568 		if (OVERLOAD_TEST)
5569 			break;				/* end of input */
5570 		if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
5571 			break;				/* it's a data character */
5572 		Np->inout_p += pg_mblen(Np->inout_p);
5573 	}
5574 }
5575 
/*
 * NUM_processor: walk a parsed number format and either build output text
 * (to_char direction) or extract a number string from input (to_number
 * direction).
 *
 *	node		array of format nodes, terminated by a NODE_TYPE_END entry
 *	Num			format description; modified here (zero_start is
 *				decremented if nonzero, flag bits may be changed, and in the
 *				to_number direction "post" is set to the number of
 *				post-decimal digits actually read)
 *	inout		to_char: buffer the output is written to;
 *				to_number: the input string being scanned
 *	number		to_char: the number string to format;
 *				to_number: buffer receiving the result, whose first byte
 *				holds the sign (or a space)
 *	input_len	to_number: length in bytes of inout (used by OVERLOAD_TEST
 *				and AMOUNT_TEST)
 *	to_char_out_pre_spaces, sign
 *				used only when is_to_char is true
 *	is_to_char	selects the direction
 *	collid		not referenced in this function's body
 *
 * Returns inout in the to_char direction and number in the to_number
 * direction.  "EEEE" and "RN" formats raise an error in the to_number
 * direction.
 */
static char *
NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
			  char *number, int input_len, int to_char_out_pre_spaces,
			  int sign, bool is_to_char, Oid collid)
{
	FormatNode *n;
	NUMProc		_Np,
			   *Np = &_Np;
	const char *pattern;
	int			pattern_len;

	MemSet(Np, 0, sizeof(NUMProc));

	Np->Num = Num;
	Np->is_to_char = is_to_char;
	Np->number = number;
	Np->inout = inout;
	Np->last_relevant = NULL;
	Np->read_post = 0;
	Np->read_pre = 0;
	Np->read_dec = false;

	if (Np->Num->zero_start)
		--Np->Num->zero_start;

	/*
	 * For an EEEE format the number string is copied to the output as is; no
	 * format nodes are processed.
	 */
	if (IS_EEEE(Np->Num))
	{
		if (!Np->is_to_char)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("\"EEEE\" not supported for input")));
		return strcpy(inout, number);
	}

	/*
	 * Roman correction
	 *
	 * All other format properties are reset; only the fill-mode flag is kept
	 * next to the Roman flag.
	 */
	if (IS_ROMAN(Np->Num))
	{
		if (!Np->is_to_char)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("\"RN\" not supported for input")));

		Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
			Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;

		if (IS_FILLMODE(Np->Num))
		{
			Np->Num->flag = 0;
			Np->Num->flag |= NUM_F_FILLMODE;
		}
		else
			Np->Num->flag = 0;
		Np->Num->flag |= NUM_F_ROMAN;
	}

	/*
	 * Sign
	 */
	if (is_to_char)
	{
		Np->sign = sign;

		/* MI/PL/SG - write sign itself and not in number */
		if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
		{
			if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
				Np->sign_wrote = false; /* need sign */
			else
				Np->sign_wrote = true;	/* needn't sign */
		}
		else
		{
			if (Np->sign != '-')
			{
				if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
					Np->Num->flag &= ~NUM_F_BRACKET;
				if (IS_MINUS(Np->Num))
					Np->Num->flag &= ~NUM_F_MINUS;
			}
			else if (Np->sign != '+' && IS_PLUS(Np->Num))
				Np->Num->flag &= ~NUM_F_PLUS;

			if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
				Np->sign_wrote = true;	/* needn't sign */
			else
				Np->sign_wrote = false; /* need sign */

			if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
				Np->Num->lsign = NUM_LSIGN_POST;
		}
	}
	else
		Np->sign = false;

	/*
	 * Count
	 */
	Np->num_count = Np->Num->post + Np->Num->pre - 1;

	if (is_to_char)
	{
		Np->out_pre_spaces = to_char_out_pre_spaces;

		if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
		{
			Np->last_relevant = get_last_relevant_decnum(Np->number);

			/*
			 * If any '0' specifiers are present, make sure we don't strip
			 * those digits.
			 */
			if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
			{
				char	   *last_zero;

				last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
				if (Np->last_relevant < last_zero)
					Np->last_relevant = last_zero;
			}
		}

		if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
			++Np->num_count;
	}
	else
	{
		Np->out_pre_spaces = 0;
		*Np->number = ' ';		/* sign space */
		*(Np->number + 1) = '\0';
	}

	Np->num_in = 0;
	Np->num_curr = 0;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output,
		 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
		 Np->sign,
		 Np->number,
		 Np->Num->pre,
		 Np->Num->post,
		 Np->num_count,
		 Np->out_pre_spaces,
		 Np->sign_wrote ? "Yes" : "No",
		 IS_ZERO(Np->Num) ? "Yes" : "No",
		 Np->Num->zero_start,
		 Np->Num->zero_end,
		 Np->last_relevant ? Np->last_relevant : "<not set>",
		 IS_BRACKET(Np->Num) ? "Yes" : "No",
		 IS_PLUS(Np->Num) ? "Yes" : "No",
		 IS_MINUS(Np->Num) ? "Yes" : "No",
		 IS_FILLMODE(Np->Num) ? "Yes" : "No",
		 IS_ROMAN(Np->Num) ? "Yes" : "No",
		 IS_EEEE(Np->Num) ? "Yes" : "No"
		);
#endif

	/*
	 * Locale
	 */
	NUM_prepare_locale(Np);

	/*
	 * Processor direct cycle
	 */
	if (Np->is_to_char)
		Np->number_p = Np->number;
	else
		Np->number_p = Np->number + 1;	/* first char is space for sign */

	for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
	{
		if (!Np->is_to_char)
		{
			/*
			 * Check at least one byte remains to be scanned.  (In actions
			 * below, must use AMOUNT_TEST if we want to read more bytes than
			 * that.)
			 */
			if (OVERLOAD_TEST)
				break;
		}

		/*
		 * Format pictures actions
		 */
		if (n->type == NODE_TYPE_ACTION)
		{
			/*
			 * Create/read digit/zero/blank/sign/special-case
			 *
			 * 'NUM_S' note: The locale sign is anchored to number and we
			 * read/write it when we work with first or last number
			 * (NUM_0/NUM_9).  This is why NUM_S is missing in switch().
			 *
			 * Notice the "Np->inout_p++" at the bottom of the loop.  This is
			 * why most of the actions advance inout_p one less than you might
			 * expect.  In cases where we don't want that increment to happen,
			 * a switch case ends with "continue" not "break".
			 */
			switch (n->key->id)
			{
				case NUM_9:
				case NUM_0:
				case NUM_DEC:
				case NUM_D:
					if (Np->is_to_char)
					{
						/* the helper advances inout_p itself */
						NUM_numpart_to_char(Np, n->key->id);
						continue;	/* for() */
					}
					else
					{
						NUM_numpart_from_char(Np, n->key->id, input_len);
						break;	/* switch() case: */
					}

				case NUM_COMMA:
					if (Np->is_to_char)
					{
						/* before the first digit a comma becomes a blank */
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
							else
								*Np->inout_p = ' ';
						}
						else
							*Np->inout_p = ',';
					}
					else
					{
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
						}
						/* consume an input byte only if it really is a comma */
						if (*Np->inout_p != ',')
							continue;
					}
					break;

				case NUM_G:
					pattern = Np->L_thousands_sep;
					pattern_len = strlen(pattern);
					if (Np->is_to_char)
					{
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
							else
							{
								/* just in case there are MB chars */
								pattern_len = pg_mbstrlen(pattern);
								memset(Np->inout_p, ' ', pattern_len);
								Np->inout_p += pattern_len - 1;
							}
						}
						else
						{
							strcpy(Np->inout_p, pattern);
							Np->inout_p += pattern_len - 1;
						}
					}
					else
					{
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
						}

						/*
						 * Because L_thousands_sep typically contains data
						 * characters (either '.' or ','), we can't use
						 * NUM_eat_non_data_chars here.  Instead skip only if
						 * the input matches L_thousands_sep.
						 */
						if (AMOUNT_TEST(pattern_len) &&
							strncmp(Np->inout_p, pattern, pattern_len) == 0)
							Np->inout_p += pattern_len - 1;
						else
							continue;
					}
					break;

				case NUM_L:
					pattern = Np->L_currency_symbol;
					if (Np->is_to_char)
					{
						strcpy(Np->inout_p, pattern);
						Np->inout_p += strlen(pattern) - 1;
					}
					else
					{
						NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
						continue;
					}
					break;

				case NUM_RN:
					/* fill mode: as is; otherwise right-aligned in 15 columns */
					if (IS_FILLMODE(Np->Num))
					{
						strcpy(Np->inout_p, Np->number_p);
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					else
					{
						sprintf(Np->inout_p, "%15s", Np->number_p);
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					break;

				case NUM_rn:
					/* same as NUM_RN, but passed through asc_tolower_z() */
					if (IS_FILLMODE(Np->Num))
					{
						strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					else
					{
						sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					break;

				case NUM_th:
					/* no suffix for Roman, '#'-filled, negative or decimal */
					if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
						Np->sign == '-' || IS_DECIMAL(Np->Num))
						continue;

					if (Np->is_to_char)
					{
						strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
						Np->inout_p += 1;
					}
					else
					{
						/* All variants of 'th' occupy 2 characters */
						NUM_eat_non_data_chars(Np, 2, input_len);
						continue;
					}
					break;

				case NUM_TH:
					/* no suffix for Roman, '#'-filled, negative or decimal */
					if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
						Np->sign == '-' || IS_DECIMAL(Np->Num))
						continue;

					if (Np->is_to_char)
					{
						strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
						Np->inout_p += 1;
					}
					else
					{
						/* All variants of 'TH' occupy 2 characters */
						NUM_eat_non_data_chars(Np, 2, input_len);
						continue;
					}
					break;

				case NUM_MI:
					if (Np->is_to_char)
					{
						if (Np->sign == '-')
							*Np->inout_p = '-';
						else if (IS_FILLMODE(Np->Num))
							continue;
						else
							*Np->inout_p = ' ';
					}
					else
					{
						if (*Np->inout_p == '-')
							*Np->number = '-';
						else
						{
							NUM_eat_non_data_chars(Np, 1, input_len);
							continue;
						}
					}
					break;

				case NUM_PL:
					if (Np->is_to_char)
					{
						if (Np->sign == '+')
							*Np->inout_p = '+';
						else if (IS_FILLMODE(Np->Num))
							continue;
						else
							*Np->inout_p = ' ';
					}
					else
					{
						if (*Np->inout_p == '+')
							*Np->number = '+';
						else
						{
							NUM_eat_non_data_chars(Np, 1, input_len);
							continue;
						}
					}
					break;

				case NUM_SG:
					if (Np->is_to_char)
						*Np->inout_p = Np->sign;
					else
					{
						if (*Np->inout_p == '-')
							*Np->number = '-';
						else if (*Np->inout_p == '+')
							*Np->number = '+';
						else
						{
							NUM_eat_non_data_chars(Np, 1, input_len);
							continue;
						}
					}
					break;

				default:
					continue;
					break;
			}
		}
		else
		{
			/*
			 * In TO_CHAR, non-pattern characters in the format are copied to
			 * the output.  In TO_NUMBER, we skip one input character for each
			 * non-pattern format character, whether or not it matches the
			 * format character.
			 */
			if (Np->is_to_char)
			{
				strcpy(Np->inout_p, n->character);
				Np->inout_p += strlen(Np->inout_p);
			}
			else
			{
				Np->inout_p += pg_mblen(Np->inout_p);
			}
			continue;
		}
		/* final one-byte advance for every case that ended with "break" */
		Np->inout_p++;
	}

	if (Np->is_to_char)
	{
		*Np->inout_p = '\0';
		return Np->inout;
	}
	else
	{
		/* drop a trailing decimal point, otherwise just terminate */
		if (*(Np->number_p - 1) == '.')
			*(Np->number_p - 1) = '\0';
		else
			*Np->number_p = '\0';

		/*
		 * Correction - precision of dec. number
		 */
		Np->Num->post = Np->read_post;

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
#endif
		return Np->number;
	}
}
6052 
/* ----------
 * MACRO: Start part of NUM - for all NUM's to_char variants
 *	(sorry, but I hate copy same code - macro is better..)
 *
 * Must be expanded inside a function that has the locals "fmt", "result",
 * "format", "Num" and "shouldFree" in scope.  It:
 *	- makes the enclosing function return an empty text value when the
 *	  format is empty or so long that the buffer size computation below
 *	  could exceed INT_MAX,
 *	- allocates a zero-filled "result" with room for NUM_MAX_ITEM_SIZ bytes
 *	  per format byte, plus a terminator and the varlena header,
 *	- obtains the parsed format from NUM_cache(), which also fills "Num" and
 *	  "shouldFree".
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int len = VARSIZE_ANY_EXHDR(fmt); \
	if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ)		\
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	result	= (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ);	\
	format	= NUM_cache(len, &Num, fmt, &shouldFree);		\
} while (0)
6066 
/* ----------
 * MACRO: Finish part of NUM
 *
 * Must be expanded inside a function that has the locals "format", "Num",
 * "result", "numstr", "out_pre_spaces", "sign" and "shouldFree" in scope.
 * It runs NUM_processor() in the to_char direction, writing into the data
 * area of "result", frees "format" if "shouldFree" is set, and then sets
 * the length of "result" from the length of the string that was produced.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int		len; \
									\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
									\
	if (shouldFree)					\
		pfree(format);				\
									\
	/*								\
	 * Convert null-terminated representation of result to standard text. \
	 * The result is usually much bigger than it needs to be, but there \
	 * seems little point in realloc'ing it smaller. \
	 */								\
	len = strlen(VARDATA(result));	\
	SET_VARSIZE(result, len + VARHDRSZ); \
} while (0)
6088 
6089 /* -------------------
6090  * NUMERIC to_number() (convert string to numeric)
6091  * -------------------
6092  */
6093 Datum
numeric_to_number(PG_FUNCTION_ARGS)6094 numeric_to_number(PG_FUNCTION_ARGS)
6095 {
6096 	text	   *value = PG_GETARG_TEXT_PP(0);
6097 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6098 	NUMDesc		Num;
6099 	Datum		result;
6100 	FormatNode *format;
6101 	char	   *numstr;
6102 	bool		shouldFree;
6103 	int			len = 0;
6104 	int			scale,
6105 				precision;
6106 
6107 	len = VARSIZE_ANY_EXHDR(fmt);
6108 
6109 	if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
6110 		PG_RETURN_NULL();
6111 
6112 	format = NUM_cache(len, &Num, fmt, &shouldFree);
6113 
6114 	numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
6115 
6116 	NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
6117 				  VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
6118 
6119 	scale = Num.post;
6120 	precision = Num.pre + Num.multi + scale;
6121 
6122 	if (shouldFree)
6123 		pfree(format);
6124 
6125 	result = DirectFunctionCall3(numeric_in,
6126 								 CStringGetDatum(numstr),
6127 								 ObjectIdGetDatum(InvalidOid),
6128 								 Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
6129 
6130 	if (IS_MULTI(&Num))
6131 	{
6132 		Numeric		x;
6133 		Numeric		a = int64_to_numeric(10);
6134 		Numeric		b = int64_to_numeric(-Num.multi);
6135 
6136 		x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6137 												NumericGetDatum(a),
6138 												NumericGetDatum(b)));
6139 		result = DirectFunctionCall2(numeric_mul,
6140 									 result,
6141 									 NumericGetDatum(x));
6142 	}
6143 
6144 	pfree(numstr);
6145 	return result;
6146 }
6147 
6148 /* ------------------
6149  * NUMERIC to_char()
6150  * ------------------
6151  */
6152 Datum
numeric_to_char(PG_FUNCTION_ARGS)6153 numeric_to_char(PG_FUNCTION_ARGS)
6154 {
6155 	Numeric		value = PG_GETARG_NUMERIC(0);
6156 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6157 	NUMDesc		Num;
6158 	FormatNode *format;
6159 	text	   *result;
6160 	bool		shouldFree;
6161 	int			out_pre_spaces = 0,
6162 				sign = 0;
6163 	char	   *numstr,
6164 			   *orgnum,
6165 			   *p;
6166 	Numeric		x;
6167 
6168 	NUM_TOCHAR_prepare;
6169 
6170 	/*
6171 	 * On DateType depend part (numeric)
6172 	 */
6173 	if (IS_ROMAN(&Num))
6174 	{
6175 		x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6176 												NumericGetDatum(value),
6177 												Int32GetDatum(0)));
6178 		numstr =
6179 			int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
6180 														   NumericGetDatum(x))));
6181 	}
6182 	else if (IS_EEEE(&Num))
6183 	{
6184 		orgnum = numeric_out_sci(value, Num.post);
6185 
6186 		/*
6187 		 * numeric_out_sci() does not emit a sign for positive numbers.  We
6188 		 * need to add a space in this case so that positive and negative
6189 		 * numbers are aligned.  Also must check for NaN/infinity cases, which
6190 		 * we handle the same way as in float8_to_char.
6191 		 */
6192 		if (strcmp(orgnum, "NaN") == 0 ||
6193 			strcmp(orgnum, "Infinity") == 0 ||
6194 			strcmp(orgnum, "-Infinity") == 0)
6195 		{
6196 			/*
6197 			 * Allow 6 characters for the leading sign, the decimal point,
6198 			 * "e", the exponent's sign and two exponent digits.
6199 			 */
6200 			numstr = (char *) palloc(Num.pre + Num.post + 7);
6201 			fill_str(numstr, '#', Num.pre + Num.post + 6);
6202 			*numstr = ' ';
6203 			*(numstr + Num.pre + 1) = '.';
6204 		}
6205 		else if (*orgnum != '-')
6206 		{
6207 			numstr = (char *) palloc(strlen(orgnum) + 2);
6208 			*numstr = ' ';
6209 			strcpy(numstr + 1, orgnum);
6210 		}
6211 		else
6212 		{
6213 			numstr = orgnum;
6214 		}
6215 	}
6216 	else
6217 	{
6218 		int			numstr_pre_len;
6219 		Numeric		val = value;
6220 
6221 		if (IS_MULTI(&Num))
6222 		{
6223 			Numeric		a = int64_to_numeric(10);
6224 			Numeric		b = int64_to_numeric(Num.multi);
6225 
6226 			x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6227 													NumericGetDatum(a),
6228 													NumericGetDatum(b)));
6229 			val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
6230 													  NumericGetDatum(value),
6231 													  NumericGetDatum(x)));
6232 			Num.pre += Num.multi;
6233 		}
6234 
6235 		x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6236 												NumericGetDatum(val),
6237 												Int32GetDatum(Num.post)));
6238 		orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
6239 													 NumericGetDatum(x)));
6240 
6241 		if (*orgnum == '-')
6242 		{
6243 			sign = '-';
6244 			numstr = orgnum + 1;
6245 		}
6246 		else
6247 		{
6248 			sign = '+';
6249 			numstr = orgnum;
6250 		}
6251 
6252 		if ((p = strchr(numstr, '.')))
6253 			numstr_pre_len = p - numstr;
6254 		else
6255 			numstr_pre_len = strlen(numstr);
6256 
6257 		/* needs padding? */
6258 		if (numstr_pre_len < Num.pre)
6259 			out_pre_spaces = Num.pre - numstr_pre_len;
6260 		/* overflowed prefix digit format? */
6261 		else if (numstr_pre_len > Num.pre)
6262 		{
6263 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6264 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6265 			*(numstr + Num.pre) = '.';
6266 		}
6267 	}
6268 
6269 	NUM_TOCHAR_finish;
6270 	PG_RETURN_TEXT_P(result);
6271 }
6272 
6273 /* ---------------
6274  * INT4 to_char()
6275  * ---------------
6276  */
6277 Datum
int4_to_char(PG_FUNCTION_ARGS)6278 int4_to_char(PG_FUNCTION_ARGS)
6279 {
6280 	int32		value = PG_GETARG_INT32(0);
6281 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6282 	NUMDesc		Num;
6283 	FormatNode *format;
6284 	text	   *result;
6285 	bool		shouldFree;
6286 	int			out_pre_spaces = 0,
6287 				sign = 0;
6288 	char	   *numstr,
6289 			   *orgnum;
6290 
6291 	NUM_TOCHAR_prepare;
6292 
6293 	/*
6294 	 * On DateType depend part (int32)
6295 	 */
6296 	if (IS_ROMAN(&Num))
6297 		numstr = int_to_roman(value);
6298 	else if (IS_EEEE(&Num))
6299 	{
6300 		/* we can do it easily because float8 won't lose any precision */
6301 		float8		val = (float8) value;
6302 
6303 		orgnum = (char *) psprintf("%+.*e", Num.post, val);
6304 
6305 		/*
6306 		 * Swap a leading positive sign for a space.
6307 		 */
6308 		if (*orgnum == '+')
6309 			*orgnum = ' ';
6310 
6311 		numstr = orgnum;
6312 	}
6313 	else
6314 	{
6315 		int			numstr_pre_len;
6316 
6317 		if (IS_MULTI(&Num))
6318 		{
6319 			orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6320 														 Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
6321 			Num.pre += Num.multi;
6322 		}
6323 		else
6324 		{
6325 			orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6326 														 Int32GetDatum(value)));
6327 		}
6328 
6329 		if (*orgnum == '-')
6330 		{
6331 			sign = '-';
6332 			orgnum++;
6333 		}
6334 		else
6335 			sign = '+';
6336 
6337 		numstr_pre_len = strlen(orgnum);
6338 
6339 		/* post-decimal digits?  Pad out with zeros. */
6340 		if (Num.post)
6341 		{
6342 			numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6343 			strcpy(numstr, orgnum);
6344 			*(numstr + numstr_pre_len) = '.';
6345 			memset(numstr + numstr_pre_len + 1, '0', Num.post);
6346 			*(numstr + numstr_pre_len + Num.post + 1) = '\0';
6347 		}
6348 		else
6349 			numstr = orgnum;
6350 
6351 		/* needs padding? */
6352 		if (numstr_pre_len < Num.pre)
6353 			out_pre_spaces = Num.pre - numstr_pre_len;
6354 		/* overflowed prefix digit format? */
6355 		else if (numstr_pre_len > Num.pre)
6356 		{
6357 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6358 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6359 			*(numstr + Num.pre) = '.';
6360 		}
6361 	}
6362 
6363 	NUM_TOCHAR_finish;
6364 	PG_RETURN_TEXT_P(result);
6365 }
6366 
6367 /* ---------------
6368  * INT8 to_char()
6369  * ---------------
6370  */
6371 Datum
int8_to_char(PG_FUNCTION_ARGS)6372 int8_to_char(PG_FUNCTION_ARGS)
6373 {
6374 	int64		value = PG_GETARG_INT64(0);
6375 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6376 	NUMDesc		Num;
6377 	FormatNode *format;
6378 	text	   *result;
6379 	bool		shouldFree;
6380 	int			out_pre_spaces = 0,
6381 				sign = 0;
6382 	char	   *numstr,
6383 			   *orgnum;
6384 
6385 	NUM_TOCHAR_prepare;
6386 
6387 	/*
6388 	 * On DateType depend part (int32)
6389 	 */
6390 	if (IS_ROMAN(&Num))
6391 	{
6392 		/* Currently don't support int8 conversion to roman... */
6393 		numstr = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value))));
6394 	}
6395 	else if (IS_EEEE(&Num))
6396 	{
6397 		/* to avoid loss of precision, must go via numeric not float8 */
6398 		orgnum = numeric_out_sci(int64_to_numeric(value),
6399 								 Num.post);
6400 
6401 		/*
6402 		 * numeric_out_sci() does not emit a sign for positive numbers.  We
6403 		 * need to add a space in this case so that positive and negative
6404 		 * numbers are aligned.  We don't have to worry about NaN/inf here.
6405 		 */
6406 		if (*orgnum != '-')
6407 		{
6408 			numstr = (char *) palloc(strlen(orgnum) + 2);
6409 			*numstr = ' ';
6410 			strcpy(numstr + 1, orgnum);
6411 		}
6412 		else
6413 		{
6414 			numstr = orgnum;
6415 		}
6416 	}
6417 	else
6418 	{
6419 		int			numstr_pre_len;
6420 
6421 		if (IS_MULTI(&Num))
6422 		{
6423 			double		multi = pow((double) 10, (double) Num.multi);
6424 
6425 			value = DatumGetInt64(DirectFunctionCall2(int8mul,
6426 													  Int64GetDatum(value),
6427 													  DirectFunctionCall1(dtoi8,
6428 																		  Float8GetDatum(multi))));
6429 			Num.pre += Num.multi;
6430 		}
6431 
6432 		orgnum = DatumGetCString(DirectFunctionCall1(int8out,
6433 													 Int64GetDatum(value)));
6434 
6435 		if (*orgnum == '-')
6436 		{
6437 			sign = '-';
6438 			orgnum++;
6439 		}
6440 		else
6441 			sign = '+';
6442 
6443 		numstr_pre_len = strlen(orgnum);
6444 
6445 		/* post-decimal digits?  Pad out with zeros. */
6446 		if (Num.post)
6447 		{
6448 			numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6449 			strcpy(numstr, orgnum);
6450 			*(numstr + numstr_pre_len) = '.';
6451 			memset(numstr + numstr_pre_len + 1, '0', Num.post);
6452 			*(numstr + numstr_pre_len + Num.post + 1) = '\0';
6453 		}
6454 		else
6455 			numstr = orgnum;
6456 
6457 		/* needs padding? */
6458 		if (numstr_pre_len < Num.pre)
6459 			out_pre_spaces = Num.pre - numstr_pre_len;
6460 		/* overflowed prefix digit format? */
6461 		else if (numstr_pre_len > Num.pre)
6462 		{
6463 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6464 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6465 			*(numstr + Num.pre) = '.';
6466 		}
6467 	}
6468 
6469 	NUM_TOCHAR_finish;
6470 	PG_RETURN_TEXT_P(result);
6471 }
6472 
6473 /* -----------------
6474  * FLOAT4 to_char()
6475  * -----------------
6476  */
6477 Datum
float4_to_char(PG_FUNCTION_ARGS)6478 float4_to_char(PG_FUNCTION_ARGS)
6479 {
6480 	float4		value = PG_GETARG_FLOAT4(0);
6481 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6482 	NUMDesc		Num;
6483 	FormatNode *format;
6484 	text	   *result;
6485 	bool		shouldFree;
6486 	int			out_pre_spaces = 0,
6487 				sign = 0;
6488 	char	   *numstr,
6489 			   *p;
6490 
6491 	NUM_TOCHAR_prepare;
6492 
6493 	if (IS_ROMAN(&Num))
6494 		numstr = int_to_roman((int) rint(value));
6495 	else if (IS_EEEE(&Num))
6496 	{
6497 		if (isnan(value) || isinf(value))
6498 		{
6499 			/*
6500 			 * Allow 6 characters for the leading sign, the decimal point,
6501 			 * "e", the exponent's sign and two exponent digits.
6502 			 */
6503 			numstr = (char *) palloc(Num.pre + Num.post + 7);
6504 			fill_str(numstr, '#', Num.pre + Num.post + 6);
6505 			*numstr = ' ';
6506 			*(numstr + Num.pre + 1) = '.';
6507 		}
6508 		else
6509 		{
6510 			numstr = psprintf("%+.*e", Num.post, value);
6511 
6512 			/*
6513 			 * Swap a leading positive sign for a space.
6514 			 */
6515 			if (*numstr == '+')
6516 				*numstr = ' ';
6517 		}
6518 	}
6519 	else
6520 	{
6521 		float4		val = value;
6522 		char	   *orgnum;
6523 		int			numstr_pre_len;
6524 
6525 		if (IS_MULTI(&Num))
6526 		{
6527 			float		multi = pow((double) 10, (double) Num.multi);
6528 
6529 			val = value * multi;
6530 			Num.pre += Num.multi;
6531 		}
6532 
6533 		orgnum = psprintf("%.0f", fabs(val));
6534 		numstr_pre_len = strlen(orgnum);
6535 
6536 		/* adjust post digits to fit max float digits */
6537 		if (numstr_pre_len >= FLT_DIG)
6538 			Num.post = 0;
6539 		else if (numstr_pre_len + Num.post > FLT_DIG)
6540 			Num.post = FLT_DIG - numstr_pre_len;
6541 		orgnum = psprintf("%.*f", Num.post, val);
6542 
6543 		if (*orgnum == '-')
6544 		{						/* < 0 */
6545 			sign = '-';
6546 			numstr = orgnum + 1;
6547 		}
6548 		else
6549 		{
6550 			sign = '+';
6551 			numstr = orgnum;
6552 		}
6553 
6554 		if ((p = strchr(numstr, '.')))
6555 			numstr_pre_len = p - numstr;
6556 		else
6557 			numstr_pre_len = strlen(numstr);
6558 
6559 		/* needs padding? */
6560 		if (numstr_pre_len < Num.pre)
6561 			out_pre_spaces = Num.pre - numstr_pre_len;
6562 		/* overflowed prefix digit format? */
6563 		else if (numstr_pre_len > Num.pre)
6564 		{
6565 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6566 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6567 			*(numstr + Num.pre) = '.';
6568 		}
6569 	}
6570 
6571 	NUM_TOCHAR_finish;
6572 	PG_RETURN_TEXT_P(result);
6573 }
6574 
6575 /* -----------------
6576  * FLOAT8 to_char()
6577  * -----------------
6578  */
6579 Datum
float8_to_char(PG_FUNCTION_ARGS)6580 float8_to_char(PG_FUNCTION_ARGS)
6581 {
6582 	float8		value = PG_GETARG_FLOAT8(0);
6583 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6584 	NUMDesc		Num;
6585 	FormatNode *format;
6586 	text	   *result;
6587 	bool		shouldFree;
6588 	int			out_pre_spaces = 0,
6589 				sign = 0;
6590 	char	   *numstr,
6591 			   *p;
6592 
6593 	NUM_TOCHAR_prepare;
6594 
6595 	if (IS_ROMAN(&Num))
6596 		numstr = int_to_roman((int) rint(value));
6597 	else if (IS_EEEE(&Num))
6598 	{
6599 		if (isnan(value) || isinf(value))
6600 		{
6601 			/*
6602 			 * Allow 6 characters for the leading sign, the decimal point,
6603 			 * "e", the exponent's sign and two exponent digits.
6604 			 */
6605 			numstr = (char *) palloc(Num.pre + Num.post + 7);
6606 			fill_str(numstr, '#', Num.pre + Num.post + 6);
6607 			*numstr = ' ';
6608 			*(numstr + Num.pre + 1) = '.';
6609 		}
6610 		else
6611 		{
6612 			numstr = psprintf("%+.*e", Num.post, value);
6613 
6614 			/*
6615 			 * Swap a leading positive sign for a space.
6616 			 */
6617 			if (*numstr == '+')
6618 				*numstr = ' ';
6619 		}
6620 	}
6621 	else
6622 	{
6623 		float8		val = value;
6624 		char	   *orgnum;
6625 		int			numstr_pre_len;
6626 
6627 		if (IS_MULTI(&Num))
6628 		{
6629 			double		multi = pow((double) 10, (double) Num.multi);
6630 
6631 			val = value * multi;
6632 			Num.pre += Num.multi;
6633 		}
6634 
6635 		orgnum = psprintf("%.0f", fabs(val));
6636 		numstr_pre_len = strlen(orgnum);
6637 
6638 		/* adjust post digits to fit max double digits */
6639 		if (numstr_pre_len >= DBL_DIG)
6640 			Num.post = 0;
6641 		else if (numstr_pre_len + Num.post > DBL_DIG)
6642 			Num.post = DBL_DIG - numstr_pre_len;
6643 		orgnum = psprintf("%.*f", Num.post, val);
6644 
6645 		if (*orgnum == '-')
6646 		{						/* < 0 */
6647 			sign = '-';
6648 			numstr = orgnum + 1;
6649 		}
6650 		else
6651 		{
6652 			sign = '+';
6653 			numstr = orgnum;
6654 		}
6655 
6656 		if ((p = strchr(numstr, '.')))
6657 			numstr_pre_len = p - numstr;
6658 		else
6659 			numstr_pre_len = strlen(numstr);
6660 
6661 		/* needs padding? */
6662 		if (numstr_pre_len < Num.pre)
6663 			out_pre_spaces = Num.pre - numstr_pre_len;
6664 		/* overflowed prefix digit format? */
6665 		else if (numstr_pre_len > Num.pre)
6666 		{
6667 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6668 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6669 			*(numstr + Num.pre) = '.';
6670 		}
6671 	}
6672 
6673 	NUM_TOCHAR_finish;
6674 	PG_RETURN_TEXT_P(result);
6675 }
6676