1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  *	 Portions Copyright (c) 1999-2020, PostgreSQL Global Development Group
8  *
9  *
10  *	 TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  *	 The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  *	 inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  *	 Cache & Memory:
 *	These routines maintain their own internal cache of parsed format
 *	pictures.
 *
 *	The cache uses a static buffer and is persistent across transactions.  If
 *	a format picture is bigger than the cache buffer, the parser is called
 *	every time.
22  *
23  *	 NOTE for Number version:
 *	Everything in the number version is implemented as keywords (suffixes
 *	are not used), because a format picture describes only *one* item
 *	(the number).  This differs from the timestamp version, where each
 *	keyword can have a suffix.
28  *
29  *	 NOTE for Timestamp routines:
30  *	In this module the POSIX 'struct tm' type is *not* used, but rather
31  *	PgSQL type, which has tm_mon based on one (*non* zero) and
32  *	year *not* based on 1900, but is used full year number.
33  *	Module supports AD / BC / AM / PM.
34  *
35  *	Supported types for to_char():
36  *
37  *		Timestamp, Numeric, int4, int8, float4, float8
38  *
39  *	Supported types for reverse conversion:
40  *
41  *		Timestamp	- to_timestamp()
42  *		Date		- to_date()
43  *		Numeric		- to_number()
44  *
45  *
46  *	Karel Zak
47  *
48  * TODO
49  *	- better number building (formatting) / parsing, now it isn't
50  *		  ideal code
51  *	- use Assert()
52  *	- add support for roman number to standard number conversion
53  *	- add support for number spelling
54  *	- add support for string to string formatting (we must be better
55  *	  than Oracle :-),
56  *		to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
/*
 * When DEBUG_TO_FROM_CHAR is defined, the debugging macros in this file
 * report through elog() at the level named here.
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_elog_output	DEBUG3
#endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 
73 /*
74  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75  * declare them in <wchar.h>, so include that too.
76  */
77 #include <wchar.h>
78 #ifdef HAVE_WCTYPE_H
79 #include <wctype.h>
80 #endif
81 
82 #ifdef USE_ICU
83 #include <unicode/ustring.h>
84 #endif
85 
86 #include "catalog/pg_collation.h"
87 #include "catalog/pg_type.h"
88 #include "mb/pg_wchar.h"
89 #include "parser/scansup.h"
90 #include "utils/builtins.h"
91 #include "utils/date.h"
92 #include "utils/datetime.h"
93 #include "utils/float.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/memutils.h"
97 #include "utils/numeric.h"
98 #include "utils/pg_locale.h"
99 
100 /* ----------
101  * Convenience macros for error handling
102  * ----------
103  *
 * The two macros below help to handle errors in functions that take a
 * 'bool *have_error' argument.  When this argument is not NULL, the function
 * is expected to suppress ereports when possible.  Instead it should
 * return some default value and set the *have_error flag.
 *
 * The RETURN_ERROR() macro is intended to wrap ereport() calls.  When the
 * have_error function argument is not NULL, then instead of ereport'ing we
 * set the *have_error flag and jump to the on_error label.  The code at that
 * label is expected to free resources and return some 'default' value.
 *
 * CHECK_ERROR jumps to the on_error label when have_error is not NULL and
 * *have_error is set.  It is meant to be used for an immediate exit from the
 * function after calling another function that takes a 'bool *have_error'
 * argument.
117  */
/*
 * RETURN_ERROR(throw_error)
 *
 * Needs a 'bool *have_error' variable and an 'on_error' label in the
 * enclosing function.  If have_error is NULL the throw_error statement is
 * executed; otherwise *have_error is set to true and control jumps to
 * on_error without executing throw_error.
 */
#define RETURN_ERROR(throw_error) \
do { \
	if (!have_error) \
	{ \
		throw_error; \
	} \
	else \
	{ \
		*have_error = true; \
		goto on_error; \
	} \
} while (0)
130 
/*
 * CHECK_ERROR
 *
 * Needs a 'bool *have_error' variable and an 'on_error' label in the
 * enclosing function.  Jumps to on_error only when have_error is non-NULL
 * and the flag it points to is set; otherwise it does nothing.
 */
#define CHECK_ERROR \
do { \
	if (have_error != NULL && *have_error) \
		goto on_error; \
} while (0)
136 
/* ----------
 * Routine flags (distinct bits, so they can be OR'ed together)
 * ----------
 */
#define DCH_FLAG		(1 << 0)	/* DATE-TIME flag */
#define NUM_FLAG		(1 << 1)	/* NUMBER flag */
#define STD_FLAG		(1 << 2)	/* STANDARD flag */
144 
/* ----------
 * KeyWord index geometry
 *
 * The index has '~' - ' ' slots.  KeyWord_INDEX_FILTER() yields 1 only for
 * characters strictly between ' ' (32) and '~' (126), and 0 for everything
 * else, including both end points.
 * ----------
 */
#define KeyWord_INDEX_SIZE		('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	(((_c) > ' ' && (_c) < '~') ? 1 : 0)
151 
/* ----------
 * Maximal length of one node
 *
 * NOTE(review): NUM_MAX_ITEM_SIZ is 8 although its comment quotes 15 chars
 * for a roman number.  Presumably output buffers are sized as the format
 * length times this value, so the two-character "RN" keyword still reserves
 * enough room -- confirm against the callers before changing either number.
 * ----------
 */
#define DCH_MAX_ITEM_SIZ	   12	/* max localized day name		*/
#define NUM_MAX_ITEM_SIZ		8	/* roman number (RN has 15 chars)	*/
158 
159 
/* ----------
 * Format parser structs
 * ----------
 */

/* One prefix/postfix modifier that can accompany a keyword. */
typedef struct
{
	const char *name;			/* suffix string */
	int			len;			/* suffix length */
	int			id;				/* code used in node->suffix */
	int			type;			/* SUFFTYPE_PREFIX or SUFFTYPE_POSTFIX */
} KeySuffix;
171 
/* ----------
 * FromCharDateMode
 * ----------
 *
 * Names one of several distinct, mutually exclusive date conventions that a
 * keyword can belong to.  FROM_CHAR_DATE_NONE is zero, so a zero-filled
 * struct starts out with no date mode.
 */
typedef enum
{
	FROM_CHAR_DATE_NONE = 0,		/* Value does not affect date mode. */
	FROM_CHAR_DATE_GREGORIAN = 1,	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_ISOWEEK = 2		/* ISO 8601 week date */
} FromCharDateMode;
185 
/*
 * One format-picture keyword.  The same struct is used by the date/time
 * keyword table and by the number keyword table; the field order matters
 * because both tables use positional initializers.
 */
typedef struct
{
	const char *name;			/* keyword text */
	int			len;			/* length of name */
	int			id;				/* DCH_poz or NUM_poz value */
	bool		is_digit;		/* NOTE(review): presumably marks keywords
								 * whose value is numeric -- confirm */
	FromCharDateMode date_mode; /* date convention the keyword belongs to */
} KeyWord;
194 
/*
 * One element of a parsed format picture.  Which of the remaining fields is
 * meaningful depends on 'type'.
 */
typedef struct
{
	uint8		type;			/* NODE_TYPE_XXX, see below */
	char		character[MAX_MULTIBYTE_CHAR_LEN + 1];	/* if type is CHAR */
	uint8		suffix;			/* keyword prefix/suffix code, if any */
	const KeyWord *key;			/* if type is ACTION */
} FormatNode;
202 
/* Values for FormatNode.type */
#define NODE_TYPE_END		1
#define NODE_TYPE_ACTION	2
#define NODE_TYPE_CHAR		3
#define NODE_TYPE_SEPARATOR	4
#define NODE_TYPE_SPACE		5

/* Values for KeySuffix.type */
#define SUFFTYPE_PREFIX		1
#define SUFFTYPE_POSTFIX	2

/* 12/24 hour clock selectors; presumably stored in TmFromChar.clock -- confirm */
#define CLOCK_24_HOUR		0
#define CLOCK_12_HOUR		1
214 
215 
/* ----------
 * English full month names and abbreviated day names.
 * Both lists are terminated by a NULL entry.
 * ----------
 */
static const char *const months_full[] = {
	"January", "February", "March", "April",
	"May", "June", "July", "August",
	"September", "October", "November", "December",
	NULL
};

static const char *const days_short[] = {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
	NULL
};
228 
/* ----------
 * AD / BC
 * ----------
 *	There is no 0 AD; years go from 1 BC straight to 1 AD.  For non-interval
 *	values, a year that is zero or negative is therefore returned as the
 *	positive number 1 - year (0 becomes 1, -1 becomes 2, and so on), while
 *	positive years are returned unchanged.  For interval years, we just
 *	return the year.
 */
#define ADJUST_YEAR(year, is_interval)	((is_interval) ? (year) : ((year) > 0 ? (year) : 1 - (year)))
237 
/* Era markers, with and without periods, in upper and lower case. */
#define A_D_STR		"A.D."
#define a_d_STR		"a.d."
#define AD_STR		"AD"
#define ad_STR		"ad"

#define B_C_STR		"B.C."
#define b_c_STR		"b.c."
#define BC_STR		"BC"
#define bc_STR		"bc"

/*
 * AD / BC strings for seq_search.
 *
 * There are two NULL-terminated variants: a standard form without periods
 * and a long form with periods.
 *
 * In both arrays AD matches sit at even indexes and BC matches at odd
 * indexes, so "is it BC?" is simply the index of the match modulo 2.
 */
static const char *const adbc_strings[] = {
	ad_STR, bc_STR,
	AD_STR, BC_STR,
	NULL
};

static const char *const adbc_strings_long[] = {
	a_d_STR, b_c_STR,
	A_D_STR, B_C_STR,
	NULL
};
260 
/* ----------
 * AM / PM
 *
 * Meridiem markers, with and without periods, in upper and lower case.
 * ----------
 */
#define A_M_STR		"A.M."
#define a_m_STR		"a.m."
#define AM_STR		"AM"
#define am_STR		"am"

#define P_M_STR		"P.M."
#define p_m_STR		"p.m."
#define PM_STR		"PM"
#define pm_STR		"pm"

/*
 * AM / PM strings for seq_search.
 *
 * There are two NULL-terminated variants: a standard form without periods
 * and a long form with periods.
 *
 * In both arrays AM matches sit at even indexes and PM matches at odd
 * indexes, so "is it PM?" is simply the index of the match modulo 2.
 */
static const char *const ampm_strings[] = {
	am_STR, pm_STR,
	AM_STR, PM_STR,
	NULL
};

static const char *const ampm_strings_long[] = {
	a_m_STR, p_m_STR,
	A_M_STR, P_M_STR,
	NULL
};
287 
/* ----------
 * Months in roman numerals, December first.
 *
 * The reverse order is required by seq_search (in FROM_CHAR): a longer
 * numeral such as 'VIII' must be tried before its prefix 'V'.
 * Both lists are terminated by a NULL entry.
 * ----------
 */
static const char *const rm_months_upper[] = {
	"XII", "XI", "X", "IX",
	"VIII", "VII", "VI", "V",
	"IV", "III", "II", "I",
	NULL
};

static const char *const rm_months_lower[] = {
	"xii", "xi", "x", "ix",
	"viii", "vii", "vi", "v",
	"iv", "iii", "ii", "i",
	NULL
};
299 
/* ----------
 * Roman numbers: numerals for 1..9 units, tens and hundreds.
 * Entry [n - 1] is the numeral for digit n; each list ends with NULL.
 * ----------
 */
static const char *const rm1[] = {
	"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX",
	NULL
};

static const char *const rm10[] = {
	"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC",
	NULL
};

static const char *const rm100[] = {
	"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM",
	NULL
};
307 
/* ----------
 * Ordinal postfixes, in upper and lower case.
 * Each list ends with NULL.
 * ----------
 */
static const char *const numTH[] = {
	"ST", "ND", "RD", "TH",
	NULL
};

static const char *const numth[] = {
	"st", "nd", "rd", "th",
	NULL
};

/* ----------
 * Flags & Options: selects the upper-case or lower-case ordinal postfix
 * ----------
 */
#define TH_UPPER		1
#define TH_LOWER		2
321 
/* ----------
 * Number description struct
 *
 * All members are plain ints; their order is part of the layout.
 * ----------
 */
typedef struct
{
	int			pre;			/* (count) numbers before decimal */
	int			post;			/* (count) numbers after decimal */
	int			lsign;			/* want locales sign */
	int			flag;			/* number parameters (NUM_F_* bits) */
	int			pre_lsign_num;	/* tmp value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of first zero */
	int			zero_end;		/* position of last zero */
	int			need_locale;	/* needs it locale */
} NUMDesc;
338 
/* ----------
 * Flags for NUMBER version (bits stored in NUMDesc.flag)
 *
 * Bit 0 is not used; the values run from bit 1 to bit 14.
 * ----------
 */
#define NUM_F_DECIMAL		0x0002
#define NUM_F_LDECIMAL		0x0004
#define NUM_F_ZERO			0x0008
#define NUM_F_BLANK			0x0010
#define NUM_F_FILLMODE		0x0020
#define NUM_F_LSIGN			0x0040
#define NUM_F_BRACKET		0x0080
#define NUM_F_MINUS			0x0100
#define NUM_F_PLUS			0x0200
#define NUM_F_ROMAN			0x0400
#define NUM_F_MULTI			0x0800
#define NUM_F_PLUS_POST		0x1000
#define NUM_F_MINUS_POST	0x2000
#define NUM_F_EEEE			0x4000

/* Locale sign placement */
#define NUM_LSIGN_PRE	(-1)
#define NUM_LSIGN_POST	1
#define NUM_LSIGN_NONE	0

/* ----------
 * Tests
 *
 * Each macro takes a pointer to a struct with a 'flag' member and yields
 * the masked bit (non-zero when set), not a normalized 0/1.
 * ----------
 */
#define IS_DECIMAL(_f)		((_f)->flag & NUM_F_DECIMAL)
#define IS_LDECIMAL(_f)		((_f)->flag & NUM_F_LDECIMAL)
#define IS_ZERO(_f)			((_f)->flag & NUM_F_ZERO)
#define IS_BLANK(_f)		((_f)->flag & NUM_F_BLANK)
#define IS_FILLMODE(_f)		((_f)->flag & NUM_F_FILLMODE)
#define IS_BRACKET(_f)		((_f)->flag & NUM_F_BRACKET)
#define IS_MINUS(_f)		((_f)->flag & NUM_F_MINUS)
#define IS_LSIGN(_f)		((_f)->flag & NUM_F_LSIGN)
#define IS_PLUS(_f)			((_f)->flag & NUM_F_PLUS)
#define IS_ROMAN(_f)		((_f)->flag & NUM_F_ROMAN)
#define IS_MULTI(_f)		((_f)->flag & NUM_F_MULTI)
#define IS_EEEE(_f)			((_f)->flag & NUM_F_EEEE)
378 
/* ----------
 * Format picture cache
 *
 * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
 * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
 *
 * For simplicity, the cache entries are fixed-size, so they allow for the
 * worst case of a FormatNode for each byte in the picture string.
 *
 * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
 * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
 * we don't waste too much space by palloc'ing them individually.  Be sure
 * to adjust those macros if you add fields to those structs.
 *
 * The OVERHEAD macros account for the non-array members of each entry:
 * DCHCacheEntry has two bools (std, valid) and an int (age); NUMCacheEntry
 * has one bool (valid), an int (age) and a NUMDesc (Num).
 *
 * The max number of entries in each cache is DCH_CACHE_ENTRIES
 * resp. NUM_CACHE_ENTRIES.
 * ----------
 */
#define DCH_CACHE_OVERHEAD \
	MAXALIGN(2 * sizeof(bool) + sizeof(int))
#define NUM_CACHE_OVERHEAD \
	MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))

#define DCH_CACHE_SIZE \
	((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
#define NUM_CACHE_SIZE \
	((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)

#define DCH_CACHE_ENTRIES	20
#define NUM_CACHE_ENTRIES	20
409 
/*
 * One cached date/time format picture.
 *
 * NOTE(review): 'str' presumably holds the picture text used as the lookup
 * key and 'std' whether it was parsed in standard mode -- confirm against
 * the cache lookup code.
 */
typedef struct
{
	FormatNode	format[DCH_CACHE_SIZE + 1];
	char		str[DCH_CACHE_SIZE + 1];
	bool		std;
	bool		valid;
	int			age;
} DCHCacheEntry;

/*
 * One cached number format picture, together with the NUMDesc that goes
 * with it.
 */
typedef struct
{
	FormatNode	format[NUM_CACHE_SIZE + 1];
	char		str[NUM_CACHE_SIZE + 1];
	bool		valid;
	int			age;
	NUMDesc		Num;
} NUMCacheEntry;
427 
/*
 * global cache for date/time format pictures
 *
 * The array holds pointers to entries, not the entries themselves.
 */
static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES];
static int	n_DCHCache = 0;		/* current number of entries */
static int	DCHCounter = 0;		/* aging-event counter */

/*
 * global cache for number format pictures
 *
 * The array holds pointers to entries, not the entries themselves.
 */
static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES];
static int	n_NUMCache = 0;		/* current number of entries */
static int	NUMCounter = 0;		/* aging-event counter */
437 
438 /* ----------
439  * For char->date/time conversion
440  * ----------
441  */
442 typedef struct
443 {
444 	FromCharDateMode mode;
445 	int			hh,
446 				pm,
447 				mi,
448 				ss,
449 				ssss,
450 				d,				/* stored as 1-7, Sunday = 1, 0 means missing */
451 				dd,
452 				ddd,
453 				mm,
454 				ms,
455 				year,
456 				bc,
457 				ww,
458 				w,
459 				cc,
460 				j,
461 				us,
462 				yysz,			/* is it YY or YYYY ? */
463 				clock,			/* 12 or 24 hour clock? */
464 				tzsign,			/* +1, -1 or 0 if timezone info is absent */
465 				tzh,
466 				tzm,
467 				ff;				/* fractional precision */
468 } TmFromChar;
469 
470 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
471 
/* ----------
 * Debug
 *
 * DEBUG_TMFC() dumps every int member of a TmFromChar and DEBUG_TM() dumps a
 * tm-style struct, both through elog() at DEBUG_elog_output level.  Without
 * DEBUG_TO_FROM_CHAR both macros expand to nothing.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d\ntzsign: %d\ntzh: %d\ntzm: %d\nff: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock, (_X)->tzsign, (_X)->tzh, (_X)->tzm, \
			(_X)->ff)
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
492 
/* ----------
 * Datetime to char conversion
 *
 * Bundles the broken-down time with its fractional seconds and a timezone
 * string.  Use the tmtcTm() / tmtcFsec() / tmtcTzn() accessors below rather
 * than touching the members directly.
 * ----------
 */
typedef struct TmToChar
{
	struct pg_tm tm;			/* classic 'tm' struct */
	fsec_t		fsec;			/* fractional seconds */
	const char *tzn;			/* timezone */
} TmToChar;
503 
/* Accessors for the members of a TmToChar (argument is a pointer). */
#define tmtcTm(_X)		(&(_X)->tm)
#define tmtcTzn(_X)		((_X)->tzn)
#define tmtcFsec(_X)	((_X)->fsec)

/*
 * Reset a tm-style struct: all time fields go to 0, except tm_mday and
 * tm_mon which go to 1; tm_zone becomes NULL.
 */
#define ZERO_tm(_X) \
do {	\
	(_X)->tm_sec = 0; \
	(_X)->tm_min = 0; \
	(_X)->tm_hour = 0; \
	(_X)->tm_year = 0; \
	(_X)->tm_wday = 0; \
	(_X)->tm_yday = 0; \
	(_X)->tm_isdst = 0; \
	(_X)->tm_mon = 1; \
	(_X)->tm_mday = 1; \
	(_X)->tm_zone = NULL; \
} while(0)

/* Reset a whole TmToChar: the embedded tm, fractional seconds and zone name. */
#define ZERO_tmtc(_X) \
do { \
	tmtcTzn(_X) = NULL; \
	tmtcFsec(_X) = 0; \
	ZERO_tm( tmtcTm(_X) ); \
} while(0)
522 
/*
 *	to_char(time) appears to to_char() as an interval, so this check
 *	is really for interval and time data types.
 *
 *	The macro expects a variable named 'is_interval' to be in scope at the
 *	point of use.  When it is true, an ERROR is reported through ereport();
 *	when it is false, the macro does nothing.
 */
#define INVALID_FOR_INTERVAL  \
do { \
	if (is_interval) \
		ereport(ERROR, \
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
				 errmsg("invalid format specification for an interval value"), \
				 errhint("Intervals are not tied to specific calendar dates."))); \
} while(0)
535 
536 /*****************************************************************************
537  *			KeyWord definitions
538  *****************************************************************************/
539 
/* ----------
 * Suffixes (FormatNode.suffix is an OR of these codes)
 * ----------
 */
#define DCH_S_FM	(1 << 0)
#define DCH_S_TH	(1 << 1)
#define DCH_S_th	(1 << 2)
#define DCH_S_SP	(1 << 3)
#define DCH_S_TM	(1 << 4)

/* ----------
 * Suffix tests
 *
 * Each test yields exactly 1 or 0, except S_TH_TYPE(), which yields
 * TH_UPPER when the upper-case TH bit is set and TH_LOWER otherwise.
 * ----------
 */
#define S_THth(_s)	((((_s) & DCH_S_TH) != 0) || (((_s) & DCH_S_th) != 0))
#define S_TH(_s)	(((_s) & DCH_S_TH) != 0)
#define S_th(_s)	(((_s) & DCH_S_th) != 0)
#define S_TH_TYPE(_s)	(!((_s) & DCH_S_TH) ? TH_LOWER : TH_UPPER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)	(((_s) & DCH_S_FM) != 0)
#define S_SP(_s)	(((_s) & DCH_S_SP) != 0)
#define S_TM(_s)	(((_s) & DCH_S_TM) != 0)
563 
/* ----------
 * Suffixes definition for DATE-TIME TO/FROM CHAR
 *
 * Each entry gives the suffix text, its length, the DCH_S_* code and whether
 * it is a SUFFTYPE_PREFIX or a SUFFTYPE_POSTFIX.  "FM"/"fm" and "TM"/"tm"
 * map to the same code in either case, whereas "TH" and "th" map to distinct
 * codes; "SP" has no lower-case entry.  The list ends with an all-zero
 * entry whose name is NULL.
 * ----------
 */
#define TM_SUFFIX_LEN	2

static const KeySuffix DCH_suff[] = {
	{"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
	{"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
	{"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
	{"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
	{"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
	{"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
	{"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
	/* last */
	{NULL, 0, 0, 0}
};
581 
582 
583 /* ----------
584  * Format-pictures (KeyWord).
585  *
 * The KeyWord array is sorted alphabetically, *BUT* keywords that share a
 *		  prefix are ordered from the most complicated to the simplest:
 *
 *	(example: "DDD","DD","Day","D" )
 *
 * This specific order is needed because the lookup is a sequential search
 * over strings that have no explicit end.  Which keyword does "HH12blabla"
 * start with, "HH" or "HH12"?  "HH12" must be tried first, because "HH" is
 * also present in the string but is not the right answer.
 *
 * (!)
 *	 - The position of a keyword in the array corresponds to its position in
 *	   the enum DCH/NUM_poz.
 * (!)
 *
 * For fast search an 'int index[]' is used.  The index is the ASCII table
 * from position 32 (' ') to 126 (~); each slot holds the DCH_ / NUM_ enum
 * value for that ASCII position, or -1 if the char does not start any
 * KeyWord.  Search example for the string "MM":
 *	1)	look up index['M' - 32],
 *	2)	take the keyword position (enum DCH_MI) from the index
 *	3)	run a sequential search in keywords[] starting from this position
607  *
608  * ----------
609  */
610 
/*
 * Positions of the date/time keywords.
 *
 * The enumerators are listed in the same order as the entries of
 * DCH_keywords[], so each value is also the array position of the keyword
 * with the matching spelling.  Do not reorder one without the other.
 * The upper-case spellings come first, followed by the lower-case ones;
 * _DCH_last_ is the count of real entries.
 */
typedef enum
{
	DCH_A_D,
	DCH_A_M,
	DCH_AD,
	DCH_AM,
	DCH_B_C,
	DCH_BC,
	DCH_CC,
	DCH_DAY,
	DCH_DDD,
	DCH_DD,
	DCH_DY,
	DCH_Day,
	DCH_Dy,
	DCH_D,
	DCH_FF1,
	DCH_FF2,
	DCH_FF3,
	DCH_FF4,
	DCH_FF5,
	DCH_FF6,
	DCH_FX,						/* global suffix */
	DCH_HH24,
	DCH_HH12,
	DCH_HH,
	DCH_IDDD,
	DCH_ID,
	DCH_IW,
	DCH_IYYY,
	DCH_IYY,
	DCH_IY,
	DCH_I,
	DCH_J,
	DCH_MI,
	DCH_MM,
	DCH_MONTH,
	DCH_MON,
	DCH_MS,
	DCH_Month,
	DCH_Mon,
	DCH_OF,
	DCH_P_M,
	DCH_PM,
	DCH_Q,
	DCH_RM,
	DCH_SSSSS,
	DCH_SSSS,
	DCH_SS,
	DCH_TZH,
	DCH_TZM,
	DCH_TZ,
	DCH_US,
	DCH_WW,
	DCH_W,
	DCH_Y_YYY,
	DCH_YYYY,
	DCH_YYY,
	DCH_YY,
	DCH_Y,
	DCH_a_d,
	DCH_a_m,
	DCH_ad,
	DCH_am,
	DCH_b_c,
	DCH_bc,
	DCH_cc,
	DCH_day,
	DCH_ddd,
	DCH_dd,
	DCH_dy,
	DCH_d,
	DCH_ff1,
	DCH_ff2,
	DCH_ff3,
	DCH_ff4,
	DCH_ff5,
	DCH_ff6,
	DCH_fx,
	DCH_hh24,
	DCH_hh12,
	DCH_hh,
	DCH_iddd,
	DCH_id,
	DCH_iw,
	DCH_iyyy,
	DCH_iyy,
	DCH_iy,
	DCH_i,
	DCH_j,
	DCH_mi,
	DCH_mm,
	DCH_month,
	DCH_mon,
	DCH_ms,
	DCH_p_m,
	DCH_pm,
	DCH_q,
	DCH_rm,
	DCH_sssss,
	DCH_ssss,
	DCH_ss,
	DCH_tz,
	DCH_us,
	DCH_ww,
	DCH_w,
	DCH_y_yyy,
	DCH_yyyy,
	DCH_yyy,
	DCH_yy,
	DCH_y,

	/* last */
	_DCH_last_
}			DCH_poz;
726 
/*
 * Positions of the number keywords.
 *
 * The enumerators are listed in the same order as the entries of
 * NUM_keywords[], so each value is also the array position of the keyword
 * with the matching spelling.  Do not reorder one without the other.
 * The upper-case spellings come first, followed by the lower-case ones;
 * _NUM_last_ is the count of real entries.
 */
typedef enum
{
	NUM_COMMA,
	NUM_DEC,
	NUM_0,
	NUM_9,
	NUM_B,
	NUM_C,
	NUM_D,
	NUM_E,
	NUM_FM,
	NUM_G,
	NUM_L,
	NUM_MI,
	NUM_PL,
	NUM_PR,
	NUM_RN,
	NUM_SG,
	NUM_SP,
	NUM_S,
	NUM_TH,
	NUM_V,
	NUM_b,
	NUM_c,
	NUM_d,
	NUM_e,
	NUM_fm,
	NUM_g,
	NUM_l,
	NUM_mi,
	NUM_pl,
	NUM_pr,
	NUM_rn,
	NUM_sg,
	NUM_sp,
	NUM_s,
	NUM_th,
	NUM_v,

	/* last */
	_NUM_last_
}			NUM_poz;
769 
/* ----------
 * KeyWords for DATE-TIME version
 *
 * Entries appear in DCH_poz order.  The 'id' stored in an entry is not
 * always the enumerator of its own position:
 *	- "SSSSS" and "SSSS" (and their lower-case forms) both carry DCH_SSSS;
 *	- many lower-case spellings carry the upper-case id (e.g. "dd" carries
 *	  DCH_DD), while others keep an id of their own (e.g. "day" carries
 *	  DCH_day).
 * The list ends with an all-zero entry whose name is NULL.
 * ----------
 */
static const KeyWord DCH_keywords[] = {
/*	name, len, id, is_digit, date_mode */
	{"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE},	/* A */
	{"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
	{"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
	{"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
	{"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE},	/* B */
	{"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
	{"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},	/* C */
	{"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE},	/* D */
	{"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
	{"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
	{"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
	{"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
	{"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
	{"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
	{"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},	/* F */
	{"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
	{"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
	{"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
	{"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
	{"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
	{"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
	{"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},	/* H */
	{"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
	{"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
	{"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},	/* I */
	{"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
	{"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
	{"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
	{"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
	{"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
	{"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},	/* M */
	{"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
	{"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
	{"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
	{"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
	{"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
	{"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
	{"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE},	/* O */
	{"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE},	/* P */
	{"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
	{"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
	{"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
	{"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},	/* S */
	{"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
	{"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
	{"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE},	/* T */
	{"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
	{"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
	{"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE},	/* U */
	{"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},	/* W */
	{"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
	{"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},	/* Y */
	{"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
	{"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
	{"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE},	/* a */
	{"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
	{"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
	{"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
	{"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE},	/* b */
	{"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
	{"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},	/* c */
	{"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE},	/* d */
	{"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
	{"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
	{"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
	{"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
	{"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},	/* f */
	{"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
	{"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
	{"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
	{"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
	{"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
	{"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
	{"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},	/* h */
	{"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
	{"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
	{"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},	/* i */
	{"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
	{"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
	{"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
	{"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
	{"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
	{"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
	{"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},	/* m */
	{"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
	{"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
	{"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
	{"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
	{"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE},	/* p */
	{"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
	{"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
	{"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
	{"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},	/* s */
	{"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
	{"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
	{"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},	/* t */
	{"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE},	/* u */
	{"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},	/* w */
	{"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
	{"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},	/* y */
	{"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
	{"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
	{"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},

	/* last */
	{NULL, 0, 0, 0, 0}
};
889 
/* ----------
 * KeyWords for NUMBER version
 *
 * The is_digit and date_mode fields are not relevant here; the initializers
 * leave them out, so C zero-initializes them.
 *
 * Entries appear in NUM_poz order.  Lower-case spellings carry the
 * upper-case id (e.g. "b" carries NUM_B), except "rn" and "th", which carry
 * NUM_rn and NUM_th.  The list ends with an entry whose name is NULL.
 * ----------
 */
static const KeyWord NUM_keywords[] = {
/*	name, len, id			is in Index */
	{",", 1, NUM_COMMA},		/* , */
	{".", 1, NUM_DEC},			/* . */
	{"0", 1, NUM_0},			/* 0 */
	{"9", 1, NUM_9},			/* 9 */
	{"B", 1, NUM_B},			/* B */
	{"C", 1, NUM_C},			/* C */
	{"D", 1, NUM_D},			/* D */
	{"EEEE", 4, NUM_E},			/* E */
	{"FM", 2, NUM_FM},			/* F */
	{"G", 1, NUM_G},			/* G */
	{"L", 1, NUM_L},			/* L */
	{"MI", 2, NUM_MI},			/* M */
	{"PL", 2, NUM_PL},			/* P */
	{"PR", 2, NUM_PR},
	{"RN", 2, NUM_RN},			/* R */
	{"SG", 2, NUM_SG},			/* S */
	{"SP", 2, NUM_SP},
	{"S", 1, NUM_S},
	{"TH", 2, NUM_TH},			/* T */
	{"V", 1, NUM_V},			/* V */
	{"b", 1, NUM_B},			/* b */
	{"c", 1, NUM_C},			/* c */
	{"d", 1, NUM_D},			/* d */
	{"eeee", 4, NUM_E},			/* e */
	{"fm", 2, NUM_FM},			/* f */
	{"g", 1, NUM_G},			/* g */
	{"l", 1, NUM_L},			/* l */
	{"mi", 2, NUM_MI},			/* m */
	{"pl", 2, NUM_PL},			/* p */
	{"pr", 2, NUM_PR},
	{"rn", 2, NUM_rn},			/* r */
	{"sg", 2, NUM_SG},			/* s */
	{"sp", 2, NUM_SP},
	{"s", 1, NUM_S},
	{"th", 2, NUM_th},			/* t */
	{"v", 1, NUM_V},			/* v */

	/* last */
	{NULL, 0, 0}
};
938 
939 
/* ----------
 * KeyWords index for DATE-TIME version
 *
 * Slot [c - ' '] holds the DCH_poz value at which the keywords beginning
 * with character c start, or -1 when no keyword begins with c.  The first
 * row covers ASCII 32..39 (eight slots); every following row covers ten
 * codes, which is what the 0..9 column header refers to.
 * ----------
 */
static const int DCH_index[KeyWord_INDEX_SIZE] = {
/*
0	1	2	3	4	5	6	7	8	9
*/
	/*---- first 0..31 chars are skipped ----*/

	-1, -1, -1, -1, -1, -1, -1, -1,	/* 32..39 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 40..49 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 50..59 */
	-1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,	/* 60..69, 'A' is 65 */
	DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF, /* 70..79 */
	DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,	/* 80..89 */
	-1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,	/* 90..99, 'a' is 97 */
	DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,	/* 100..109 */
	-1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww,	/* 110..119 */
	-1, DCH_y_yyy, -1, -1, -1, -1	/* 120..125 */

	/*---- chars over 126 are skipped ----*/
};
963 
/* ----------
 * KeyWords index for NUMBER version
 *
 * Slot [c - ' '] holds the NUM_poz value at which the keywords beginning
 * with character c start, or -1 when no keyword begins with c.  The first
 * row covers ASCII 32..39 (eight slots); every following row covers ten
 * codes, which is what the 0..9 column header refers to.
 * ----------
 */
static const int NUM_index[KeyWord_INDEX_SIZE] = {
/*
0	1	2	3	4	5	6	7	8	9
*/
	/*---- first 0..31 chars are skipped ----*/

	-1, -1, -1, -1, -1, -1, -1, -1,	/* 32..39 */
	-1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,	/* 40..49, ',' is 44 */
	-1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,	/* 50..59, '9' is 57 */
	-1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E, /* 60..69, 'B' is 66 */
	NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,	/* 70..79 */
	NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,	/* 80..89 */
	-1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,	/* 90..99, 'b' is 98 */
	NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi, /* 100..109 */
	-1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,	/* 110..119 */
	-1, -1, -1, -1, -1, -1		/* 120..125 */

	/*---- chars over 126 are skipped ----*/
};
987 
988 /* ----------
989  * Number processor struct
990  * ----------
991  */
992 typedef struct NUMProc
993 {
994 	bool		is_to_char;
995 	NUMDesc    *Num;			/* number description		*/
996 
997 	int			sign,			/* '-' or '+'			*/
998 				sign_wrote,		/* was sign write		*/
999 				num_count,		/* number of write digits	*/
1000 				num_in,			/* is inside number		*/
1001 				num_curr,		/* current position in number	*/
1002 				out_pre_spaces, /* spaces before first digit	*/
1003 
1004 				read_dec,		/* to_number - was read dec. point	*/
1005 				read_post,		/* to_number - number of dec. digit */
1006 				read_pre;		/* to_number - number non-dec. digit */
1007 
1008 	char	   *number,			/* string with number	*/
1009 			   *number_p,		/* pointer to current number position */
1010 			   *inout,			/* in / out buffer	*/
1011 			   *inout_p,		/* pointer to current inout position */
1012 			   *last_relevant,	/* last relevant number after decimal point */
1013 
1014 			   *L_negative_sign,	/* Locale */
1015 			   *L_positive_sign,
1016 			   *decimal,
1017 			   *L_thousands_sep,
1018 			   *L_currency_symbol;
1019 } NUMProc;
1020 
/* Return flags for DCH_from_char(); each occupies its own bit */
#define DCH_DATED	(1 << 0)
#define DCH_TIMED	(1 << 1)
#define DCH_ZONED	(1 << 2)
1025 
/* ----------
 * Functions
 *
 * Forward declarations for the file-local (static) routines.
 * ----------
 */

/* keyword / suffix lookup and format-picture parsing */
static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
									   const int *index);
static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
static bool is_separator_char(const char *str);
static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
						 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);

/* DCH_* conversion drivers; both take a parsed FormatNode array */
static void DCH_to_char(FormatNode *node, bool is_interval,
						TmToChar *in, char *out, Oid collid);
static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
						  Oid collid, bool std, bool *have_error);

/* debugging dumpers, compiled only when DEBUG_TO_FROM_CHAR is defined */
#ifdef DEBUG_TO_FROM_CHAR
static void dump_index(const KeyWord *k, const int *index);
static void dump_node(FormatNode *node, int max);
#endif

/* ordinal-suffix (ST/ND/RD/TH) helpers */
static const char *get_th(char *num, int type);
static char *str_numth(char *dest, char *num, int type);

/* NOTE(review): the remaining routines are defined outside this section */
static int	adjust_partial_year_to_2020(int year);
static int	strspace_len(const char *str);
static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
							   bool *have_error);
static void from_char_set_int(int *dest, const int value, const FormatNode *node,
							  bool *have_error);
static int	from_char_parse_int_len(int *dest, const char **src, const int len,
									FormatNode *node, bool *have_error);
static int	from_char_parse_int(int *dest, const char **src, FormatNode *node,
								bool *have_error);
static int	seq_search_ascii(const char *name, const char *const *array, int *len);
static int	seq_search_localized(const char *name, char **array, int *len,
								 Oid collid);
static int	from_char_seq_search(int *dest, const char **src,
								 const char *const *array,
								 char **localized_array, Oid collid,
								 FormatNode *node, bool *have_error);
static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
							struct pg_tm *tm, fsec_t *fsec, int *fprec,
							uint32 *flags, bool *have_error);
static char *fill_str(char *str, int c, int max);
static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
static char *int_to_roman(int number);
static void NUM_prepare_locale(NUMProc *Np);
static char *get_last_relevant_decnum(char *num);
static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
static void NUM_numpart_to_char(NUMProc *Np, int id);
static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
						   char *number, int input_len, int to_char_out_pre_spaces,
						   int sign, bool is_to_char, Oid collid);
static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
static NUMCacheEntry *NUM_cache_getnew(const char *str);
static NUMCacheEntry *NUM_cache_search(const char *str);
static NUMCacheEntry *NUM_cache_fetch(const char *str);
1086 
1087 
1088 /* ----------
1089  * Fast sequential search, use index for data selection which
1090  * go to seq. cycle (it is very fast for unwanted strings)
1091  * (can't be used binary search in format parsing)
1092  * ----------
1093  */
1094 static const KeyWord *
index_seq_search(const char * str,const KeyWord * kw,const int * index)1095 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1096 {
1097 	int			poz;
1098 
1099 	if (!KeyWord_INDEX_FILTER(*str))
1100 		return NULL;
1101 
1102 	if ((poz = *(index + (*str - ' '))) > -1)
1103 	{
1104 		const KeyWord *k = kw + poz;
1105 
1106 		do
1107 		{
1108 			if (strncmp(str, k->name, k->len) == 0)
1109 				return k;
1110 			k++;
1111 			if (!k->name)
1112 				return NULL;
1113 		} while (*str == *k->name);
1114 	}
1115 	return NULL;
1116 }
1117 
1118 static const KeySuffix *
suff_search(const char * str,const KeySuffix * suf,int type)1119 suff_search(const char *str, const KeySuffix *suf, int type)
1120 {
1121 	const KeySuffix *s;
1122 
1123 	for (s = suf; s->name != NULL; s++)
1124 	{
1125 		if (s->type != type)
1126 			continue;
1127 
1128 		if (strncmp(str, s->name, s->len) == 0)
1129 			return s;
1130 	}
1131 	return NULL;
1132 }
1133 
/*
 * Is the first byte of "str" a separator, i.e. a printable ASCII character
 * other than space that is neither a letter nor a digit?
 */
static bool
is_separator_char(const char *str)
{
	const char	c = *str;

	/* outside the printable, non-space ASCII range */
	if (c <= 0x20 || c >= 0x7F)
		return false;

	if (c >= 'A' && c <= 'Z')
		return false;
	if (c >= 'a' && c <= 'z')
		return false;
	if (c >= '0' && c <= '9')
		return false;

	return true;
}
1143 
1144 /* ----------
1145  * Prepare NUMDesc (number description struct) via FormatNode struct
1146  * ----------
1147  */
1148 static void
NUMDesc_prepare(NUMDesc * num,FormatNode * n)1149 NUMDesc_prepare(NUMDesc *num, FormatNode *n)
1150 {
1151 	if (n->type != NODE_TYPE_ACTION)
1152 		return;
1153 
1154 	if (IS_EEEE(num) && n->key->id != NUM_E)
1155 		ereport(ERROR,
1156 				(errcode(ERRCODE_SYNTAX_ERROR),
1157 				 errmsg("\"EEEE\" must be the last pattern used")));
1158 
1159 	switch (n->key->id)
1160 	{
1161 		case NUM_9:
1162 			if (IS_BRACKET(num))
1163 				ereport(ERROR,
1164 						(errcode(ERRCODE_SYNTAX_ERROR),
1165 						 errmsg("\"9\" must be ahead of \"PR\"")));
1166 			if (IS_MULTI(num))
1167 			{
1168 				++num->multi;
1169 				break;
1170 			}
1171 			if (IS_DECIMAL(num))
1172 				++num->post;
1173 			else
1174 				++num->pre;
1175 			break;
1176 
1177 		case NUM_0:
1178 			if (IS_BRACKET(num))
1179 				ereport(ERROR,
1180 						(errcode(ERRCODE_SYNTAX_ERROR),
1181 						 errmsg("\"0\" must be ahead of \"PR\"")));
1182 			if (!IS_ZERO(num) && !IS_DECIMAL(num))
1183 			{
1184 				num->flag |= NUM_F_ZERO;
1185 				num->zero_start = num->pre + 1;
1186 			}
1187 			if (!IS_DECIMAL(num))
1188 				++num->pre;
1189 			else
1190 				++num->post;
1191 
1192 			num->zero_end = num->pre + num->post;
1193 			break;
1194 
1195 		case NUM_B:
1196 			if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1197 				num->flag |= NUM_F_BLANK;
1198 			break;
1199 
1200 		case NUM_D:
1201 			num->flag |= NUM_F_LDECIMAL;
1202 			num->need_locale = true;
1203 			/* FALLTHROUGH */
1204 		case NUM_DEC:
1205 			if (IS_DECIMAL(num))
1206 				ereport(ERROR,
1207 						(errcode(ERRCODE_SYNTAX_ERROR),
1208 						 errmsg("multiple decimal points")));
1209 			if (IS_MULTI(num))
1210 				ereport(ERROR,
1211 						(errcode(ERRCODE_SYNTAX_ERROR),
1212 						 errmsg("cannot use \"V\" and decimal point together")));
1213 			num->flag |= NUM_F_DECIMAL;
1214 			break;
1215 
1216 		case NUM_FM:
1217 			num->flag |= NUM_F_FILLMODE;
1218 			break;
1219 
1220 		case NUM_S:
1221 			if (IS_LSIGN(num))
1222 				ereport(ERROR,
1223 						(errcode(ERRCODE_SYNTAX_ERROR),
1224 						 errmsg("cannot use \"S\" twice")));
1225 			if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1226 				ereport(ERROR,
1227 						(errcode(ERRCODE_SYNTAX_ERROR),
1228 						 errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1229 			if (!IS_DECIMAL(num))
1230 			{
1231 				num->lsign = NUM_LSIGN_PRE;
1232 				num->pre_lsign_num = num->pre;
1233 				num->need_locale = true;
1234 				num->flag |= NUM_F_LSIGN;
1235 			}
1236 			else if (num->lsign == NUM_LSIGN_NONE)
1237 			{
1238 				num->lsign = NUM_LSIGN_POST;
1239 				num->need_locale = true;
1240 				num->flag |= NUM_F_LSIGN;
1241 			}
1242 			break;
1243 
1244 		case NUM_MI:
1245 			if (IS_LSIGN(num))
1246 				ereport(ERROR,
1247 						(errcode(ERRCODE_SYNTAX_ERROR),
1248 						 errmsg("cannot use \"S\" and \"MI\" together")));
1249 			num->flag |= NUM_F_MINUS;
1250 			if (IS_DECIMAL(num))
1251 				num->flag |= NUM_F_MINUS_POST;
1252 			break;
1253 
1254 		case NUM_PL:
1255 			if (IS_LSIGN(num))
1256 				ereport(ERROR,
1257 						(errcode(ERRCODE_SYNTAX_ERROR),
1258 						 errmsg("cannot use \"S\" and \"PL\" together")));
1259 			num->flag |= NUM_F_PLUS;
1260 			if (IS_DECIMAL(num))
1261 				num->flag |= NUM_F_PLUS_POST;
1262 			break;
1263 
1264 		case NUM_SG:
1265 			if (IS_LSIGN(num))
1266 				ereport(ERROR,
1267 						(errcode(ERRCODE_SYNTAX_ERROR),
1268 						 errmsg("cannot use \"S\" and \"SG\" together")));
1269 			num->flag |= NUM_F_MINUS;
1270 			num->flag |= NUM_F_PLUS;
1271 			break;
1272 
1273 		case NUM_PR:
1274 			if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1275 				ereport(ERROR,
1276 						(errcode(ERRCODE_SYNTAX_ERROR),
1277 						 errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1278 			num->flag |= NUM_F_BRACKET;
1279 			break;
1280 
1281 		case NUM_rn:
1282 		case NUM_RN:
1283 			num->flag |= NUM_F_ROMAN;
1284 			break;
1285 
1286 		case NUM_L:
1287 		case NUM_G:
1288 			num->need_locale = true;
1289 			break;
1290 
1291 		case NUM_V:
1292 			if (IS_DECIMAL(num))
1293 				ereport(ERROR,
1294 						(errcode(ERRCODE_SYNTAX_ERROR),
1295 						 errmsg("cannot use \"V\" and decimal point together")));
1296 			num->flag |= NUM_F_MULTI;
1297 			break;
1298 
1299 		case NUM_E:
1300 			if (IS_EEEE(num))
1301 				ereport(ERROR,
1302 						(errcode(ERRCODE_SYNTAX_ERROR),
1303 						 errmsg("cannot use \"EEEE\" twice")));
1304 			if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1305 				IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1306 				IS_ROMAN(num) || IS_MULTI(num))
1307 				ereport(ERROR,
1308 						(errcode(ERRCODE_SYNTAX_ERROR),
1309 						 errmsg("\"EEEE\" is incompatible with other formats"),
1310 						 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1311 			num->flag |= NUM_F_EEEE;
1312 			break;
1313 	}
1314 }
1315 
1316 /* ----------
1317  * Format parser, search small keywords and keyword's suffixes, and make
1318  * format-node tree.
1319  *
1320  * for DATE-TIME & NUMBER version
1321  * ----------
1322  */
1323 static void
parse_format(FormatNode * node,const char * str,const KeyWord * kw,const KeySuffix * suf,const int * index,uint32 flags,NUMDesc * Num)1324 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1325 			 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1326 {
1327 	FormatNode *n;
1328 
1329 #ifdef DEBUG_TO_FROM_CHAR
1330 	elog(DEBUG_elog_output, "to_char/number(): run parser");
1331 #endif
1332 
1333 	n = node;
1334 
1335 	while (*str)
1336 	{
1337 		int			suffix = 0;
1338 		const KeySuffix *s;
1339 
1340 		/*
1341 		 * Prefix
1342 		 */
1343 		if ((flags & DCH_FLAG) &&
1344 			(s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1345 		{
1346 			suffix |= s->id;
1347 			if (s->len)
1348 				str += s->len;
1349 		}
1350 
1351 		/*
1352 		 * Keyword
1353 		 */
1354 		if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1355 		{
1356 			n->type = NODE_TYPE_ACTION;
1357 			n->suffix = suffix;
1358 			if (n->key->len)
1359 				str += n->key->len;
1360 
1361 			/*
1362 			 * NUM version: Prepare global NUMDesc struct
1363 			 */
1364 			if (flags & NUM_FLAG)
1365 				NUMDesc_prepare(Num, n);
1366 
1367 			/*
1368 			 * Postfix
1369 			 */
1370 			if ((flags & DCH_FLAG) && *str &&
1371 				(s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1372 			{
1373 				n->suffix |= s->id;
1374 				if (s->len)
1375 					str += s->len;
1376 			}
1377 
1378 			n++;
1379 		}
1380 		else if (*str)
1381 		{
1382 			int			chlen;
1383 
1384 			if ((flags & STD_FLAG) && *str != '"')
1385 			{
1386 				/*
1387 				 * Standard mode, allow only following separators: "-./,':; ".
1388 				 * However, we support double quotes even in standard mode
1389 				 * (see below).  This is our extension of standard mode.
1390 				 */
1391 				if (strchr("-./,':; ", *str) == NULL)
1392 					ereport(ERROR,
1393 							(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1394 							 errmsg("invalid datetime format separator: \"%s\"",
1395 									pnstrdup(str, pg_mblen(str)))));
1396 
1397 				if (*str == ' ')
1398 					n->type = NODE_TYPE_SPACE;
1399 				else
1400 					n->type = NODE_TYPE_SEPARATOR;
1401 
1402 				n->character[0] = *str;
1403 				n->character[1] = '\0';
1404 				n->key = NULL;
1405 				n->suffix = 0;
1406 				n++;
1407 				str++;
1408 			}
1409 			else if (*str == '"')
1410 			{
1411 				/*
1412 				 * Process double-quoted literal string, if any
1413 				 */
1414 				str++;
1415 				while (*str)
1416 				{
1417 					if (*str == '"')
1418 					{
1419 						str++;
1420 						break;
1421 					}
1422 					/* backslash quotes the next character, if any */
1423 					if (*str == '\\' && *(str + 1))
1424 						str++;
1425 					chlen = pg_mblen(str);
1426 					n->type = NODE_TYPE_CHAR;
1427 					memcpy(n->character, str, chlen);
1428 					n->character[chlen] = '\0';
1429 					n->key = NULL;
1430 					n->suffix = 0;
1431 					n++;
1432 					str += chlen;
1433 				}
1434 			}
1435 			else
1436 			{
1437 				/*
1438 				 * Outside double-quoted strings, backslash is only special if
1439 				 * it immediately precedes a double quote.
1440 				 */
1441 				if (*str == '\\' && *(str + 1) == '"')
1442 					str++;
1443 				chlen = pg_mblen(str);
1444 
1445 				if ((flags & DCH_FLAG) && is_separator_char(str))
1446 					n->type = NODE_TYPE_SEPARATOR;
1447 				else if (isspace((unsigned char) *str))
1448 					n->type = NODE_TYPE_SPACE;
1449 				else
1450 					n->type = NODE_TYPE_CHAR;
1451 
1452 				memcpy(n->character, str, chlen);
1453 				n->character[chlen] = '\0';
1454 				n->key = NULL;
1455 				n->suffix = 0;
1456 				n++;
1457 				str += chlen;
1458 			}
1459 		}
1460 	}
1461 
1462 	n->type = NODE_TYPE_END;
1463 	n->suffix = 0;
1464 }
1465 
/* ----------
 * DEBUG: Dump the FormatNode Tree (debug)
 *
 * Logs one line per node, starting at "node", until a NODE_TYPE_END node
 * has been printed or max + 1 nodes have been visited, whichever comes
 * first.  Only compiled when DEBUG_TO_FROM_CHAR is defined.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	FormatNode *n;
	int			a;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (a = 0, n = node; a <= max; n++, a++)
	{
		if (n->type == NODE_TYPE_ACTION)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
		else if (n->type == NODE_TYPE_CHAR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 a, n->character);
		/*
		 * parse_format() also emits separator and space nodes, and fills in
		 * n->character for them just as it does for NODE_TYPE_CHAR; report
		 * them by name instead of as unknown nodes.
		 */
		else if (n->type == NODE_TYPE_SEPARATOR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SEPARATOR '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_SPACE)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SPACE '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
			return;
		}
		else
			elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
	}
}
#endif							/* DEBUG */
1501 
1502 /*****************************************************************************
1503  *			Private utils
1504  *****************************************************************************/
1505 
1506 /* ----------
1507  * Return ST/ND/RD/TH for simple (1..9) numbers
1508  * type --> 0 upper, 1 lower
1509  * ----------
1510  */
1511 static const char *
get_th(char * num,int type)1512 get_th(char *num, int type)
1513 {
1514 	int			len = strlen(num),
1515 				last,
1516 				seclast;
1517 
1518 	last = *(num + (len - 1));
1519 	if (!isdigit((unsigned char) last))
1520 		ereport(ERROR,
1521 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1522 				 errmsg("\"%s\" is not a number", num)));
1523 
1524 	/*
1525 	 * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1526 	 * 'ST/st', 'ND/nd', 'RD/rd', respectively
1527 	 */
1528 	if ((len > 1) && ((seclast = num[len - 2]) == '1'))
1529 		last = 0;
1530 
1531 	switch (last)
1532 	{
1533 		case '1':
1534 			if (type == TH_UPPER)
1535 				return numTH[0];
1536 			return numth[0];
1537 		case '2':
1538 			if (type == TH_UPPER)
1539 				return numTH[1];
1540 			return numth[1];
1541 		case '3':
1542 			if (type == TH_UPPER)
1543 				return numTH[2];
1544 			return numth[2];
1545 		default:
1546 			if (type == TH_UPPER)
1547 				return numTH[3];
1548 			return numth[3];
1549 	}
1550 }
1551 
/* ----------
 * Convert string-number to ordinal string-number
 *
 * Copies "num" into "dest" (unless they are the same buffer) and appends
 * the suffix chosen by get_th(num, type).  Returns "dest", which must have
 * room for the number plus the suffix.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	if (dest != num)
		strcpy(dest, num);

	suffix = get_th(num, type);
	strcat(dest, suffix);

	return dest;
}
1565 
1566 /*****************************************************************************
1567  *			upper/lower/initcap functions
1568  *****************************************************************************/
1569 
1570 #ifdef USE_ICU
1571 
1572 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1573 									 const UChar *src, int32_t srcLength,
1574 									 const char *locale,
1575 									 UErrorCode *pErrorCode);
1576 
1577 static int32_t
icu_convert_case(ICU_Convert_Func func,pg_locale_t mylocale,UChar ** buff_dest,UChar * buff_source,int32_t len_source)1578 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1579 				 UChar **buff_dest, UChar *buff_source, int32_t len_source)
1580 {
1581 	UErrorCode	status;
1582 	int32_t		len_dest;
1583 
1584 	len_dest = len_source;		/* try first with same length */
1585 	*buff_dest = palloc(len_dest * sizeof(**buff_dest));
1586 	status = U_ZERO_ERROR;
1587 	len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1588 					mylocale->info.icu.locale, &status);
1589 	if (status == U_BUFFER_OVERFLOW_ERROR)
1590 	{
1591 		/* try again with adjusted length */
1592 		pfree(*buff_dest);
1593 		*buff_dest = palloc(len_dest * sizeof(**buff_dest));
1594 		status = U_ZERO_ERROR;
1595 		len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1596 						mylocale->info.icu.locale, &status);
1597 	}
1598 	if (U_FAILURE(status))
1599 		ereport(ERROR,
1600 				(errmsg("case conversion failed: %s", u_errorName(status))));
1601 	return len_dest;
1602 }
1603 
1604 static int32_t
u_strToTitle_default_BI(UChar * dest,int32_t destCapacity,const UChar * src,int32_t srcLength,const char * locale,UErrorCode * pErrorCode)1605 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1606 						const UChar *src, int32_t srcLength,
1607 						const char *locale,
1608 						UErrorCode *pErrorCode)
1609 {
1610 	return u_strToTitle(dest, destCapacity, src, srcLength,
1611 						NULL, locale, pErrorCode);
1612 }
1613 
1614 #endif							/* USE_ICU */
1615 
1616 /*
1617  * If the system provides the needed functions for wide-character manipulation
1618  * (which are all standardized by C99), then we implement upper/lower/initcap
1619  * using wide-character functions, if necessary.  Otherwise we use the
1620  * traditional <ctype.h> functions, which of course will not work as desired
1621  * in multibyte character sets.  Note that in either case we are effectively
1622  * assuming that the database character encoding matches the encoding implied
1623  * by LC_CTYPE.
1624  *
1625  * If the system provides locale_t and associated functions (which are
1626  * standardized by Open Group's XBD), we can support collations that are
1627  * neither default nor C.  The code is written to handle both combinations
1628  * of have-wide-characters and have-locale_t, though it's rather unlikely
1629  * a platform would have the latter without the former.
1630  */
1631 
1632 /*
1633  * collation-aware, wide-character-aware lower function
1634  *
1635  * We pass the number of bytes so we can pass varlena and char*
1636  * to this function.  The result is a palloc'd, null-terminated string.
1637  */
1638 char *
str_tolower(const char * buff,size_t nbytes,Oid collid)1639 str_tolower(const char *buff, size_t nbytes, Oid collid)
1640 {
1641 	char	   *result;
1642 
1643 	if (!buff)
1644 		return NULL;
1645 
1646 	/* C/POSIX collations use this path regardless of database encoding */
1647 	if (lc_ctype_is_c(collid))
1648 	{
1649 		result = asc_tolower(buff, nbytes);
1650 	}
1651 	else
1652 	{
1653 		pg_locale_t mylocale = 0;
1654 
1655 		if (collid != DEFAULT_COLLATION_OID)
1656 		{
1657 			if (!OidIsValid(collid))
1658 			{
1659 				/*
1660 				 * This typically means that the parser could not resolve a
1661 				 * conflict of implicit collations, so report it that way.
1662 				 */
1663 				ereport(ERROR,
1664 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
1665 						 errmsg("could not determine which collation to use for %s function",
1666 								"lower()"),
1667 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
1668 			}
1669 			mylocale = pg_newlocale_from_collation(collid);
1670 		}
1671 
1672 #ifdef USE_ICU
1673 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1674 		{
1675 			int32_t		len_uchar;
1676 			int32_t		len_conv;
1677 			UChar	   *buff_uchar;
1678 			UChar	   *buff_conv;
1679 
1680 			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1681 			len_conv = icu_convert_case(u_strToLower, mylocale,
1682 										&buff_conv, buff_uchar, len_uchar);
1683 			icu_from_uchar(&result, buff_conv, len_conv);
1684 			pfree(buff_uchar);
1685 			pfree(buff_conv);
1686 		}
1687 		else
1688 #endif
1689 		{
1690 			if (pg_database_encoding_max_length() > 1)
1691 			{
1692 				wchar_t    *workspace;
1693 				size_t		curr_char;
1694 				size_t		result_size;
1695 
1696 				/* Overflow paranoia */
1697 				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1698 					ereport(ERROR,
1699 							(errcode(ERRCODE_OUT_OF_MEMORY),
1700 							 errmsg("out of memory")));
1701 
1702 				/* Output workspace cannot have more codes than input bytes */
1703 				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1704 
1705 				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1706 
1707 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1708 				{
1709 #ifdef HAVE_LOCALE_T
1710 					if (mylocale)
1711 						workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1712 					else
1713 #endif
1714 						workspace[curr_char] = towlower(workspace[curr_char]);
1715 				}
1716 
1717 				/*
1718 				 * Make result large enough; case change might change number
1719 				 * of bytes
1720 				 */
1721 				result_size = curr_char * pg_database_encoding_max_length() + 1;
1722 				result = palloc(result_size);
1723 
1724 				wchar2char(result, workspace, result_size, mylocale);
1725 				pfree(workspace);
1726 			}
1727 			else
1728 			{
1729 				char	   *p;
1730 
1731 				result = pnstrdup(buff, nbytes);
1732 
1733 				/*
1734 				 * Note: we assume that tolower_l() will not be so broken as
1735 				 * to need an isupper_l() guard test.  When using the default
1736 				 * collation, we apply the traditional Postgres behavior that
1737 				 * forces ASCII-style treatment of I/i, but in non-default
1738 				 * collations you get exactly what the collation says.
1739 				 */
1740 				for (p = result; *p; p++)
1741 				{
1742 #ifdef HAVE_LOCALE_T
1743 					if (mylocale)
1744 						*p = tolower_l((unsigned char) *p, mylocale->info.lt);
1745 					else
1746 #endif
1747 						*p = pg_tolower((unsigned char) *p);
1748 				}
1749 			}
1750 		}
1751 	}
1752 
1753 	return result;
1754 }
1755 
1756 /*
1757  * collation-aware, wide-character-aware upper function
1758  *
1759  * We pass the number of bytes so we can pass varlena and char*
1760  * to this function.  The result is a palloc'd, null-terminated string.
1761  */
1762 char *
str_toupper(const char * buff,size_t nbytes,Oid collid)1763 str_toupper(const char *buff, size_t nbytes, Oid collid)
1764 {
1765 	char	   *result;
1766 
1767 	if (!buff)
1768 		return NULL;
1769 
1770 	/* C/POSIX collations use this path regardless of database encoding */
1771 	if (lc_ctype_is_c(collid))
1772 	{
1773 		result = asc_toupper(buff, nbytes);
1774 	}
1775 	else
1776 	{
1777 		pg_locale_t mylocale = 0;
1778 
1779 		if (collid != DEFAULT_COLLATION_OID)
1780 		{
1781 			if (!OidIsValid(collid))
1782 			{
1783 				/*
1784 				 * This typically means that the parser could not resolve a
1785 				 * conflict of implicit collations, so report it that way.
1786 				 */
1787 				ereport(ERROR,
1788 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
1789 						 errmsg("could not determine which collation to use for %s function",
1790 								"upper()"),
1791 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
1792 			}
1793 			mylocale = pg_newlocale_from_collation(collid);
1794 		}
1795 
1796 #ifdef USE_ICU
1797 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1798 		{
1799 			int32_t		len_uchar,
1800 						len_conv;
1801 			UChar	   *buff_uchar;
1802 			UChar	   *buff_conv;
1803 
1804 			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1805 			len_conv = icu_convert_case(u_strToUpper, mylocale,
1806 										&buff_conv, buff_uchar, len_uchar);
1807 			icu_from_uchar(&result, buff_conv, len_conv);
1808 			pfree(buff_uchar);
1809 			pfree(buff_conv);
1810 		}
1811 		else
1812 #endif
1813 		{
1814 			if (pg_database_encoding_max_length() > 1)
1815 			{
1816 				wchar_t    *workspace;
1817 				size_t		curr_char;
1818 				size_t		result_size;
1819 
1820 				/* Overflow paranoia */
1821 				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1822 					ereport(ERROR,
1823 							(errcode(ERRCODE_OUT_OF_MEMORY),
1824 							 errmsg("out of memory")));
1825 
1826 				/* Output workspace cannot have more codes than input bytes */
1827 				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1828 
1829 				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1830 
1831 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1832 				{
1833 #ifdef HAVE_LOCALE_T
1834 					if (mylocale)
1835 						workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1836 					else
1837 #endif
1838 						workspace[curr_char] = towupper(workspace[curr_char]);
1839 				}
1840 
1841 				/*
1842 				 * Make result large enough; case change might change number
1843 				 * of bytes
1844 				 */
1845 				result_size = curr_char * pg_database_encoding_max_length() + 1;
1846 				result = palloc(result_size);
1847 
1848 				wchar2char(result, workspace, result_size, mylocale);
1849 				pfree(workspace);
1850 			}
1851 			else
1852 			{
1853 				char	   *p;
1854 
1855 				result = pnstrdup(buff, nbytes);
1856 
1857 				/*
1858 				 * Note: we assume that toupper_l() will not be so broken as
1859 				 * to need an islower_l() guard test.  When using the default
1860 				 * collation, we apply the traditional Postgres behavior that
1861 				 * forces ASCII-style treatment of I/i, but in non-default
1862 				 * collations you get exactly what the collation says.
1863 				 */
1864 				for (p = result; *p; p++)
1865 				{
1866 #ifdef HAVE_LOCALE_T
1867 					if (mylocale)
1868 						*p = toupper_l((unsigned char) *p, mylocale->info.lt);
1869 					else
1870 #endif
1871 						*p = pg_toupper((unsigned char) *p);
1872 				}
1873 			}
1874 		}
1875 	}
1876 
1877 	return result;
1878 }
1879 
1880 /*
1881  * collation-aware, wide-character-aware initcap function
1882  *
1883  * We pass the number of bytes so we can pass varlena and char*
1884  * to this function.  The result is a palloc'd, null-terminated string.
1885  */
1886 char *
str_initcap(const char * buff,size_t nbytes,Oid collid)1887 str_initcap(const char *buff, size_t nbytes, Oid collid)
1888 {
1889 	char	   *result;
1890 	int			wasalnum = false;
1891 
1892 	if (!buff)
1893 		return NULL;
1894 
1895 	/* C/POSIX collations use this path regardless of database encoding */
1896 	if (lc_ctype_is_c(collid))
1897 	{
1898 		result = asc_initcap(buff, nbytes);
1899 	}
1900 	else
1901 	{
1902 		pg_locale_t mylocale = 0;
1903 
1904 		if (collid != DEFAULT_COLLATION_OID)
1905 		{
1906 			if (!OidIsValid(collid))
1907 			{
1908 				/*
1909 				 * This typically means that the parser could not resolve a
1910 				 * conflict of implicit collations, so report it that way.
1911 				 */
1912 				ereport(ERROR,
1913 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
1914 						 errmsg("could not determine which collation to use for %s function",
1915 								"initcap()"),
1916 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
1917 			}
1918 			mylocale = pg_newlocale_from_collation(collid);
1919 		}
1920 
1921 #ifdef USE_ICU
1922 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1923 		{
1924 			int32_t		len_uchar,
1925 						len_conv;
1926 			UChar	   *buff_uchar;
1927 			UChar	   *buff_conv;
1928 
1929 			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1930 			len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1931 										&buff_conv, buff_uchar, len_uchar);
1932 			icu_from_uchar(&result, buff_conv, len_conv);
1933 			pfree(buff_uchar);
1934 			pfree(buff_conv);
1935 		}
1936 		else
1937 #endif
1938 		{
1939 			if (pg_database_encoding_max_length() > 1)
1940 			{
1941 				wchar_t    *workspace;
1942 				size_t		curr_char;
1943 				size_t		result_size;
1944 
1945 				/* Overflow paranoia */
1946 				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1947 					ereport(ERROR,
1948 							(errcode(ERRCODE_OUT_OF_MEMORY),
1949 							 errmsg("out of memory")));
1950 
1951 				/* Output workspace cannot have more codes than input bytes */
1952 				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1953 
1954 				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1955 
1956 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1957 				{
1958 #ifdef HAVE_LOCALE_T
1959 					if (mylocale)
1960 					{
1961 						if (wasalnum)
1962 							workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1963 						else
1964 							workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1965 						wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1966 					}
1967 					else
1968 #endif
1969 					{
1970 						if (wasalnum)
1971 							workspace[curr_char] = towlower(workspace[curr_char]);
1972 						else
1973 							workspace[curr_char] = towupper(workspace[curr_char]);
1974 						wasalnum = iswalnum(workspace[curr_char]);
1975 					}
1976 				}
1977 
1978 				/*
1979 				 * Make result large enough; case change might change number
1980 				 * of bytes
1981 				 */
1982 				result_size = curr_char * pg_database_encoding_max_length() + 1;
1983 				result = palloc(result_size);
1984 
1985 				wchar2char(result, workspace, result_size, mylocale);
1986 				pfree(workspace);
1987 			}
1988 			else
1989 			{
1990 				char	   *p;
1991 
1992 				result = pnstrdup(buff, nbytes);
1993 
1994 				/*
1995 				 * Note: we assume that toupper_l()/tolower_l() will not be so
1996 				 * broken as to need guard tests.  When using the default
1997 				 * collation, we apply the traditional Postgres behavior that
1998 				 * forces ASCII-style treatment of I/i, but in non-default
1999 				 * collations you get exactly what the collation says.
2000 				 */
2001 				for (p = result; *p; p++)
2002 				{
2003 #ifdef HAVE_LOCALE_T
2004 					if (mylocale)
2005 					{
2006 						if (wasalnum)
2007 							*p = tolower_l((unsigned char) *p, mylocale->info.lt);
2008 						else
2009 							*p = toupper_l((unsigned char) *p, mylocale->info.lt);
2010 						wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2011 					}
2012 					else
2013 #endif
2014 					{
2015 						if (wasalnum)
2016 							*p = pg_tolower((unsigned char) *p);
2017 						else
2018 							*p = pg_toupper((unsigned char) *p);
2019 						wasalnum = isalnum((unsigned char) *p);
2020 					}
2021 				}
2022 			}
2023 		}
2024 	}
2025 
2026 	return result;
2027 }
2028 
/*
 * asc_tolower
 *
 * Lower-case a byte string using ASCII rules only: every byte of the copy is
 * passed through pg_ascii_tolower().
 *
 * The length is supplied explicitly so that callers may pass either a
 * varlena payload or an ordinary char* string.  A NULL input yields NULL;
 * otherwise the result is a palloc'd, null-terminated copy made with
 * pnstrdup().
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *folded;
	char	   *cursor;

	if (buff == NULL)
		return NULL;

	folded = pnstrdup(buff, nbytes);

	/* Fold the copy in place, stopping at its terminating NUL. */
	cursor = folded;
	while (*cursor != '\0')
	{
		*cursor = pg_ascii_tolower((unsigned char) *cursor);
		cursor++;
	}

	return folded;
}
2051 
/*
 * asc_toupper
 *
 * Upper-case a byte string using ASCII rules only: every byte of the copy is
 * passed through pg_ascii_toupper().
 *
 * The length is supplied explicitly so that callers may pass either a
 * varlena payload or an ordinary char* string.  A NULL input yields NULL;
 * otherwise the result is a palloc'd, null-terminated copy made with
 * pnstrdup().
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *folded;
	char	   *cursor;

	if (buff == NULL)
		return NULL;

	folded = pnstrdup(buff, nbytes);

	/* Fold the copy in place, stopping at its terminating NUL. */
	cursor = folded;
	while (*cursor != '\0')
	{
		*cursor = pg_ascii_toupper((unsigned char) *cursor);
		cursor++;
	}

	return folded;
}
2074 
/*
 * asc_initcap
 *
 * ASCII-only "initcap": a byte is upper-cased when the byte before it was
 * not an ASCII letter or digit (this includes the very first byte), and
 * lower-cased otherwise.
 *
 * The length is supplied explicitly so that callers may pass either a
 * varlena payload or an ordinary char* string.  A NULL input yields NULL;
 * otherwise the result is a palloc'd, null-terminated copy made with
 * pnstrdup().
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *copy;
	char	   *cursor;
	int			prev_alnum = false;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (cursor = copy; *cursor != '\0'; cursor++)
	{
		char		ch;

		if (prev_alnum)
			ch = pg_ascii_tolower((unsigned char) *cursor);
		else
			ch = pg_ascii_toupper((unsigned char) *cursor);
		*cursor = ch;

		/*
		 * isalnum() is deliberately not trusted here; classify the converted
		 * byte with explicit ASCII ranges instead.
		 */
		prev_alnum = ((ch >= '0' && ch <= '9') ||
					  (ch >= 'A' && ch <= 'Z') ||
					  (ch >= 'a' && ch <= 'z'));
	}

	return copy;
}
2109 
2110 /* convenience routines for when the input is null-terminated */
2111 
2112 static char *
str_tolower_z(const char * buff,Oid collid)2113 str_tolower_z(const char *buff, Oid collid)
2114 {
2115 	return str_tolower(buff, strlen(buff), collid);
2116 }
2117 
2118 static char *
str_toupper_z(const char * buff,Oid collid)2119 str_toupper_z(const char *buff, Oid collid)
2120 {
2121 	return str_toupper(buff, strlen(buff), collid);
2122 }
2123 
2124 static char *
str_initcap_z(const char * buff,Oid collid)2125 str_initcap_z(const char *buff, Oid collid)
2126 {
2127 	return str_initcap(buff, strlen(buff), collid);
2128 }
2129 
/* asc_tolower() for a null-terminated input: the length comes from strlen() */
static char *
asc_tolower_z(const char *buff)
{
	size_t		buflen = strlen(buff);

	return asc_tolower(buff, buflen);
}
2135 
/* asc_toupper() for a null-terminated input: the length comes from strlen() */
static char *
asc_toupper_z(const char *buff)
{
	size_t		buflen = strlen(buff);

	return asc_toupper(buff, buflen);
}
2141 
2142 /* asc_initcap_z is not currently needed */
2143 
2144 
/* ----------
 * Skip a TH / th suffix in FROM_CHAR
 *
 * If S_THth(_suf) is on, advance 'ptr' over up to two characters.  Each step
 * is taken only while *ptr is not the terminating NUL and moves 'ptr' by
 * pg_mblen(ptr) bytes (presumably the byte length of one, possibly
 * multibyte, character -- confirm against pg_mblen()).
 *
 * 'ptr' is evaluated several times and is assigned to, so it must be a
 * modifiable lvalue without side effects.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			if (*(ptr)) (ptr) += pg_mblen(ptr); \
			if (*(ptr)) (ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2159 
2160 
2161 #ifdef DEBUG_TO_FROM_CHAR
2162 /* -----------
2163  * DEBUG: Call for debug and for index checking; (Show ASCII char
2164  * and defined keyword for each used position
2165  * ----------
2166  */
2167 static void
dump_index(const KeyWord * k,const int * index)2168 dump_index(const KeyWord *k, const int *index)
2169 {
2170 	int			i,
2171 				count = 0,
2172 				free_i = 0;
2173 
2174 	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");
2175 
2176 	for (i = 0; i < KeyWord_INDEX_SIZE; i++)
2177 	{
2178 		if (index[i] != -1)
2179 		{
2180 			elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name);
2181 			count++;
2182 		}
2183 		else
2184 		{
2185 			free_i++;
2186 			elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]);
2187 		}
2188 	}
2189 	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
2190 		 count, free_i);
2191 }
2192 #endif							/* DEBUG */
2193 
2194 /* ----------
2195  * Return true if next format picture is not digit value
2196  * ----------
2197  */
2198 static bool
is_next_separator(FormatNode * n)2199 is_next_separator(FormatNode *n)
2200 {
2201 	if (n->type == NODE_TYPE_END)
2202 		return false;
2203 
2204 	if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2205 		return true;
2206 
2207 	/*
2208 	 * Next node
2209 	 */
2210 	n++;
2211 
2212 	/* end of format string is treated like a non-digit separator */
2213 	if (n->type == NODE_TYPE_END)
2214 		return true;
2215 
2216 	if (n->type == NODE_TYPE_ACTION)
2217 	{
2218 		if (n->key->is_digit)
2219 			return false;
2220 
2221 		return true;
2222 	}
2223 	else if (n->character[1] == '\0' &&
2224 			 isdigit((unsigned char) n->character[0]))
2225 		return false;
2226 
2227 	return true;				/* some non-digit input (separator) */
2228 }
2229 
2230 
/*
 * Expand an abbreviated year so that it lands as close to 2020 as possible;
 * this is effectively what happens when '70' is taken as 1970 and '69' as
 * 2069.
 *
 *	below 70	-> year + 2000
 *	70 .. 99	-> year + 1900
 *	100 .. 519	-> year + 2000
 *	520 .. 999	-> year + 1000
 *	1000 and up	-> returned unchanged
 */
static int
adjust_partial_year_to_2020(int year)
{
	int			offset;

	/* Years of four or more digits are already complete. */
	if (year >= 1000)
		return year;

	if (year < 70)
		offset = 2000;
	else if (year < 100)
		offset = 1900;
	else if (year < 520)
		offset = 2000;
	else
		offset = 1000;

	return year + offset;
}
2253 
2254 
/*
 * Count the whitespace characters (per isspace()) at the start of 'str'.
 * Returns 0 for an empty string or one that starts with non-whitespace.
 */
static int
strspace_len(const char *str)
{
	int			nspaces = 0;

	while (str[nspaces] != '\0' && isspace((unsigned char) str[nspaces]))
		nspaces++;

	return nspaces;
}
2267 
2268 /*
2269  * Set the date mode of a from-char conversion.
2270  *
2271  * Puke if the date mode has already been set, and the caller attempts to set
2272  * it to a conflicting mode.
2273  *
2274  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2275  */
2276 static void
from_char_set_mode(TmFromChar * tmfc,const FromCharDateMode mode,bool * have_error)2277 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
2278 {
2279 	if (mode != FROM_CHAR_DATE_NONE)
2280 	{
2281 		if (tmfc->mode == FROM_CHAR_DATE_NONE)
2282 			tmfc->mode = mode;
2283 		else if (tmfc->mode != mode)
2284 			RETURN_ERROR(ereport(ERROR,
2285 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2286 								  errmsg("invalid combination of date conventions"),
2287 								  errhint("Do not mix Gregorian and ISO week date "
2288 										  "conventions in a formatting template."))));
2289 	}
2290 
2291 on_error:
2292 	return;
2293 }
2294 
2295 /*
2296  * Set the integer pointed to by 'dest' to the given value.
2297  *
2298  * Puke if the destination integer has previously been set to some other
2299  * non-zero value.
2300  *
2301  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2302  */
2303 static void
from_char_set_int(int * dest,const int value,const FormatNode * node,bool * have_error)2304 from_char_set_int(int *dest, const int value, const FormatNode *node,
2305 				  bool *have_error)
2306 {
2307 	if (*dest != 0 && *dest != value)
2308 		RETURN_ERROR(ereport(ERROR,
2309 							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2310 							  errmsg("conflicting values for \"%s\" field in "
2311 									 "formatting string",
2312 									 node->key->name),
2313 							  errdetail("This value contradicts a previous setting "
2314 										"for the same field type."))));
2315 	*dest = value;
2316 
2317 on_error:
2318 	return;
2319 }
2320 
2321 /*
2322  * Read a single integer from the source string, into the int pointed to by
2323  * 'dest'. If 'dest' is NULL, the result is discarded.
2324  *
2325  * In fixed-width mode (the node does not have the FM suffix), consume at most
2326  * 'len' characters.  However, any leading whitespace isn't counted in 'len'.
2327  *
2328  * We use strtol() to recover the integer value from the source string, in
2329  * accordance with the given FormatNode.
2330  *
2331  * If the conversion completes successfully, src will have been advanced to
2332  * point at the character immediately following the last character used in the
2333  * conversion.
2334  *
2335  * Return the number of characters consumed.
2336  *
2337  * Note that from_char_parse_int() provides a more convenient wrapper where
2338  * the length of the field is the same as the length of the format keyword (as
2339  * with DD and MI).
2340  *
2341  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
2342  * and -1 is returned.
2343  */
2344 static int
from_char_parse_int_len(int * dest,const char ** src,const int len,FormatNode * node,bool * have_error)2345 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2346 						bool *have_error)
2347 {
2348 	long		result;
2349 	char		copy[DCH_MAX_ITEM_SIZ + 1];
2350 	const char *init = *src;
2351 	int			used;
2352 
2353 	/*
2354 	 * Skip any whitespace before parsing the integer.
2355 	 */
2356 	*src += strspace_len(*src);
2357 
2358 	Assert(len <= DCH_MAX_ITEM_SIZ);
2359 	used = (int) strlcpy(copy, *src, len + 1);
2360 
2361 	if (S_FM(node->suffix) || is_next_separator(node))
2362 	{
2363 		/*
2364 		 * This node is in Fill Mode, or the next node is known to be a
2365 		 * non-digit value, so we just slurp as many characters as we can get.
2366 		 */
2367 		char	   *endptr;
2368 
2369 		errno = 0;
2370 		result = strtol(init, &endptr, 10);
2371 		*src = endptr;
2372 	}
2373 	else
2374 	{
2375 		/*
2376 		 * We need to pull exactly the number of characters given in 'len' out
2377 		 * of the string, and convert those.
2378 		 */
2379 		char	   *last;
2380 
2381 		if (used < len)
2382 			RETURN_ERROR(ereport(ERROR,
2383 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2384 								  errmsg("source string too short for \"%s\" "
2385 										 "formatting field",
2386 										 node->key->name),
2387 								  errdetail("Field requires %d characters, "
2388 											"but only %d remain.",
2389 											len, used),
2390 								  errhint("If your source string is not fixed-width, "
2391 										  "try using the \"FM\" modifier."))));
2392 
2393 		errno = 0;
2394 		result = strtol(copy, &last, 10);
2395 		used = last - copy;
2396 
2397 		if (used > 0 && used < len)
2398 			RETURN_ERROR(ereport(ERROR,
2399 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2400 								  errmsg("invalid value \"%s\" for \"%s\"",
2401 										 copy, node->key->name),
2402 								  errdetail("Field requires %d characters, "
2403 											"but only %d could be parsed.",
2404 											len, used),
2405 								  errhint("If your source string is not fixed-width, "
2406 										  "try using the \"FM\" modifier."))));
2407 
2408 		*src += used;
2409 	}
2410 
2411 	if (*src == init)
2412 		RETURN_ERROR(ereport(ERROR,
2413 							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2414 							  errmsg("invalid value \"%s\" for \"%s\"",
2415 									 copy, node->key->name),
2416 							  errdetail("Value must be an integer."))));
2417 
2418 	if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2419 		RETURN_ERROR(ereport(ERROR,
2420 							 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2421 							  errmsg("value for \"%s\" in source string is out of range",
2422 									 node->key->name),
2423 							  errdetail("Value must be in the range %d to %d.",
2424 										INT_MIN, INT_MAX))));
2425 
2426 	if (dest != NULL)
2427 	{
2428 		from_char_set_int(dest, (int) result, node, have_error);
2429 		CHECK_ERROR;
2430 	}
2431 
2432 	return *src - init;
2433 
2434 on_error:
2435 	return -1;
2436 }
2437 
2438 /*
2439  * Call from_char_parse_int_len(), using the length of the format keyword as
2440  * the expected length of the field.
2441  *
2442  * Don't call this function if the field differs in length from the format
2443  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2444  * In such cases, call from_char_parse_int_len() instead to specify the
2445  * required length explicitly.
2446  */
2447 static int
from_char_parse_int(int * dest,const char ** src,FormatNode * node,bool * have_error)2448 from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
2449 {
2450 	return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
2451 }
2452 
/*
 * Walk the NULL-terminated "array" in order, looking for the first entry
 * that case-insensitively equals the leading character(s) of "name".
 *
 * Returns the array index of that entry, or -1 if there is none.
 *
 * *len receives the length of the match, or 0 when nothing matched.
 *
 * Characters are compared after pg_ascii_tolower(), so this is suitable
 * only for matching against ASCII strings.
 */
static int
seq_search_ascii(const char *name, const char *const *array, int *len)
{
	unsigned char lead;
	int			idx;

	*len = 0;

	/* an empty input cannot match any entry */
	if (*name == '\0')
		return -1;

	/* fold the first input character once; it is tested against every entry */
	lead = pg_ascii_tolower((unsigned char) *name);

	for (idx = 0; array[idx] != NULL; idx++)
	{
		const char *entry = array[idx];
		int			pos;

		/* cheap rejection on the first character */
		if (pg_ascii_tolower((unsigned char) entry[0]) != lead)
			continue;

		/* first characters agree; compare the remainder in lock step */
		for (pos = 1;; pos++)
		{
			/* ran off the end of the entry: the whole entry matched */
			if (entry[pos] == '\0')
			{
				*len = pos;
				return idx;
			}

			/* input exhausted, or characters differ: try the next entry */
			if (name[pos] == '\0' ||
				pg_ascii_tolower((unsigned char) entry[pos]) !=
				pg_ascii_tolower((unsigned char) name[pos]))
				break;
		}
	}

	return -1;
}
2509 
2510 /*
2511  * Sequentially search an array of possibly non-English words for
2512  * a case-insensitive match to the initial character(s) of "name".
2513  *
2514  * This has the same API as seq_search_ascii(), but we use a more general
2515  * case-folding transformation to achieve case-insensitivity.  Case folding
2516  * is done per the rules of the collation identified by "collid".
2517  *
2518  * The array is treated as const, but we don't declare it that way because
2519  * the arrays exported by pg_locale.c aren't const.
2520  */
2521 static int
seq_search_localized(const char * name,char ** array,int * len,Oid collid)2522 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2523 {
2524 	char	  **a;
2525 	char	   *upper_name;
2526 	char	   *lower_name;
2527 
2528 	*len = 0;
2529 
2530 	/* empty string can't match anything */
2531 	if (!*name)
2532 		return -1;
2533 
2534 	/*
2535 	 * The case-folding processing done below is fairly expensive, so before
2536 	 * doing that, make a quick pass to see if there is an exact match.
2537 	 */
2538 	for (a = array; *a != NULL; a++)
2539 	{
2540 		int			element_len = strlen(*a);
2541 
2542 		if (strncmp(name, *a, element_len) == 0)
2543 		{
2544 			*len = element_len;
2545 			return a - array;
2546 		}
2547 	}
2548 
2549 	/*
2550 	 * Fold to upper case, then to lower case, so that we can match reliably
2551 	 * even in languages in which case conversions are not injective.
2552 	 */
2553 	upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
2554 	lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2555 	pfree(upper_name);
2556 
2557 	for (a = array; *a != NULL; a++)
2558 	{
2559 		char	   *upper_element;
2560 		char	   *lower_element;
2561 		int			element_len;
2562 
2563 		/* Likewise upper/lower-case array element */
2564 		upper_element = str_toupper(*a, strlen(*a), collid);
2565 		lower_element = str_tolower(upper_element, strlen(upper_element),
2566 									collid);
2567 		pfree(upper_element);
2568 		element_len = strlen(lower_element);
2569 
2570 		/* Match? */
2571 		if (strncmp(lower_name, lower_element, element_len) == 0)
2572 		{
2573 			*len = element_len;
2574 			pfree(lower_element);
2575 			pfree(lower_name);
2576 			return a - array;
2577 		}
2578 		pfree(lower_element);
2579 	}
2580 
2581 	pfree(lower_name);
2582 	return -1;
2583 }
2584 
2585 /*
2586  * Perform a sequential search in 'array' (or 'localized_array', if that's
2587  * not NULL) for an entry matching the first character(s) of the 'src'
2588  * string case-insensitively.
2589  *
2590  * The 'array' is presumed to be English words (all-ASCII), but
2591  * if 'localized_array' is supplied, that might be non-English
2592  * so we need a more expensive case-folding transformation
2593  * (which will follow the rules of the collation 'collid').
2594  *
2595  * If a match is found, copy the array index of the match into the integer
2596  * pointed to by 'dest', advance 'src' to the end of the part of the string
2597  * which matched, and return the number of characters consumed.
2598  *
2599  * If the string doesn't match, throw an error if 'have_error' is NULL,
2600  * otherwise set '*have_error' and return -1.
2601  *
2602  * 'node' is used only for error reports: node->key->name identifies the
2603  * field type we were searching for.
2604  */
2605 static int
from_char_seq_search(int * dest,const char ** src,const char * const * array,char ** localized_array,Oid collid,FormatNode * node,bool * have_error)2606 from_char_seq_search(int *dest, const char **src, const char *const *array,
2607 					 char **localized_array, Oid collid,
2608 					 FormatNode *node, bool *have_error)
2609 {
2610 	int			len;
2611 
2612 	if (localized_array == NULL)
2613 		*dest = seq_search_ascii(*src, array, &len);
2614 	else
2615 		*dest = seq_search_localized(*src, localized_array, &len, collid);
2616 
2617 	if (len <= 0)
2618 	{
2619 		/*
2620 		 * In the error report, truncate the string at the next whitespace (if
2621 		 * any) to avoid including irrelevant data.
2622 		 */
2623 		char	   *copy = pstrdup(*src);
2624 		char	   *c;
2625 
2626 		for (c = copy; *c; c++)
2627 		{
2628 			if (scanner_isspace(*c))
2629 			{
2630 				*c = '\0';
2631 				break;
2632 			}
2633 		}
2634 
2635 		RETURN_ERROR(ereport(ERROR,
2636 							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2637 							  errmsg("invalid value \"%s\" for \"%s\"",
2638 									 copy, node->key->name),
2639 							  errdetail("The given value did not match any of "
2640 										"the allowed values for this field."))));
2641 	}
2642 	*src += len;
2643 	return len;
2644 
2645 on_error:
2646 	return -1;
2647 }
2648 
2649 /* ----------
2650  * Process a TmToChar struct as denoted by a list of FormatNodes.
2651  * The formatted data is written to the string pointed to by 'out'.
2652  * ----------
2653  */
2654 static void
DCH_to_char(FormatNode * node,bool is_interval,TmToChar * in,char * out,Oid collid)2655 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2656 {
2657 	FormatNode *n;
2658 	char	   *s;
2659 	struct pg_tm *tm = &in->tm;
2660 	int			i;
2661 
2662 	/* cache localized days and months */
2663 	cache_locale_time();
2664 
2665 	s = out;
2666 	for (n = node; n->type != NODE_TYPE_END; n++)
2667 	{
2668 		if (n->type != NODE_TYPE_ACTION)
2669 		{
2670 			strcpy(s, n->character);
2671 			s += strlen(s);
2672 			continue;
2673 		}
2674 
2675 		switch (n->key->id)
2676 		{
2677 			case DCH_A_M:
2678 			case DCH_P_M:
2679 				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2680 					   ? P_M_STR : A_M_STR);
2681 				s += strlen(s);
2682 				break;
2683 			case DCH_AM:
2684 			case DCH_PM:
2685 				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2686 					   ? PM_STR : AM_STR);
2687 				s += strlen(s);
2688 				break;
2689 			case DCH_a_m:
2690 			case DCH_p_m:
2691 				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2692 					   ? p_m_STR : a_m_STR);
2693 				s += strlen(s);
2694 				break;
2695 			case DCH_am:
2696 			case DCH_pm:
2697 				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2698 					   ? pm_STR : am_STR);
2699 				s += strlen(s);
2700 				break;
2701 			case DCH_HH:
2702 			case DCH_HH12:
2703 
2704 				/*
2705 				 * display time as shown on a 12-hour clock, even for
2706 				 * intervals
2707 				 */
2708 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2709 						tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2710 						tm->tm_hour % (HOURS_PER_DAY / 2));
2711 				if (S_THth(n->suffix))
2712 					str_numth(s, s, S_TH_TYPE(n->suffix));
2713 				s += strlen(s);
2714 				break;
2715 			case DCH_HH24:
2716 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2717 						tm->tm_hour);
2718 				if (S_THth(n->suffix))
2719 					str_numth(s, s, S_TH_TYPE(n->suffix));
2720 				s += strlen(s);
2721 				break;
2722 			case DCH_MI:
2723 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2724 						tm->tm_min);
2725 				if (S_THth(n->suffix))
2726 					str_numth(s, s, S_TH_TYPE(n->suffix));
2727 				s += strlen(s);
2728 				break;
2729 			case DCH_SS:
2730 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2731 						tm->tm_sec);
2732 				if (S_THth(n->suffix))
2733 					str_numth(s, s, S_TH_TYPE(n->suffix));
2734 				s += strlen(s);
2735 				break;
2736 
2737 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2738 				sprintf(s, frac_fmt, (int) (frac_val)); \
2739 				if (S_THth(n->suffix)) \
2740 					str_numth(s, s, S_TH_TYPE(n->suffix)); \
2741 				s += strlen(s)
2742 
2743 			case DCH_FF1:		/* tenth of second */
2744 				DCH_to_char_fsec("%01d", in->fsec / 100000);
2745 				break;
2746 			case DCH_FF2:		/* hundredth of second */
2747 				DCH_to_char_fsec("%02d", in->fsec / 10000);
2748 				break;
2749 			case DCH_FF3:
2750 			case DCH_MS:		/* millisecond */
2751 				DCH_to_char_fsec("%03d", in->fsec / 1000);
2752 				break;
2753 			case DCH_FF4:		/* tenth of a millisecond */
2754 				DCH_to_char_fsec("%04d", in->fsec / 100);
2755 				break;
2756 			case DCH_FF5:		/* hundredth of a millisecond */
2757 				DCH_to_char_fsec("%05d", in->fsec / 10);
2758 				break;
2759 			case DCH_FF6:
2760 			case DCH_US:		/* microsecond */
2761 				DCH_to_char_fsec("%06d", in->fsec);
2762 				break;
2763 #undef DCH_to_char_fsec
2764 			case DCH_SSSS:
2765 				sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2766 						tm->tm_min * SECS_PER_MINUTE +
2767 						tm->tm_sec);
2768 				if (S_THth(n->suffix))
2769 					str_numth(s, s, S_TH_TYPE(n->suffix));
2770 				s += strlen(s);
2771 				break;
2772 			case DCH_tz:
2773 				INVALID_FOR_INTERVAL;
2774 				if (tmtcTzn(in))
2775 				{
2776 					/* We assume here that timezone names aren't localized */
2777 					char	   *p = asc_tolower_z(tmtcTzn(in));
2778 
2779 					strcpy(s, p);
2780 					pfree(p);
2781 					s += strlen(s);
2782 				}
2783 				break;
2784 			case DCH_TZ:
2785 				INVALID_FOR_INTERVAL;
2786 				if (tmtcTzn(in))
2787 				{
2788 					strcpy(s, tmtcTzn(in));
2789 					s += strlen(s);
2790 				}
2791 				break;
2792 			case DCH_TZH:
2793 				INVALID_FOR_INTERVAL;
2794 				sprintf(s, "%c%02d",
2795 						(tm->tm_gmtoff >= 0) ? '+' : '-',
2796 						abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2797 				s += strlen(s);
2798 				break;
2799 			case DCH_TZM:
2800 				INVALID_FOR_INTERVAL;
2801 				sprintf(s, "%02d",
2802 						(abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2803 				s += strlen(s);
2804 				break;
2805 			case DCH_OF:
2806 				INVALID_FOR_INTERVAL;
2807 				sprintf(s, "%c%0*d",
2808 						(tm->tm_gmtoff >= 0) ? '+' : '-',
2809 						S_FM(n->suffix) ? 0 : 2,
2810 						abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2811 				s += strlen(s);
2812 				if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2813 				{
2814 					sprintf(s, ":%02d",
2815 							(abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2816 					s += strlen(s);
2817 				}
2818 				break;
2819 			case DCH_A_D:
2820 			case DCH_B_C:
2821 				INVALID_FOR_INTERVAL;
2822 				strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2823 				s += strlen(s);
2824 				break;
2825 			case DCH_AD:
2826 			case DCH_BC:
2827 				INVALID_FOR_INTERVAL;
2828 				strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2829 				s += strlen(s);
2830 				break;
2831 			case DCH_a_d:
2832 			case DCH_b_c:
2833 				INVALID_FOR_INTERVAL;
2834 				strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2835 				s += strlen(s);
2836 				break;
2837 			case DCH_ad:
2838 			case DCH_bc:
2839 				INVALID_FOR_INTERVAL;
2840 				strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2841 				s += strlen(s);
2842 				break;
2843 			case DCH_MONTH:
2844 				INVALID_FOR_INTERVAL;
2845 				if (!tm->tm_mon)
2846 					break;
2847 				if (S_TM(n->suffix))
2848 				{
2849 					char	   *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2850 
2851 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2852 						strcpy(s, str);
2853 					else
2854 						ereport(ERROR,
2855 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2856 								 errmsg("localized string format value too long")));
2857 				}
2858 				else
2859 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2860 							asc_toupper_z(months_full[tm->tm_mon - 1]));
2861 				s += strlen(s);
2862 				break;
2863 			case DCH_Month:
2864 				INVALID_FOR_INTERVAL;
2865 				if (!tm->tm_mon)
2866 					break;
2867 				if (S_TM(n->suffix))
2868 				{
2869 					char	   *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2870 
2871 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2872 						strcpy(s, str);
2873 					else
2874 						ereport(ERROR,
2875 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2876 								 errmsg("localized string format value too long")));
2877 				}
2878 				else
2879 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2880 							months_full[tm->tm_mon - 1]);
2881 				s += strlen(s);
2882 				break;
2883 			case DCH_month:
2884 				INVALID_FOR_INTERVAL;
2885 				if (!tm->tm_mon)
2886 					break;
2887 				if (S_TM(n->suffix))
2888 				{
2889 					char	   *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2890 
2891 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2892 						strcpy(s, str);
2893 					else
2894 						ereport(ERROR,
2895 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2896 								 errmsg("localized string format value too long")));
2897 				}
2898 				else
2899 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2900 							asc_tolower_z(months_full[tm->tm_mon - 1]));
2901 				s += strlen(s);
2902 				break;
2903 			case DCH_MON:
2904 				INVALID_FOR_INTERVAL;
2905 				if (!tm->tm_mon)
2906 					break;
2907 				if (S_TM(n->suffix))
2908 				{
2909 					char	   *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2910 
2911 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2912 						strcpy(s, str);
2913 					else
2914 						ereport(ERROR,
2915 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2916 								 errmsg("localized string format value too long")));
2917 				}
2918 				else
2919 					strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2920 				s += strlen(s);
2921 				break;
2922 			case DCH_Mon:
2923 				INVALID_FOR_INTERVAL;
2924 				if (!tm->tm_mon)
2925 					break;
2926 				if (S_TM(n->suffix))
2927 				{
2928 					char	   *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2929 
2930 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2931 						strcpy(s, str);
2932 					else
2933 						ereport(ERROR,
2934 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2935 								 errmsg("localized string format value too long")));
2936 				}
2937 				else
2938 					strcpy(s, months[tm->tm_mon - 1]);
2939 				s += strlen(s);
2940 				break;
2941 			case DCH_mon:
2942 				INVALID_FOR_INTERVAL;
2943 				if (!tm->tm_mon)
2944 					break;
2945 				if (S_TM(n->suffix))
2946 				{
2947 					char	   *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2948 
2949 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2950 						strcpy(s, str);
2951 					else
2952 						ereport(ERROR,
2953 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2954 								 errmsg("localized string format value too long")));
2955 				}
2956 				else
2957 					strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2958 				s += strlen(s);
2959 				break;
2960 			case DCH_MM:
2961 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2962 						tm->tm_mon);
2963 				if (S_THth(n->suffix))
2964 					str_numth(s, s, S_TH_TYPE(n->suffix));
2965 				s += strlen(s);
2966 				break;
2967 			case DCH_DAY:
2968 				INVALID_FOR_INTERVAL;
2969 				if (S_TM(n->suffix))
2970 				{
2971 					char	   *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2972 
2973 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2974 						strcpy(s, str);
2975 					else
2976 						ereport(ERROR,
2977 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2978 								 errmsg("localized string format value too long")));
2979 				}
2980 				else
2981 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2982 							asc_toupper_z(days[tm->tm_wday]));
2983 				s += strlen(s);
2984 				break;
2985 			case DCH_Day:
2986 				INVALID_FOR_INTERVAL;
2987 				if (S_TM(n->suffix))
2988 				{
2989 					char	   *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2990 
2991 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2992 						strcpy(s, str);
2993 					else
2994 						ereport(ERROR,
2995 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2996 								 errmsg("localized string format value too long")));
2997 				}
2998 				else
2999 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3000 							days[tm->tm_wday]);
3001 				s += strlen(s);
3002 				break;
3003 			case DCH_day:
3004 				INVALID_FOR_INTERVAL;
3005 				if (S_TM(n->suffix))
3006 				{
3007 					char	   *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
3008 
3009 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3010 						strcpy(s, str);
3011 					else
3012 						ereport(ERROR,
3013 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3014 								 errmsg("localized string format value too long")));
3015 				}
3016 				else
3017 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3018 							asc_tolower_z(days[tm->tm_wday]));
3019 				s += strlen(s);
3020 				break;
3021 			case DCH_DY:
3022 				INVALID_FOR_INTERVAL;
3023 				if (S_TM(n->suffix))
3024 				{
3025 					char	   *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
3026 
3027 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3028 						strcpy(s, str);
3029 					else
3030 						ereport(ERROR,
3031 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3032 								 errmsg("localized string format value too long")));
3033 				}
3034 				else
3035 					strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3036 				s += strlen(s);
3037 				break;
3038 			case DCH_Dy:
3039 				INVALID_FOR_INTERVAL;
3040 				if (S_TM(n->suffix))
3041 				{
3042 					char	   *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
3043 
3044 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3045 						strcpy(s, str);
3046 					else
3047 						ereport(ERROR,
3048 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3049 								 errmsg("localized string format value too long")));
3050 				}
3051 				else
3052 					strcpy(s, days_short[tm->tm_wday]);
3053 				s += strlen(s);
3054 				break;
3055 			case DCH_dy:
3056 				INVALID_FOR_INTERVAL;
3057 				if (S_TM(n->suffix))
3058 				{
3059 					char	   *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
3060 
3061 					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3062 						strcpy(s, str);
3063 					else
3064 						ereport(ERROR,
3065 								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3066 								 errmsg("localized string format value too long")));
3067 				}
3068 				else
3069 					strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3070 				s += strlen(s);
3071 				break;
3072 			case DCH_DDD:
3073 			case DCH_IDDD:
3074 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3075 						(n->key->id == DCH_DDD) ?
3076 						tm->tm_yday :
3077 						date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
3078 				if (S_THth(n->suffix))
3079 					str_numth(s, s, S_TH_TYPE(n->suffix));
3080 				s += strlen(s);
3081 				break;
3082 			case DCH_DD:
3083 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3084 				if (S_THth(n->suffix))
3085 					str_numth(s, s, S_TH_TYPE(n->suffix));
3086 				s += strlen(s);
3087 				break;
3088 			case DCH_D:
3089 				INVALID_FOR_INTERVAL;
3090 				sprintf(s, "%d", tm->tm_wday + 1);
3091 				if (S_THth(n->suffix))
3092 					str_numth(s, s, S_TH_TYPE(n->suffix));
3093 				s += strlen(s);
3094 				break;
3095 			case DCH_ID:
3096 				INVALID_FOR_INTERVAL;
3097 				sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3098 				if (S_THth(n->suffix))
3099 					str_numth(s, s, S_TH_TYPE(n->suffix));
3100 				s += strlen(s);
3101 				break;
3102 			case DCH_WW:
3103 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3104 						(tm->tm_yday - 1) / 7 + 1);
3105 				if (S_THth(n->suffix))
3106 					str_numth(s, s, S_TH_TYPE(n->suffix));
3107 				s += strlen(s);
3108 				break;
3109 			case DCH_IW:
3110 				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3111 						date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3112 				if (S_THth(n->suffix))
3113 					str_numth(s, s, S_TH_TYPE(n->suffix));
3114 				s += strlen(s);
3115 				break;
3116 			case DCH_Q:
3117 				if (!tm->tm_mon)
3118 					break;
3119 				sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3120 				if (S_THth(n->suffix))
3121 					str_numth(s, s, S_TH_TYPE(n->suffix));
3122 				s += strlen(s);
3123 				break;
3124 			case DCH_CC:
3125 				if (is_interval)	/* straight calculation */
3126 					i = tm->tm_year / 100;
3127 				else
3128 				{
3129 					if (tm->tm_year > 0)
3130 						/* Century 20 == 1901 - 2000 */
3131 						i = (tm->tm_year - 1) / 100 + 1;
3132 					else
3133 						/* Century 6BC == 600BC - 501BC */
3134 						i = tm->tm_year / 100 - 1;
3135 				}
3136 				if (i <= 99 && i >= -99)
3137 					sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3138 				else
3139 					sprintf(s, "%d", i);
3140 				if (S_THth(n->suffix))
3141 					str_numth(s, s, S_TH_TYPE(n->suffix));
3142 				s += strlen(s);
3143 				break;
3144 			case DCH_Y_YYY:
3145 				i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3146 				sprintf(s, "%d,%03d", i,
3147 						ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3148 				if (S_THth(n->suffix))
3149 					str_numth(s, s, S_TH_TYPE(n->suffix));
3150 				s += strlen(s);
3151 				break;
3152 			case DCH_YYYY:
3153 			case DCH_IYYY:
3154 				sprintf(s, "%0*d",
3155 						S_FM(n->suffix) ? 0 :
3156 						(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3157 						(n->key->id == DCH_YYYY ?
3158 						 ADJUST_YEAR(tm->tm_year, is_interval) :
3159 						 ADJUST_YEAR(date2isoyear(tm->tm_year,
3160 												  tm->tm_mon,
3161 												  tm->tm_mday),
3162 									 is_interval)));
3163 				if (S_THth(n->suffix))
3164 					str_numth(s, s, S_TH_TYPE(n->suffix));
3165 				s += strlen(s);
3166 				break;
3167 			case DCH_YYY:
3168 			case DCH_IYY:
3169 				sprintf(s, "%0*d",
3170 						S_FM(n->suffix) ? 0 :
3171 						(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3172 						(n->key->id == DCH_YYY ?
3173 						 ADJUST_YEAR(tm->tm_year, is_interval) :
3174 						 ADJUST_YEAR(date2isoyear(tm->tm_year,
3175 												  tm->tm_mon,
3176 												  tm->tm_mday),
3177 									 is_interval)) % 1000);
3178 				if (S_THth(n->suffix))
3179 					str_numth(s, s, S_TH_TYPE(n->suffix));
3180 				s += strlen(s);
3181 				break;
3182 			case DCH_YY:
3183 			case DCH_IY:
3184 				sprintf(s, "%0*d",
3185 						S_FM(n->suffix) ? 0 :
3186 						(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3187 						(n->key->id == DCH_YY ?
3188 						 ADJUST_YEAR(tm->tm_year, is_interval) :
3189 						 ADJUST_YEAR(date2isoyear(tm->tm_year,
3190 												  tm->tm_mon,
3191 												  tm->tm_mday),
3192 									 is_interval)) % 100);
3193 				if (S_THth(n->suffix))
3194 					str_numth(s, s, S_TH_TYPE(n->suffix));
3195 				s += strlen(s);
3196 				break;
3197 			case DCH_Y:
3198 			case DCH_I:
3199 				sprintf(s, "%1d",
3200 						(n->key->id == DCH_Y ?
3201 						 ADJUST_YEAR(tm->tm_year, is_interval) :
3202 						 ADJUST_YEAR(date2isoyear(tm->tm_year,
3203 												  tm->tm_mon,
3204 												  tm->tm_mday),
3205 									 is_interval)) % 10);
3206 				if (S_THth(n->suffix))
3207 					str_numth(s, s, S_TH_TYPE(n->suffix));
3208 				s += strlen(s);
3209 				break;
3210 			case DCH_RM:
3211 				/* FALLTHROUGH */
3212 			case DCH_rm:
3213 
3214 				/*
3215 				 * For intervals, values like '12 month' will be reduced to 0
3216 				 * month and some years.  These should be processed.
3217 				 */
3218 				if (!tm->tm_mon && !tm->tm_year)
3219 					break;
3220 				else
3221 				{
3222 					int			mon = 0;
3223 					const char *const *months;
3224 
3225 					if (n->key->id == DCH_RM)
3226 						months = rm_months_upper;
3227 					else
3228 						months = rm_months_lower;
3229 
3230 					/*
3231 					 * Compute the position in the roman-numeral array.  Note
3232 					 * that the contents of the array are reversed, December
3233 					 * being first and January last.
3234 					 */
3235 					if (tm->tm_mon == 0)
3236 					{
3237 						/*
3238 						 * This case is special, and tracks the case of full
3239 						 * interval years.
3240 						 */
3241 						mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3242 					}
3243 					else if (tm->tm_mon < 0)
3244 					{
3245 						/*
3246 						 * Negative case.  In this case, the calculation is
3247 						 * reversed, where -1 means December, -2 November,
3248 						 * etc.
3249 						 */
3250 						mon = -1 * (tm->tm_mon + 1);
3251 					}
3252 					else
3253 					{
3254 						/*
3255 						 * Common case, with a strictly positive value.  The
3256 						 * position in the array matches with the value of
3257 						 * tm_mon.
3258 						 */
3259 						mon = MONTHS_PER_YEAR - tm->tm_mon;
3260 					}
3261 
3262 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3263 							months[mon]);
3264 					s += strlen(s);
3265 				}
3266 				break;
3267 			case DCH_W:
3268 				sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3269 				if (S_THth(n->suffix))
3270 					str_numth(s, s, S_TH_TYPE(n->suffix));
3271 				s += strlen(s);
3272 				break;
3273 			case DCH_J:
3274 				sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3275 				if (S_THth(n->suffix))
3276 					str_numth(s, s, S_TH_TYPE(n->suffix));
3277 				s += strlen(s);
3278 				break;
3279 		}
3280 	}
3281 
3282 	*s = '\0';
3283 }
3284 
3285 /*
3286  * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3287  * The TmFromChar struct pointed to by 'out' is populated with the results.
3288  *
3289  * 'collid' identifies the collation to use, if needed.
3290  * 'std' specifies standard parsing mode.
3291  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3292  *
3293  * Note: we currently don't have any to_interval() function, so there
3294  * is no need here for INVALID_FOR_INTERVAL checks.
3295  */
3296 static void
DCH_from_char(FormatNode * node,const char * in,TmFromChar * out,Oid collid,bool std,bool * have_error)3297 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3298 			  Oid collid, bool std, bool *have_error)
3299 {
3300 	FormatNode *n;
3301 	const char *s;
3302 	int			len,
3303 				value;
3304 	bool		fx_mode = std;
3305 
3306 	/* number of extra skipped characters (more than given in format string) */
3307 	int			extra_skip = 0;
3308 
3309 	/* cache localized days and months */
3310 	cache_locale_time();
3311 
3312 	for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3313 	{
3314 		/*
3315 		 * Ignore spaces at the beginning of the string and before fields when
3316 		 * not in FX (fixed width) mode.
3317 		 */
3318 		if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3319 			(n->type == NODE_TYPE_ACTION || n == node))
3320 		{
3321 			while (*s != '\0' && isspace((unsigned char) *s))
3322 			{
3323 				s++;
3324 				extra_skip++;
3325 			}
3326 		}
3327 
3328 		if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3329 		{
3330 			if (std)
3331 			{
3332 				/*
3333 				 * Standard mode requires strict matching between format
3334 				 * string separators/spaces and input string.
3335 				 */
3336 				Assert(n->character[0] && !n->character[1]);
3337 
3338 				if (*s == n->character[0])
3339 					s++;
3340 				else
3341 					RETURN_ERROR(ereport(ERROR,
3342 										 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3343 										  errmsg("unmatched format separator \"%c\"",
3344 												 n->character[0]))));
3345 			}
3346 			else if (!fx_mode)
3347 			{
3348 				/*
3349 				 * In non FX (fixed format) mode one format string space or
3350 				 * separator match to one space or separator in input string.
3351 				 * Or match nothing if there is no space or separator in the
3352 				 * current position of input string.
3353 				 */
3354 				extra_skip--;
3355 				if (isspace((unsigned char) *s) || is_separator_char(s))
3356 				{
3357 					s++;
3358 					extra_skip++;
3359 				}
3360 			}
3361 			else
3362 			{
3363 				/*
3364 				 * In FX mode, on format string space or separator we consume
3365 				 * exactly one character from input string.  Notice we don't
3366 				 * insist that the consumed character match the format's
3367 				 * character.
3368 				 */
3369 				s += pg_mblen(s);
3370 			}
3371 			continue;
3372 		}
3373 		else if (n->type != NODE_TYPE_ACTION)
3374 		{
3375 			/*
3376 			 * Text character, so consume one character from input string.
3377 			 * Notice we don't insist that the consumed character match the
3378 			 * format's character.
3379 			 */
3380 			if (!fx_mode)
3381 			{
3382 				/*
3383 				 * In non FX mode we might have skipped some extra characters
3384 				 * (more than specified in format string) before.  In this
3385 				 * case we don't skip input string character, because it might
3386 				 * be part of field.
3387 				 */
3388 				if (extra_skip > 0)
3389 					extra_skip--;
3390 				else
3391 					s += pg_mblen(s);
3392 			}
3393 			else
3394 			{
3395 				int			chlen = pg_mblen(s);
3396 
3397 				/*
3398 				 * Standard mode requires strict match of format characters.
3399 				 */
3400 				if (std && n->type == NODE_TYPE_CHAR &&
3401 					strncmp(s, n->character, chlen) != 0)
3402 					RETURN_ERROR(ereport(ERROR,
3403 										 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3404 										  errmsg("unmatched format character \"%s\"",
3405 												 n->character))));
3406 
3407 				s += chlen;
3408 			}
3409 			continue;
3410 		}
3411 
3412 		from_char_set_mode(out, n->key->date_mode, have_error);
3413 		CHECK_ERROR;
3414 
3415 		switch (n->key->id)
3416 		{
3417 			case DCH_FX:
3418 				fx_mode = true;
3419 				break;
3420 			case DCH_A_M:
3421 			case DCH_P_M:
3422 			case DCH_a_m:
3423 			case DCH_p_m:
3424 				from_char_seq_search(&value, &s, ampm_strings_long,
3425 									 NULL, InvalidOid,
3426 									 n, have_error);
3427 				CHECK_ERROR;
3428 				from_char_set_int(&out->pm, value % 2, n, have_error);
3429 				CHECK_ERROR;
3430 				out->clock = CLOCK_12_HOUR;
3431 				break;
3432 			case DCH_AM:
3433 			case DCH_PM:
3434 			case DCH_am:
3435 			case DCH_pm:
3436 				from_char_seq_search(&value, &s, ampm_strings,
3437 									 NULL, InvalidOid,
3438 									 n, have_error);
3439 				CHECK_ERROR;
3440 				from_char_set_int(&out->pm, value % 2, n, have_error);
3441 				CHECK_ERROR;
3442 				out->clock = CLOCK_12_HOUR;
3443 				break;
3444 			case DCH_HH:
3445 			case DCH_HH12:
3446 				from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3447 				CHECK_ERROR;
3448 				out->clock = CLOCK_12_HOUR;
3449 				SKIP_THth(s, n->suffix);
3450 				break;
3451 			case DCH_HH24:
3452 				from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3453 				CHECK_ERROR;
3454 				SKIP_THth(s, n->suffix);
3455 				break;
3456 			case DCH_MI:
3457 				from_char_parse_int(&out->mi, &s, n, have_error);
3458 				CHECK_ERROR;
3459 				SKIP_THth(s, n->suffix);
3460 				break;
3461 			case DCH_SS:
3462 				from_char_parse_int(&out->ss, &s, n, have_error);
3463 				CHECK_ERROR;
3464 				SKIP_THth(s, n->suffix);
3465 				break;
3466 			case DCH_MS:		/* millisecond */
3467 				len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
3468 				CHECK_ERROR;
3469 
3470 				/*
3471 				 * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3472 				 */
3473 				out->ms *= len == 1 ? 100 :
3474 					len == 2 ? 10 : 1;
3475 
3476 				SKIP_THth(s, n->suffix);
3477 				break;
3478 			case DCH_FF1:
3479 			case DCH_FF2:
3480 			case DCH_FF3:
3481 			case DCH_FF4:
3482 			case DCH_FF5:
3483 			case DCH_FF6:
3484 				out->ff = n->key->id - DCH_FF1 + 1;
3485 				/* fall through */
3486 			case DCH_US:		/* microsecond */
3487 				len = from_char_parse_int_len(&out->us, &s,
3488 											  n->key->id == DCH_US ? 6 :
3489 											  out->ff, n, have_error);
3490 				CHECK_ERROR;
3491 
3492 				out->us *= len == 1 ? 100000 :
3493 					len == 2 ? 10000 :
3494 					len == 3 ? 1000 :
3495 					len == 4 ? 100 :
3496 					len == 5 ? 10 : 1;
3497 
3498 				SKIP_THth(s, n->suffix);
3499 				break;
3500 			case DCH_SSSS:
3501 				from_char_parse_int(&out->ssss, &s, n, have_error);
3502 				CHECK_ERROR;
3503 				SKIP_THth(s, n->suffix);
3504 				break;
3505 			case DCH_tz:
3506 			case DCH_TZ:
3507 			case DCH_OF:
3508 				RETURN_ERROR(ereport(ERROR,
3509 									 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3510 									  errmsg("formatting field \"%s\" is only supported in to_char",
3511 											 n->key->name))));
3512 				CHECK_ERROR;
3513 				break;
3514 			case DCH_TZH:
3515 
3516 				/*
3517 				 * Value of TZH might be negative.  And the issue is that we
3518 				 * might swallow minus sign as the separator.  So, if we have
3519 				 * skipped more characters than specified in the format
3520 				 * string, then we consider prepending last skipped minus to
3521 				 * TZH.
3522 				 */
3523 				if (*s == '+' || *s == '-' || *s == ' ')
3524 				{
3525 					out->tzsign = *s == '-' ? -1 : +1;
3526 					s++;
3527 				}
3528 				else
3529 				{
3530 					if (extra_skip > 0 && *(s - 1) == '-')
3531 						out->tzsign = -1;
3532 					else
3533 						out->tzsign = +1;
3534 				}
3535 
3536 				from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
3537 				CHECK_ERROR;
3538 				break;
3539 			case DCH_TZM:
3540 				/* assign positive timezone sign if TZH was not seen before */
3541 				if (!out->tzsign)
3542 					out->tzsign = +1;
3543 				from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
3544 				CHECK_ERROR;
3545 				break;
3546 			case DCH_A_D:
3547 			case DCH_B_C:
3548 			case DCH_a_d:
3549 			case DCH_b_c:
3550 				from_char_seq_search(&value, &s, adbc_strings_long,
3551 									 NULL, InvalidOid,
3552 									 n, have_error);
3553 				CHECK_ERROR;
3554 				from_char_set_int(&out->bc, value % 2, n, have_error);
3555 				CHECK_ERROR;
3556 				break;
3557 			case DCH_AD:
3558 			case DCH_BC:
3559 			case DCH_ad:
3560 			case DCH_bc:
3561 				from_char_seq_search(&value, &s, adbc_strings,
3562 									 NULL, InvalidOid,
3563 									 n, have_error);
3564 				CHECK_ERROR;
3565 				from_char_set_int(&out->bc, value % 2, n, have_error);
3566 				CHECK_ERROR;
3567 				break;
3568 			case DCH_MONTH:
3569 			case DCH_Month:
3570 			case DCH_month:
3571 				from_char_seq_search(&value, &s, months_full,
3572 									 S_TM(n->suffix) ? localized_full_months : NULL,
3573 									 collid,
3574 									 n, have_error);
3575 				CHECK_ERROR;
3576 				from_char_set_int(&out->mm, value + 1, n, have_error);
3577 				CHECK_ERROR;
3578 				break;
3579 			case DCH_MON:
3580 			case DCH_Mon:
3581 			case DCH_mon:
3582 				from_char_seq_search(&value, &s, months,
3583 									 S_TM(n->suffix) ? localized_abbrev_months : NULL,
3584 									 collid,
3585 									 n, have_error);
3586 				CHECK_ERROR;
3587 				from_char_set_int(&out->mm, value + 1, n, have_error);
3588 				CHECK_ERROR;
3589 				break;
3590 			case DCH_MM:
3591 				from_char_parse_int(&out->mm, &s, n, have_error);
3592 				CHECK_ERROR;
3593 				SKIP_THth(s, n->suffix);
3594 				break;
3595 			case DCH_DAY:
3596 			case DCH_Day:
3597 			case DCH_day:
3598 				from_char_seq_search(&value, &s, days,
3599 									 S_TM(n->suffix) ? localized_full_days : NULL,
3600 									 collid,
3601 									 n, have_error);
3602 				CHECK_ERROR;
3603 				from_char_set_int(&out->d, value, n, have_error);
3604 				CHECK_ERROR;
3605 				out->d++;
3606 				break;
3607 			case DCH_DY:
3608 			case DCH_Dy:
3609 			case DCH_dy:
3610 				from_char_seq_search(&value, &s, days_short,
3611 									 S_TM(n->suffix) ? localized_abbrev_days : NULL,
3612 									 collid,
3613 									 n, have_error);
3614 				CHECK_ERROR;
3615 				from_char_set_int(&out->d, value, n, have_error);
3616 				CHECK_ERROR;
3617 				out->d++;
3618 				break;
3619 			case DCH_DDD:
3620 				from_char_parse_int(&out->ddd, &s, n, have_error);
3621 				CHECK_ERROR;
3622 				SKIP_THth(s, n->suffix);
3623 				break;
3624 			case DCH_IDDD:
3625 				from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
3626 				CHECK_ERROR;
3627 				SKIP_THth(s, n->suffix);
3628 				break;
3629 			case DCH_DD:
3630 				from_char_parse_int(&out->dd, &s, n, have_error);
3631 				CHECK_ERROR;
3632 				SKIP_THth(s, n->suffix);
3633 				break;
3634 			case DCH_D:
3635 				from_char_parse_int(&out->d, &s, n, have_error);
3636 				CHECK_ERROR;
3637 				SKIP_THth(s, n->suffix);
3638 				break;
3639 			case DCH_ID:
3640 				from_char_parse_int_len(&out->d, &s, 1, n, have_error);
3641 				CHECK_ERROR;
3642 				/* Shift numbering to match Gregorian where Sunday = 1 */
3643 				if (++out->d > 7)
3644 					out->d = 1;
3645 				SKIP_THth(s, n->suffix);
3646 				break;
3647 			case DCH_WW:
3648 			case DCH_IW:
3649 				from_char_parse_int(&out->ww, &s, n, have_error);
3650 				CHECK_ERROR;
3651 				SKIP_THth(s, n->suffix);
3652 				break;
3653 			case DCH_Q:
3654 
3655 				/*
3656 				 * We ignore 'Q' when converting to date because it is unclear
3657 				 * which date in the quarter to use, and some people specify
3658 				 * both quarter and month, so if it was honored it might
3659 				 * conflict with the supplied month. That is also why we don't
3660 				 * throw an error.
3661 				 *
3662 				 * We still parse the source string for an integer, but it
3663 				 * isn't stored anywhere in 'out'.
3664 				 */
3665 				from_char_parse_int((int *) NULL, &s, n, have_error);
3666 				CHECK_ERROR;
3667 				SKIP_THth(s, n->suffix);
3668 				break;
3669 			case DCH_CC:
3670 				from_char_parse_int(&out->cc, &s, n, have_error);
3671 				CHECK_ERROR;
3672 				SKIP_THth(s, n->suffix);
3673 				break;
3674 			case DCH_Y_YYY:
3675 				{
3676 					int			matched,
3677 								years,
3678 								millennia,
3679 								nch;
3680 
3681 					matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3682 					if (matched < 2)
3683 						RETURN_ERROR(ereport(ERROR,
3684 											 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3685 											  errmsg("invalid input string for \"Y,YYY\""))));
3686 					years += (millennia * 1000);
3687 					from_char_set_int(&out->year, years, n, have_error);
3688 					CHECK_ERROR;
3689 					out->yysz = 4;
3690 					s += nch;
3691 					SKIP_THth(s, n->suffix);
3692 				}
3693 				break;
3694 			case DCH_YYYY:
3695 			case DCH_IYYY:
3696 				from_char_parse_int(&out->year, &s, n, have_error);
3697 				CHECK_ERROR;
3698 				out->yysz = 4;
3699 				SKIP_THth(s, n->suffix);
3700 				break;
3701 			case DCH_YYY:
3702 			case DCH_IYY:
3703 				len = from_char_parse_int(&out->year, &s, n, have_error);
3704 				CHECK_ERROR;
3705 				if (len < 4)
3706 					out->year = adjust_partial_year_to_2020(out->year);
3707 				out->yysz = 3;
3708 				SKIP_THth(s, n->suffix);
3709 				break;
3710 			case DCH_YY:
3711 			case DCH_IY:
3712 				len = from_char_parse_int(&out->year, &s, n, have_error);
3713 				CHECK_ERROR;
3714 				if (len < 4)
3715 					out->year = adjust_partial_year_to_2020(out->year);
3716 				out->yysz = 2;
3717 				SKIP_THth(s, n->suffix);
3718 				break;
3719 			case DCH_Y:
3720 			case DCH_I:
3721 				len = from_char_parse_int(&out->year, &s, n, have_error);
3722 				CHECK_ERROR;
3723 				if (len < 4)
3724 					out->year = adjust_partial_year_to_2020(out->year);
3725 				out->yysz = 1;
3726 				SKIP_THth(s, n->suffix);
3727 				break;
3728 			case DCH_RM:
3729 			case DCH_rm:
3730 				from_char_seq_search(&value, &s, rm_months_lower,
3731 									 NULL, InvalidOid,
3732 									 n, have_error);
3733 				CHECK_ERROR;
3734 				from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
3735 								  n, have_error);
3736 				CHECK_ERROR;
3737 				break;
3738 			case DCH_W:
3739 				from_char_parse_int(&out->w, &s, n, have_error);
3740 				CHECK_ERROR;
3741 				SKIP_THth(s, n->suffix);
3742 				break;
3743 			case DCH_J:
3744 				from_char_parse_int(&out->j, &s, n, have_error);
3745 				CHECK_ERROR;
3746 				SKIP_THth(s, n->suffix);
3747 				break;
3748 		}
3749 
3750 		/* Ignore all spaces after fields */
3751 		if (!fx_mode)
3752 		{
3753 			extra_skip = 0;
3754 			while (*s != '\0' && isspace((unsigned char) *s))
3755 			{
3756 				s++;
3757 				extra_skip++;
3758 			}
3759 		}
3760 	}
3761 
3762 	/*
3763 	 * Standard parsing mode doesn't allow unmatched format patterns or
3764 	 * trailing characters in the input string.
3765 	 */
3766 	if (std)
3767 	{
3768 		if (n->type != NODE_TYPE_END)
3769 			RETURN_ERROR(ereport(ERROR,
3770 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3771 								  errmsg("input string is too short for datetime format"))));
3772 
3773 		while (*s != '\0' && isspace((unsigned char) *s))
3774 			s++;
3775 
3776 		if (*s != '\0')
3777 			RETURN_ERROR(ereport(ERROR,
3778 								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3779 								  errmsg("trailing characters remain in input string "
3780 										 "after datetime format"))));
3781 	}
3782 
3783 on_error:
3784 	return;
3785 }
3786 
3787 /*
3788  * The invariant for DCH cache entry management is that DCHCounter is equal
3789  * to the maximum age value among the existing entries, and we increment it
3790  * whenever an access occurs.  If we approach overflow, deal with that by
3791  * halving all the age values, so that we retain a fairly accurate idea of
3792  * which entries are oldest.
3793  */
3794 static inline void
DCH_prevent_counter_overflow(void)3795 DCH_prevent_counter_overflow(void)
3796 {
3797 	if (DCHCounter >= (INT_MAX - 1))
3798 	{
3799 		for (int i = 0; i < n_DCHCache; i++)
3800 			DCHCache[i]->age >>= 1;
3801 		DCHCounter >>= 1;
3802 	}
3803 }
3804 
3805 /*
3806  * Get mask of date/time/zone components present in format nodes.
3807  *
3808  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3809  */
3810 static int
DCH_datetime_type(FormatNode * node,bool * have_error)3811 DCH_datetime_type(FormatNode *node, bool *have_error)
3812 {
3813 	FormatNode *n;
3814 	int			flags = 0;
3815 
3816 	for (n = node; n->type != NODE_TYPE_END; n++)
3817 	{
3818 		if (n->type != NODE_TYPE_ACTION)
3819 			continue;
3820 
3821 		switch (n->key->id)
3822 		{
3823 			case DCH_FX:
3824 				break;
3825 			case DCH_A_M:
3826 			case DCH_P_M:
3827 			case DCH_a_m:
3828 			case DCH_p_m:
3829 			case DCH_AM:
3830 			case DCH_PM:
3831 			case DCH_am:
3832 			case DCH_pm:
3833 			case DCH_HH:
3834 			case DCH_HH12:
3835 			case DCH_HH24:
3836 			case DCH_MI:
3837 			case DCH_SS:
3838 			case DCH_MS:		/* millisecond */
3839 			case DCH_US:		/* microsecond */
3840 			case DCH_FF1:
3841 			case DCH_FF2:
3842 			case DCH_FF3:
3843 			case DCH_FF4:
3844 			case DCH_FF5:
3845 			case DCH_FF6:
3846 			case DCH_SSSS:
3847 				flags |= DCH_TIMED;
3848 				break;
3849 			case DCH_tz:
3850 			case DCH_TZ:
3851 			case DCH_OF:
3852 				RETURN_ERROR(ereport(ERROR,
3853 									 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3854 									  errmsg("formatting field \"%s\" is only supported in to_char",
3855 											 n->key->name))));
3856 				flags |= DCH_ZONED;
3857 				break;
3858 			case DCH_TZH:
3859 			case DCH_TZM:
3860 				flags |= DCH_ZONED;
3861 				break;
3862 			case DCH_A_D:
3863 			case DCH_B_C:
3864 			case DCH_a_d:
3865 			case DCH_b_c:
3866 			case DCH_AD:
3867 			case DCH_BC:
3868 			case DCH_ad:
3869 			case DCH_bc:
3870 			case DCH_MONTH:
3871 			case DCH_Month:
3872 			case DCH_month:
3873 			case DCH_MON:
3874 			case DCH_Mon:
3875 			case DCH_mon:
3876 			case DCH_MM:
3877 			case DCH_DAY:
3878 			case DCH_Day:
3879 			case DCH_day:
3880 			case DCH_DY:
3881 			case DCH_Dy:
3882 			case DCH_dy:
3883 			case DCH_DDD:
3884 			case DCH_IDDD:
3885 			case DCH_DD:
3886 			case DCH_D:
3887 			case DCH_ID:
3888 			case DCH_WW:
3889 			case DCH_Q:
3890 			case DCH_CC:
3891 			case DCH_Y_YYY:
3892 			case DCH_YYYY:
3893 			case DCH_IYYY:
3894 			case DCH_YYY:
3895 			case DCH_IYY:
3896 			case DCH_YY:
3897 			case DCH_IY:
3898 			case DCH_Y:
3899 			case DCH_I:
3900 			case DCH_RM:
3901 			case DCH_rm:
3902 			case DCH_W:
3903 			case DCH_J:
3904 				flags |= DCH_DATED;
3905 				break;
3906 		}
3907 	}
3908 
3909 on_error:
3910 	return flags;
3911 }
3912 
3913 /* select a DCHCacheEntry to hold the given format picture */
3914 static DCHCacheEntry *
DCH_cache_getnew(const char * str,bool std)3915 DCH_cache_getnew(const char *str, bool std)
3916 {
3917 	DCHCacheEntry *ent;
3918 
3919 	/* Ensure we can advance DCHCounter below */
3920 	DCH_prevent_counter_overflow();
3921 
3922 	/*
3923 	 * If cache is full, remove oldest entry (or recycle first not-valid one)
3924 	 */
3925 	if (n_DCHCache >= DCH_CACHE_ENTRIES)
3926 	{
3927 		DCHCacheEntry *old = DCHCache[0];
3928 
3929 #ifdef DEBUG_TO_FROM_CHAR
3930 		elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3931 #endif
3932 		if (old->valid)
3933 		{
3934 			for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3935 			{
3936 				ent = DCHCache[i];
3937 				if (!ent->valid)
3938 				{
3939 					old = ent;
3940 					break;
3941 				}
3942 				if (ent->age < old->age)
3943 					old = ent;
3944 			}
3945 		}
3946 #ifdef DEBUG_TO_FROM_CHAR
3947 		elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3948 #endif
3949 		old->valid = false;
3950 		StrNCpy(old->str, str, DCH_CACHE_SIZE + 1);
3951 		old->age = (++DCHCounter);
3952 		/* caller is expected to fill format, then set valid */
3953 		return old;
3954 	}
3955 	else
3956 	{
3957 #ifdef DEBUG_TO_FROM_CHAR
3958 		elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3959 #endif
3960 		Assert(DCHCache[n_DCHCache] == NULL);
3961 		DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3962 			MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
3963 		ent->valid = false;
3964 		StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
3965 		ent->std = std;
3966 		ent->age = (++DCHCounter);
3967 		/* caller is expected to fill format, then set valid */
3968 		++n_DCHCache;
3969 		return ent;
3970 	}
3971 }
3972 
3973 /* look for an existing DCHCacheEntry matching the given format picture */
3974 static DCHCacheEntry *
DCH_cache_search(const char * str,bool std)3975 DCH_cache_search(const char *str, bool std)
3976 {
3977 	/* Ensure we can advance DCHCounter below */
3978 	DCH_prevent_counter_overflow();
3979 
3980 	for (int i = 0; i < n_DCHCache; i++)
3981 	{
3982 		DCHCacheEntry *ent = DCHCache[i];
3983 
3984 		if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
3985 		{
3986 			ent->age = (++DCHCounter);
3987 			return ent;
3988 		}
3989 	}
3990 
3991 	return NULL;
3992 }
3993 
3994 /* Find or create a DCHCacheEntry for the given format picture */
3995 static DCHCacheEntry *
DCH_cache_fetch(const char * str,bool std)3996 DCH_cache_fetch(const char *str, bool std)
3997 {
3998 	DCHCacheEntry *ent;
3999 
4000 	if ((ent = DCH_cache_search(str, std)) == NULL)
4001 	{
4002 		/*
4003 		 * Not in the cache, must run parser and save a new format-picture to
4004 		 * the cache.  Do not mark the cache entry valid until parsing
4005 		 * succeeds.
4006 		 */
4007 		ent = DCH_cache_getnew(str, std);
4008 
4009 		parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
4010 					 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4011 
4012 		ent->valid = true;
4013 	}
4014 	return ent;
4015 }
4016 
4017 /*
4018  * Format a date/time or interval into a string according to fmt.
4019  * We parse fmt into a list of FormatNodes.  This is then passed to DCH_to_char
4020  * for formatting.
4021  */
4022 static text *
datetime_to_char_body(TmToChar * tmtc,text * fmt,bool is_interval,Oid collid)4023 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4024 {
4025 	FormatNode *format;
4026 	char	   *fmt_str,
4027 			   *result;
4028 	bool		incache;
4029 	int			fmt_len;
4030 	text	   *res;
4031 
4032 	/*
4033 	 * Convert fmt to C string
4034 	 */
4035 	fmt_str = text_to_cstring(fmt);
4036 	fmt_len = strlen(fmt_str);
4037 
4038 	/*
4039 	 * Allocate workspace for result as C string
4040 	 */
4041 	result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4042 	*result = '\0';
4043 
4044 	if (fmt_len > DCH_CACHE_SIZE)
4045 	{
4046 		/*
4047 		 * Allocate new memory if format picture is bigger than static cache
4048 		 * and do not use cache (call parser always)
4049 		 */
4050 		incache = false;
4051 
4052 		format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4053 
4054 		parse_format(format, fmt_str, DCH_keywords,
4055 					 DCH_suff, DCH_index, DCH_FLAG, NULL);
4056 	}
4057 	else
4058 	{
4059 		/*
4060 		 * Use cache buffers
4061 		 */
4062 		DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4063 
4064 		incache = true;
4065 		format = ent->format;
4066 	}
4067 
4068 	/* The real work is here */
4069 	DCH_to_char(format, is_interval, tmtc, result, collid);
4070 
4071 	if (!incache)
4072 		pfree(format);
4073 
4074 	pfree(fmt_str);
4075 
4076 	/* convert C-string result to TEXT format */
4077 	res = cstring_to_text(result);
4078 
4079 	pfree(result);
4080 	return res;
4081 }
4082 
4083 /****************************************************************************
4084  *				Public routines
4085  ***************************************************************************/
4086 
4087 /* -------------------
4088  * TIMESTAMP to_char()
4089  * -------------------
4090  */
4091 Datum
timestamp_to_char(PG_FUNCTION_ARGS)4092 timestamp_to_char(PG_FUNCTION_ARGS)
4093 {
4094 	Timestamp	dt = PG_GETARG_TIMESTAMP(0);
4095 	text	   *fmt = PG_GETARG_TEXT_PP(1),
4096 			   *res;
4097 	TmToChar	tmtc;
4098 	struct pg_tm *tm;
4099 	int			thisdate;
4100 
4101 	if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4102 		PG_RETURN_NULL();
4103 
4104 	ZERO_tmtc(&tmtc);
4105 	tm = tmtcTm(&tmtc);
4106 
4107 	if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4108 		ereport(ERROR,
4109 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4110 				 errmsg("timestamp out of range")));
4111 
4112 	thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4113 	tm->tm_wday = (thisdate + 1) % 7;
4114 	tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4115 
4116 	if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4117 		PG_RETURN_NULL();
4118 
4119 	PG_RETURN_TEXT_P(res);
4120 }
4121 
4122 Datum
timestamptz_to_char(PG_FUNCTION_ARGS)4123 timestamptz_to_char(PG_FUNCTION_ARGS)
4124 {
4125 	TimestampTz dt = PG_GETARG_TIMESTAMP(0);
4126 	text	   *fmt = PG_GETARG_TEXT_PP(1),
4127 			   *res;
4128 	TmToChar	tmtc;
4129 	int			tz;
4130 	struct pg_tm *tm;
4131 	int			thisdate;
4132 
4133 	if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4134 		PG_RETURN_NULL();
4135 
4136 	ZERO_tmtc(&tmtc);
4137 	tm = tmtcTm(&tmtc);
4138 
4139 	if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4140 		ereport(ERROR,
4141 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4142 				 errmsg("timestamp out of range")));
4143 
4144 	thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4145 	tm->tm_wday = (thisdate + 1) % 7;
4146 	tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4147 
4148 	if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4149 		PG_RETURN_NULL();
4150 
4151 	PG_RETURN_TEXT_P(res);
4152 }
4153 
4154 
4155 /* -------------------
4156  * INTERVAL to_char()
4157  * -------------------
4158  */
4159 Datum
interval_to_char(PG_FUNCTION_ARGS)4160 interval_to_char(PG_FUNCTION_ARGS)
4161 {
4162 	Interval   *it = PG_GETARG_INTERVAL_P(0);
4163 	text	   *fmt = PG_GETARG_TEXT_PP(1),
4164 			   *res;
4165 	TmToChar	tmtc;
4166 	struct pg_tm *tm;
4167 
4168 	if (VARSIZE_ANY_EXHDR(fmt) <= 0)
4169 		PG_RETURN_NULL();
4170 
4171 	ZERO_tmtc(&tmtc);
4172 	tm = tmtcTm(&tmtc);
4173 
4174 	if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
4175 		PG_RETURN_NULL();
4176 
4177 	/* wday is meaningless, yday approximates the total span in days */
4178 	tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4179 
4180 	if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4181 		PG_RETURN_NULL();
4182 
4183 	PG_RETURN_TEXT_P(res);
4184 }
4185 
4186 /* ---------------------
4187  * TO_TIMESTAMP()
4188  *
4189  * Make Timestamp from date_str which is formatted at argument 'fmt'
4190  * ( to_timestamp is reverse to_char() )
4191  * ---------------------
4192  */
4193 Datum
to_timestamp(PG_FUNCTION_ARGS)4194 to_timestamp(PG_FUNCTION_ARGS)
4195 {
4196 	text	   *date_txt = PG_GETARG_TEXT_PP(0);
4197 	text	   *fmt = PG_GETARG_TEXT_PP(1);
4198 	Oid			collid = PG_GET_COLLATION();
4199 	Timestamp	result;
4200 	int			tz;
4201 	struct pg_tm tm;
4202 	fsec_t		fsec;
4203 	int			fprec;
4204 
4205 	do_to_timestamp(date_txt, fmt, collid, false,
4206 					&tm, &fsec, &fprec, NULL, NULL);
4207 
4208 	/* Use the specified time zone, if any. */
4209 	if (tm.tm_zone)
4210 	{
4211 		int			dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
4212 
4213 		if (dterr)
4214 			DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4215 	}
4216 	else
4217 		tz = DetermineTimeZoneOffset(&tm, session_timezone);
4218 
4219 	if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4220 		ereport(ERROR,
4221 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4222 				 errmsg("timestamp out of range")));
4223 
4224 	/* Use the specified fractional precision, if any. */
4225 	if (fprec)
4226 		AdjustTimestampForTypmod(&result, fprec);
4227 
4228 	PG_RETURN_TIMESTAMP(result);
4229 }
4230 
4231 /* ----------
4232  * TO_DATE
4233  *	Make Date from date_str which is formatted at argument 'fmt'
4234  * ----------
4235  */
4236 Datum
to_date(PG_FUNCTION_ARGS)4237 to_date(PG_FUNCTION_ARGS)
4238 {
4239 	text	   *date_txt = PG_GETARG_TEXT_PP(0);
4240 	text	   *fmt = PG_GETARG_TEXT_PP(1);
4241 	Oid			collid = PG_GET_COLLATION();
4242 	DateADT		result;
4243 	struct pg_tm tm;
4244 	fsec_t		fsec;
4245 
4246 	do_to_timestamp(date_txt, fmt, collid, false,
4247 					&tm, &fsec, NULL, NULL, NULL);
4248 
4249 	/* Prevent overflow in Julian-day routines */
4250 	if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4251 		ereport(ERROR,
4252 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4253 				 errmsg("date out of range: \"%s\"",
4254 						text_to_cstring(date_txt))));
4255 
4256 	result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4257 
4258 	/* Now check for just-out-of-range dates */
4259 	if (!IS_VALID_DATE(result))
4260 		ereport(ERROR,
4261 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4262 				 errmsg("date out of range: \"%s\"",
4263 						text_to_cstring(date_txt))));
4264 
4265 	PG_RETURN_DATEADT(result);
4266 }
4267 
4268 /*
4269  * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4270  * as a format string.  The collation 'collid' may be used for case-folding
4271  * rules in some cases.  'strict' specifies standard parsing mode.
4272  *
4273  * The actual data type (returned in 'typid', 'typmod') is determined by
4274  * the presence of date/time/zone components in the format string.
4275  *
4276  * When timezone component is present, the corresponding offset is
4277  * returned in '*tz'.
4278  *
4279  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
4280  * and zero value is returned.
4281  */
4282 Datum
parse_datetime(text * date_txt,text * fmt,Oid collid,bool strict,Oid * typid,int32 * typmod,int * tz,bool * have_error)4283 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4284 			   Oid *typid, int32 *typmod, int *tz,
4285 			   bool *have_error)
4286 {
4287 	struct pg_tm tm;
4288 	fsec_t		fsec;
4289 	int			fprec;
4290 	uint32		flags;
4291 
4292 	do_to_timestamp(date_txt, fmt, collid, strict,
4293 					&tm, &fsec, &fprec, &flags, have_error);
4294 	CHECK_ERROR;
4295 
4296 	*typmod = fprec ? fprec : -1;	/* fractional part precision */
4297 
4298 	if (flags & DCH_DATED)
4299 	{
4300 		if (flags & DCH_TIMED)
4301 		{
4302 			if (flags & DCH_ZONED)
4303 			{
4304 				TimestampTz result;
4305 
4306 				if (tm.tm_zone)
4307 				{
4308 					int			dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4309 
4310 					if (dterr)
4311 						DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4312 				}
4313 				else
4314 				{
4315 					/*
4316 					 * Time zone is present in format string, but not in input
4317 					 * string.  Assuming do_to_timestamp() triggers no error
4318 					 * this should be possible only in non-strict case.
4319 					 */
4320 					Assert(!strict);
4321 
4322 					RETURN_ERROR(ereport(ERROR,
4323 										 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4324 										  errmsg("missing time zone in input string for type timestamptz"))));
4325 				}
4326 
4327 				if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4328 					RETURN_ERROR(ereport(ERROR,
4329 										 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4330 										  errmsg("timestamptz out of range"))));
4331 
4332 				AdjustTimestampForTypmod(&result, *typmod);
4333 
4334 				*typid = TIMESTAMPTZOID;
4335 				return TimestampTzGetDatum(result);
4336 			}
4337 			else
4338 			{
4339 				Timestamp	result;
4340 
4341 				if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4342 					RETURN_ERROR(ereport(ERROR,
4343 										 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4344 										  errmsg("timestamp out of range"))));
4345 
4346 				AdjustTimestampForTypmod(&result, *typmod);
4347 
4348 				*typid = TIMESTAMPOID;
4349 				return TimestampGetDatum(result);
4350 			}
4351 		}
4352 		else
4353 		{
4354 			if (flags & DCH_ZONED)
4355 			{
4356 				RETURN_ERROR(ereport(ERROR,
4357 									 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4358 									  errmsg("datetime format is zoned but not timed"))));
4359 			}
4360 			else
4361 			{
4362 				DateADT		result;
4363 
4364 				/* Prevent overflow in Julian-day routines */
4365 				if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4366 					RETURN_ERROR(ereport(ERROR,
4367 										 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4368 										  errmsg("date out of range: \"%s\"",
4369 												 text_to_cstring(date_txt)))));
4370 
4371 				result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4372 					POSTGRES_EPOCH_JDATE;
4373 
4374 				/* Now check for just-out-of-range dates */
4375 				if (!IS_VALID_DATE(result))
4376 					RETURN_ERROR(ereport(ERROR,
4377 										 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4378 										  errmsg("date out of range: \"%s\"",
4379 												 text_to_cstring(date_txt)))));
4380 
4381 				*typid = DATEOID;
4382 				return DateADTGetDatum(result);
4383 			}
4384 		}
4385 	}
4386 	else if (flags & DCH_TIMED)
4387 	{
4388 		if (flags & DCH_ZONED)
4389 		{
4390 			TimeTzADT  *result = palloc(sizeof(TimeTzADT));
4391 
4392 			if (tm.tm_zone)
4393 			{
4394 				int			dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4395 
4396 				if (dterr)
4397 					RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
4398 			}
4399 			else
4400 			{
4401 				/*
4402 				 * Time zone is present in format string, but not in input
4403 				 * string.  Assuming do_to_timestamp() triggers no error this
4404 				 * should be possible only in non-strict case.
4405 				 */
4406 				Assert(!strict);
4407 
4408 				RETURN_ERROR(ereport(ERROR,
4409 									 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4410 									  errmsg("missing time zone in input string for type timetz"))));
4411 			}
4412 
4413 			if (tm2timetz(&tm, fsec, *tz, result) != 0)
4414 				RETURN_ERROR(ereport(ERROR,
4415 									 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4416 									  errmsg("timetz out of range"))));
4417 
4418 			AdjustTimeForTypmod(&result->time, *typmod);
4419 
4420 			*typid = TIMETZOID;
4421 			return TimeTzADTPGetDatum(result);
4422 		}
4423 		else
4424 		{
4425 			TimeADT		result;
4426 
4427 			if (tm2time(&tm, fsec, &result) != 0)
4428 				RETURN_ERROR(ereport(ERROR,
4429 									 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4430 									  errmsg("time out of range"))));
4431 
4432 			AdjustTimeForTypmod(&result, *typmod);
4433 
4434 			*typid = TIMEOID;
4435 			return TimeADTGetDatum(result);
4436 		}
4437 	}
4438 	else
4439 	{
4440 		RETURN_ERROR(ereport(ERROR,
4441 							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4442 							  errmsg("datetime format is not dated and not timed"))));
4443 	}
4444 
4445 on_error:
4446 	return (Datum) 0;
4447 }
4448 
/*
 * do_to_timestamp: shared code for to_timestamp and to_date
 *
 * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
 * fractional seconds, and fractional precision.
 *
 * 'collid' identifies the collation to use, if needed.
 * 'std' specifies standard parsing mode.
 * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
 * if that is not NULL.
 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
 *
 * 'tm' and 'fsec' must not be NULL (asserted below); 'fprec' and 'flags'
 * may be NULL, in which case the corresponding output is not stored.
 *
 * We parse 'fmt' into a list of FormatNodes, which is then passed to
 * DCH_from_char to populate a TmFromChar with the parsed contents of
 * 'date_txt'.
 *
 * The TmFromChar is then analysed and converted into the final results in
 * struct 'tm', 'fsec', and 'fprec'.
 *
 * All exits pass through the 'on_error' label, which releases a privately
 * allocated node list (if still held) and the C-string copy of the input.
 */
static void
do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
				struct pg_tm *tm, fsec_t *fsec, int *fprec,
				uint32 *flags, bool *have_error)
{
	FormatNode *format = NULL;
	TmFromChar	tmfc;
	int			fmt_len;
	char	   *date_str;
	int			fmask;
	bool		incache = false;

	Assert(tm != NULL);
	Assert(fsec != NULL);

	date_str = text_to_cstring(date_txt);

	/* Start from all-zero state for every output and for the parse result */
	ZERO_tmfc(&tmfc);
	ZERO_tm(tm);
	*fsec = 0;
	if (fprec)
		*fprec = 0;
	if (flags)
		*flags = 0;
	fmask = 0;					/* bit mask for ValidateDate() */

	fmt_len = VARSIZE_ANY_EXHDR(fmt);

	/* With an empty format picture nothing is parsed; tmfc stays zeroed */
	if (fmt_len)
	{
		char	   *fmt_str;

		fmt_str = text_to_cstring(fmt);

		if (fmt_len > DCH_CACHE_SIZE)
		{
			/*
			 * Allocate new memory if format picture is bigger than static
			 * cache and do not use cache (call parser always)
			 */
			format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));

			parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
						 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
		}
		else
		{
			/*
			 * Use cache buffers
			 */
			DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);

			incache = true;
			format = ent->format;
		}

#ifdef DEBUG_TO_FROM_CHAR
		/* dump_node(format, fmt_len); */
		/* dump_index(DCH_keywords, DCH_index); */
#endif

		DCH_from_char(format, date_str, &tmfc, collid, std, have_error);

		/*
		 * NOTE(review): if CHECK_ERROR jumps to on_error here, fmt_str does
		 * not appear to be freed on that path -- confirm whether that
		 * matters for the caller's memory context.
		 */
		CHECK_ERROR;

		pfree(fmt_str);

		if (flags)
			*flags = DCH_datetime_type(format, have_error);

		/*
		 * Release a privately parsed node list now and clear the pointer, so
		 * that the cleanup code at on_error does not free it a second time.
		 */
		if (!incache)
		{
			pfree(format);
			format = NULL;
		}

		CHECK_ERROR;
	}

	DEBUG_TMFC(&tmfc);

	/*
	 * Convert to_date/to_timestamp input fields to standard 'tm'
	 */

	/* tmfc.ssss is a total count of seconds; split it into h/m/s */
	if (tmfc.ssss)
	{
		int			x = tmfc.ssss;

		tm->tm_hour = x / SECS_PER_HOUR;
		x %= SECS_PER_HOUR;
		tm->tm_min = x / SECS_PER_MINUTE;
		x %= SECS_PER_MINUTE;
		tm->tm_sec = x;
	}

	/* Explicit (nonzero) second/minute/hour fields override the above */
	if (tmfc.ss)
		tm->tm_sec = tmfc.ss;
	if (tmfc.mi)
		tm->tm_min = tmfc.mi;
	if (tmfc.hh)
		tm->tm_hour = tmfc.hh;

	if (tmfc.clock == CLOCK_12_HOUR)
	{
		if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
		{
			RETURN_ERROR(ereport(ERROR,
								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
								  errmsg("hour \"%d\" is invalid for the 12-hour clock",
										 tm->tm_hour),
								  errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
		}

		/* PM adds 12 hours except at 12 itself; 12 AM becomes hour 0 */
		if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
			tm->tm_hour += HOURS_PER_DAY / 2;
		else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
			tm->tm_hour = 0;
	}

	if (tmfc.year)
	{
		/*
		 * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
		 * the year in the given century.  Keep in mind that the 21st century
		 * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
		 * 600BC to 501BC.
		 */
		if (tmfc.cc && tmfc.yysz <= 2)
		{
			if (tmfc.bc)
				tmfc.cc = -tmfc.cc;
			tm->tm_year = tmfc.year % 100;
			if (tm->tm_year)
			{
				if (tmfc.cc >= 0)
					tm->tm_year += (tmfc.cc - 1) * 100;
				else
					tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
			}
			else
			{
				/* find century year for dates ending in "00" */
				tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
			}
		}
		else
		{
			/* If a 4-digit year is provided, we use that and ignore CC. */
			tm->tm_year = tmfc.year;
			if (tmfc.bc)
				tm->tm_year = -tm->tm_year;
			/* correct for our representation of BC years */
			if (tm->tm_year < 0)
				tm->tm_year++;
		}
		fmask |= DTK_M(YEAR);
	}
	else if (tmfc.cc)
	{
		/* use first year of century */
		if (tmfc.bc)
			tmfc.cc = -tmfc.cc;
		if (tmfc.cc >= 0)
			/* +1 because 21st century started in 2001 */
			tm->tm_year = (tmfc.cc - 1) * 100 + 1;
		else
			/* +1 because year == 599 is 600 BC */
			tm->tm_year = tmfc.cc * 100 + 1;
		fmask |= DTK_M(YEAR);
	}

	/* A Julian day number determines year, month and day all at once */
	if (tmfc.j)
	{
		j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
		fmask |= DTK_DATE_M;
	}

	if (tmfc.ww)
	{
		if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
		{
			/*
			 * If tmfc.d is not set, then the date is left at the beginning of
			 * the ISO week (Monday).
			 */
			if (tmfc.d)
				isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
			else
				isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
			fmask |= DTK_DATE_M;
		}
		else
			/* week N is mapped to day-of-year (N - 1) * 7 + 1 */
			tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
	}

	/* week-of-month N is mapped to day-of-month (N - 1) * 7 + 1 */
	if (tmfc.w)
		tmfc.dd = (tmfc.w - 1) * 7 + 1;
	if (tmfc.dd)
	{
		tm->tm_mday = tmfc.dd;
		fmask |= DTK_M(DAY);
	}
	if (tmfc.mm)
	{
		tm->tm_mon = tmfc.mm;
		fmask |= DTK_M(MONTH);
	}

	if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
	{
		/*
		 * The month and day field have not been set, so we use the
		 * day-of-year field to populate them.  Depending on the date mode,
		 * this field may be interpreted as a Gregorian day-of-year, or an ISO
		 * week date day-of-year.
		 */

		if (!tm->tm_year && !tmfc.bc)
		{
			RETURN_ERROR(ereport(ERROR,
								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
								  errmsg("cannot calculate day of year without year information"))));
		}

		if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
		{
			int			j0;		/* zeroth day of the ISO year, in Julian */

			j0 = isoweek2j(tm->tm_year, 1) - 1;

			j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
			fmask |= DTK_DATE_M;
		}
		else
		{
			const int  *y;
			int			i;

			/*
			 * Cumulative day counts at the end of each month; row 0 is for
			 * ordinary years, row 1 for leap years.
			 */
			static const int ysum[2][13] = {
				{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
			{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};

			y = ysum[isleap(tm->tm_year)];

			/* find the first month whose cumulative total reaches ddd */
			for (i = 1; i <= MONTHS_PER_YEAR; i++)
			{
				if (tmfc.ddd <= y[i])
					break;
			}
			if (tm->tm_mon <= 1)
				tm->tm_mon = i;

			if (tm->tm_mday <= 1)
				tm->tm_mday = tmfc.ddd - y[i - 1];

			fmask |= DTK_M(MONTH) | DTK_M(DAY);
		}
	}

	/* accumulate fractional seconds: ms is scaled by 1000, us added as-is */
	if (tmfc.ms)
		*fsec += tmfc.ms * 1000;
	if (tmfc.us)
		*fsec += tmfc.us;
	if (fprec)
		*fprec = tmfc.ff;		/* fractional precision, if specified */

	/* Range-check date fields according to bit mask computed above */
	if (fmask != 0)
	{
		/* We already dealt with AD/BC, so pass isjulian = true */
		int			dterr = ValidateDate(fmask, true, false, false, tm);

		if (dterr != 0)
		{
			/*
			 * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
			 * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
			 * irrelevant hint about datestyle.
			 */
			RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
		}
	}

	/* Range-check time fields too */
	if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
		tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
		tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
		*fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
	{
		RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
	}

	/* Save parsed time-zone into tm->tm_zone if it was specified */
	if (tmfc.tzsign)
	{
		char	   *tz;

		if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
			tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
		{
			RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
		}

		/* render the offset as a "+HH:MM" / "-HH:MM" string */
		tz = psprintf("%c%02d:%02d",
					  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);

		tm->tm_zone = tz;
	}

	DEBUG_TM(tm);

	/* Common exit: reached both on success and via the error jumps above */
on_error:

	if (format && !incache)
		pfree(format);

	pfree(date_str);
}
4785 
4786 
4787 /**********************************************************************
4788  *	the NUMBER version part
4789  *********************************************************************/
4790 
4791 
/*
 * Fill the first 'max' bytes of 'str' with the character 'c' and terminate
 * the string right after them.  The buffer must therefore have room for
 * max + 1 bytes.  Returns 'str'.
 */
static char *
fill_str(char *str, int c, int max)
{
	memset(str, c, max);
	str[max] = '\0';

	return str;
}
4799 
/*
 * Reset every field of the number description pointed to by _n to zero.
 * Note that _n is evaluated once per field.
 */
#define zeroize_NUM(_n) \
do { \
	/* general flags and sign handling */ \
	(_n)->flag = 0; \
	(_n)->lsign = 0; \
	(_n)->pre_lsign_num = 0; \
	(_n)->need_locale = 0; \
	/* digit counts and scaling */ \
	(_n)->pre = 0; \
	(_n)->post = 0; \
	(_n)->multi = 0; \
	/* zero-padding positions */ \
	(_n)->zero_start = 0; \
	(_n)->zero_end = 0; \
} while(0)
4812 
4813 /* This works the same as DCH_prevent_counter_overflow */
4814 static inline void
NUM_prevent_counter_overflow(void)4815 NUM_prevent_counter_overflow(void)
4816 {
4817 	if (NUMCounter >= (INT_MAX - 1))
4818 	{
4819 		for (int i = 0; i < n_NUMCache; i++)
4820 			NUMCache[i]->age >>= 1;
4821 		NUMCounter >>= 1;
4822 	}
4823 }
4824 
4825 /* select a NUMCacheEntry to hold the given format picture */
4826 static NUMCacheEntry *
NUM_cache_getnew(const char * str)4827 NUM_cache_getnew(const char *str)
4828 {
4829 	NUMCacheEntry *ent;
4830 
4831 	/* Ensure we can advance NUMCounter below */
4832 	NUM_prevent_counter_overflow();
4833 
4834 	/*
4835 	 * If cache is full, remove oldest entry (or recycle first not-valid one)
4836 	 */
4837 	if (n_NUMCache >= NUM_CACHE_ENTRIES)
4838 	{
4839 		NUMCacheEntry *old = NUMCache[0];
4840 
4841 #ifdef DEBUG_TO_FROM_CHAR
4842 		elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4843 #endif
4844 		if (old->valid)
4845 		{
4846 			for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4847 			{
4848 				ent = NUMCache[i];
4849 				if (!ent->valid)
4850 				{
4851 					old = ent;
4852 					break;
4853 				}
4854 				if (ent->age < old->age)
4855 					old = ent;
4856 			}
4857 		}
4858 #ifdef DEBUG_TO_FROM_CHAR
4859 		elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4860 #endif
4861 		old->valid = false;
4862 		StrNCpy(old->str, str, NUM_CACHE_SIZE + 1);
4863 		old->age = (++NUMCounter);
4864 		/* caller is expected to fill format and Num, then set valid */
4865 		return old;
4866 	}
4867 	else
4868 	{
4869 #ifdef DEBUG_TO_FROM_CHAR
4870 		elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4871 #endif
4872 		Assert(NUMCache[n_NUMCache] == NULL);
4873 		NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4874 			MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry));
4875 		ent->valid = false;
4876 		StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1);
4877 		ent->age = (++NUMCounter);
4878 		/* caller is expected to fill format and Num, then set valid */
4879 		++n_NUMCache;
4880 		return ent;
4881 	}
4882 }
4883 
4884 /* look for an existing NUMCacheEntry matching the given format picture */
4885 static NUMCacheEntry *
NUM_cache_search(const char * str)4886 NUM_cache_search(const char *str)
4887 {
4888 	/* Ensure we can advance NUMCounter below */
4889 	NUM_prevent_counter_overflow();
4890 
4891 	for (int i = 0; i < n_NUMCache; i++)
4892 	{
4893 		NUMCacheEntry *ent = NUMCache[i];
4894 
4895 		if (ent->valid && strcmp(ent->str, str) == 0)
4896 		{
4897 			ent->age = (++NUMCounter);
4898 			return ent;
4899 		}
4900 	}
4901 
4902 	return NULL;
4903 }
4904 
4905 /* Find or create a NUMCacheEntry for the given format picture */
4906 static NUMCacheEntry *
NUM_cache_fetch(const char * str)4907 NUM_cache_fetch(const char *str)
4908 {
4909 	NUMCacheEntry *ent;
4910 
4911 	if ((ent = NUM_cache_search(str)) == NULL)
4912 	{
4913 		/*
4914 		 * Not in the cache, must run parser and save a new format-picture to
4915 		 * the cache.  Do not mark the cache entry valid until parsing
4916 		 * succeeds.
4917 		 */
4918 		ent = NUM_cache_getnew(str);
4919 
4920 		zeroize_NUM(&ent->Num);
4921 
4922 		parse_format(ent->format, str, NUM_keywords,
4923 					 NULL, NUM_index, NUM_FLAG, &ent->Num);
4924 
4925 		ent->valid = true;
4926 	}
4927 	return ent;
4928 }
4929 
4930 /* ----------
4931  * Cache routine for NUM to_char version
4932  * ----------
4933  */
4934 static FormatNode *
NUM_cache(int len,NUMDesc * Num,text * pars_str,bool * shouldFree)4935 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4936 {
4937 	FormatNode *format = NULL;
4938 	char	   *str;
4939 
4940 	str = text_to_cstring(pars_str);
4941 
4942 	if (len > NUM_CACHE_SIZE)
4943 	{
4944 		/*
4945 		 * Allocate new memory if format picture is bigger than static cache
4946 		 * and do not use cache (call parser always)
4947 		 */
4948 		format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4949 
4950 		*shouldFree = true;
4951 
4952 		zeroize_NUM(Num);
4953 
4954 		parse_format(format, str, NUM_keywords,
4955 					 NULL, NUM_index, NUM_FLAG, Num);
4956 	}
4957 	else
4958 	{
4959 		/*
4960 		 * Use cache buffers
4961 		 */
4962 		NUMCacheEntry *ent = NUM_cache_fetch(str);
4963 
4964 		*shouldFree = false;
4965 
4966 		format = ent->format;
4967 
4968 		/*
4969 		 * Copy cache to used struct
4970 		 */
4971 		Num->flag = ent->Num.flag;
4972 		Num->lsign = ent->Num.lsign;
4973 		Num->pre = ent->Num.pre;
4974 		Num->post = ent->Num.post;
4975 		Num->pre_lsign_num = ent->Num.pre_lsign_num;
4976 		Num->need_locale = ent->Num.need_locale;
4977 		Num->multi = ent->Num.multi;
4978 		Num->zero_start = ent->Num.zero_start;
4979 		Num->zero_end = ent->Num.zero_end;
4980 	}
4981 
4982 #ifdef DEBUG_TO_FROM_CHAR
4983 	/* dump_node(format, len); */
4984 	dump_index(NUM_keywords, NUM_index);
4985 #endif
4986 
4987 	pfree(str);
4988 	return format;
4989 }
4990 
4991 
4992 static char *
int_to_roman(int number)4993 int_to_roman(int number)
4994 {
4995 	int			len = 0,
4996 				num = 0;
4997 	char	   *p = NULL,
4998 			   *result,
4999 				numstr[12];
5000 
5001 	result = (char *) palloc(16);
5002 	*result = '\0';
5003 
5004 	if (number > 3999 || number < 1)
5005 	{
5006 		fill_str(result, '#', 15);
5007 		return result;
5008 	}
5009 	len = snprintf(numstr, sizeof(numstr), "%d", number);
5010 
5011 	for (p = numstr; *p != '\0'; p++, --len)
5012 	{
5013 		num = *p - 49;			/* 48 ascii + 1 */
5014 		if (num < 0)
5015 			continue;
5016 
5017 		if (len > 3)
5018 		{
5019 			while (num-- != -1)
5020 				strcat(result, "M");
5021 		}
5022 		else
5023 		{
5024 			if (len == 3)
5025 				strcat(result, rm100[num]);
5026 			else if (len == 2)
5027 				strcat(result, rm10[num]);
5028 			else if (len == 1)
5029 				strcat(result, rm1[num]);
5030 		}
5031 	}
5032 	return result;
5033 }
5034 
5035 
5036 
5037 /* ----------
5038  * Locale
5039  * ----------
5040  */
5041 static void
NUM_prepare_locale(NUMProc * Np)5042 NUM_prepare_locale(NUMProc *Np)
5043 {
5044 	if (Np->Num->need_locale)
5045 	{
5046 		struct lconv *lconv;
5047 
5048 		/*
5049 		 * Get locales
5050 		 */
5051 		lconv = PGLC_localeconv();
5052 
5053 		/*
5054 		 * Positive / Negative number sign
5055 		 */
5056 		if (lconv->negative_sign && *lconv->negative_sign)
5057 			Np->L_negative_sign = lconv->negative_sign;
5058 		else
5059 			Np->L_negative_sign = "-";
5060 
5061 		if (lconv->positive_sign && *lconv->positive_sign)
5062 			Np->L_positive_sign = lconv->positive_sign;
5063 		else
5064 			Np->L_positive_sign = "+";
5065 
5066 		/*
5067 		 * Number decimal point
5068 		 */
5069 		if (lconv->decimal_point && *lconv->decimal_point)
5070 			Np->decimal = lconv->decimal_point;
5071 
5072 		else
5073 			Np->decimal = ".";
5074 
5075 		if (!IS_LDECIMAL(Np->Num))
5076 			Np->decimal = ".";
5077 
5078 		/*
5079 		 * Number thousands separator
5080 		 *
5081 		 * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
5082 		 * but "" for thousands_sep, so we set the thousands_sep too.
5083 		 * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
5084 		 */
5085 		if (lconv->thousands_sep && *lconv->thousands_sep)
5086 			Np->L_thousands_sep = lconv->thousands_sep;
5087 		/* Make sure thousands separator doesn't match decimal point symbol. */
5088 		else if (strcmp(Np->decimal, ",") != 0)
5089 			Np->L_thousands_sep = ",";
5090 		else
5091 			Np->L_thousands_sep = ".";
5092 
5093 		/*
5094 		 * Currency symbol
5095 		 */
5096 		if (lconv->currency_symbol && *lconv->currency_symbol)
5097 			Np->L_currency_symbol = lconv->currency_symbol;
5098 		else
5099 			Np->L_currency_symbol = " ";
5100 	}
5101 	else
5102 	{
5103 		/*
5104 		 * Default values
5105 		 */
5106 		Np->L_negative_sign = "-";
5107 		Np->L_positive_sign = "+";
5108 		Np->decimal = ".";
5109 
5110 		Np->L_thousands_sep = ",";
5111 		Np->L_currency_symbol = " ";
5112 	}
5113 }
5114 
/* ----------
 * Return pointer to the last relevant character after the decimal point,
 * i.e. the last one that is not '0':
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *dot = strchr(num, '.');
	char	   *last;
	char	   *cur;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (dot == NULL)
		return NULL;

	/* start at the point itself, then remember each later non-'0' char */
	last = dot;
	for (cur = dot + 1; *cur != '\0'; cur++)
	{
		if (*cur != '0')
			last = cur;
	}

	return last;
}
5146 
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * Both expand to expressions that refer to a NUMProc pointer named 'Np' and
 * to an 'input_len' value, so those names must be in scope wherever the
 * macros are used.
 * OVERLOAD_TEST: true if we've reached end of input string
 * AMOUNT_TEST(s): true if at least s bytes remain in string
 */
#define OVERLOAD_TEST	(Np->inout_p >= Np->inout + input_len)
#define AMOUNT_TEST(s)	(Np->inout_p <= Np->inout + (input_len - (s)))
5154 
/* ----------
 * Number extraction for TO_NUMBER()
 *
 * Called by NUM_processor() for each NUM_9, NUM_0, NUM_DEC or NUM_D format
 * node while parsing input.  Looking at the input at Np->inout_p, it:
 *	- skips one leading space;
 *	- while no digit has been read yet, tries to read a leading sign and
 *	  stores it in *Np->number (which holds ' ' until a sign is seen);
 *	- copies a digit, or '.' for a matched decimal point, to Np->number_p
 *	  and updates read_pre / read_post / read_dec;
 *	- once at least one digit has been read, tries to read a trailing sign.
 *
 * Np:		  processing state; inout_p, number, number_p and the read_*
 *			  counters are updated here
 * id:		  key id of the current format node
 * input_len: length of the input in bytes (used by OVERLOAD_TEST and
 *			  AMOUNT_TEST)
 *
 * The digit, decimal-point and trailing-sign paths advance inout_p one
 * byte less than what they consume, because NUM_processor() increments
 * inout_p after this function returns.
 * ----------
 */
static void
NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
{
	/* set when this call consumed a digit or a decimal point */
	bool		isread = false;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, " --- scan start --- id=%s",
		 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
#endif

	if (OVERLOAD_TEST)
		return;

	/* skip a single space; re-check for end of input afterwards */
	if (*Np->inout_p == ' ')
		Np->inout_p++;

	if (OVERLOAD_TEST)
		return;

	/*
	 * read sign before number
	 *
	 * Only attempted while no sign has been stored yet (*Np->number is still
	 * ' '), the format node is a digit position, and no digit has been read.
	 */
	if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
		(Np->read_pre + Np->read_post) == 0)
	{
#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
			 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
#endif

		/*
		 * locale sign
		 */
		if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
		{
			int			x = 0;

#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
#endif
			/* an empty locale sign string (x == 0) never matches */
			if ((x = strlen(Np->L_negative_sign)) &&
				AMOUNT_TEST(x) &&
				strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
			{
				Np->inout_p += x;
				*Np->number = '-';
			}
			else if ((x = strlen(Np->L_positive_sign)) &&
					 AMOUNT_TEST(x) &&
					 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
			{
				Np->inout_p += x;
				*Np->number = '+';
			}
		}
		else
		{
#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
#endif

			/*
			 * simple + - < >
			 *
			 * '<' counts as a negative sign only for bracket formats.
			 */
			if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
										*Np->inout_p == '<'))
			{
				*Np->number = '-';	/* set - */
				Np->inout_p++;
			}
			else if (*Np->inout_p == '+')
			{
				*Np->number = '+';	/* set + */
				Np->inout_p++;
			}
		}
	}

	/* the sign may have been the last thing in the input */
	if (OVERLOAD_TEST)
		return;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
#endif

	/*
	 * read digit or decimal point
	 */
	if (isdigit((unsigned char) *Np->inout_p))
	{
		/* already read as many post-decimal digits as the format allows */
		if (Np->read_dec && Np->read_post == Np->Num->post)
			return;

		*Np->number_p = *Np->inout_p;
		Np->number_p++;

		if (Np->read_dec)
			Np->read_post++;
		else
			Np->read_pre++;

		isread = true;

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
#endif
	}
	else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
	{
		/*
		 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
		 * Np->decimal is always just "." if we don't have a D format token.
		 * So we just unconditionally match to Np->decimal.
		 */
		int			x = strlen(Np->decimal);

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Try read decimal point (%c)",
			 *Np->inout_p);
#endif
		if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
		{
			/* x - 1: the caller's inout_p++ consumes the final byte */
			Np->inout_p += x - 1;
			/* the number string always uses '.' as its decimal point */
			*Np->number_p = '.';
			Np->number_p++;
			Np->read_dec = true;
			isread = true;
		}
	}

	if (OVERLOAD_TEST)
		return;

	/*
	 * Read sign behind "last" number
	 *
	 * We need sign detection because determine exact position of post-sign is
	 * difficult:
	 *
	 *	FM9999.9999999S		-> 123.001-
	 *	9.9S				-> .5-
	 *	FM9.999999MI		-> 5.01-
	 */
	if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
	{
		/*
		 * locale sign (NUM_S) is always anchored behind a last number, if: -
		 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
		 * next char is not digit
		 */
		if (IS_LSIGN(Np->Num) && isread &&
			(Np->inout_p + 1) < Np->inout + input_len &&
			!isdigit((unsigned char) *(Np->inout_p + 1)))
		{
			int			x;
			/* remember current position, then look at the following byte */
			char	   *tmp = Np->inout_p++;

#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
#endif
			if ((x = strlen(Np->L_negative_sign)) &&
				AMOUNT_TEST(x) &&
				strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
			{
				Np->inout_p += x - 1;	/* -1 .. NUM_processor() do inout_p++ */
				*Np->number = '-';
			}
			else if ((x = strlen(Np->L_positive_sign)) &&
					 AMOUNT_TEST(x) &&
					 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
			{
				Np->inout_p += x - 1;	/* -1 .. NUM_processor() do inout_p++ */
				*Np->number = '+';
			}
			if (*Np->number == ' ')
				/* no sign read; undo the look-ahead */
				Np->inout_p = tmp;
		}

		/*
		 * try read non-locale sign, it's happen only if format is not exact
		 * and we cannot determine sign position of MI/PL/SG, an example:
		 *
		 * FM9.999999MI			   -> 5.01-
		 *
		 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
		 * like to_number('1 -', '9S') where sign is not anchored to last
		 * number.
		 */
		else if (isread == false && IS_LSIGN(Np->Num) == false &&
				 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
		{
#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
#endif

			/*
			 * simple + -
			 */
			if (*Np->inout_p == '-' || *Np->inout_p == '+')
				/* NUM_processor() do inout_p++ */
				*Np->number = *Np->inout_p;
		}
	}
}
5363 
/*
 * IS_PREDEC_SPACE(_n): true when IS_ZERO() is false for the format, nothing
 * of the number string has been consumed yet (number_p still equals number),
 * the first character of the number string is '0', and Num->post is nonzero.
 * NUM_numpart_to_char() uses it to handle cases such as "9.9" --> " .1".
 */
#define IS_PREDEC_SPACE(_n) \
		(IS_ZERO((_n)->Num)==false && \
		 (_n)->number == (_n)->number_p && \
		 *(_n)->number == '0' && \
				 (_n)->Num->post != 0)
5369 
/* ----------
 * Add digit or sign to number-string
 *
 * Called by NUM_processor() for each NUM_9, NUM_0, NUM_DEC or NUM_D format
 * node when producing to_char() output.  In order, it:
 *	- writes the leading sign (locale sign, '<', ' ' or '-') if one is still
 *	  owed and the current position is where it belongs;
 *	- writes a blank, a '0', the decimal point, or the next character of
 *	  Np->number_p for this position;
 *	- appends the closing '>' (or ' ') or the locale post-sign when the
 *	  position is the last one.
 *
 * Output goes to Np->inout_p, which is advanced past everything written.
 * Np->num_curr is incremented on every call, except that nothing at all is
 * done for Roman-numeral formats.
 * ----------
 */
static void
NUM_numpart_to_char(NUMProc *Np, int id)
{
	int			end;

	if (IS_ROMAN(Np->Num))
		return;

	/* Note: in this elog() output not set '\0' in 'inout' */

#ifdef DEBUG_TO_FROM_CHAR

	/*
	 * Np->num_curr is number of current item in format-picture, it is not
	 * current position in inout!
	 */
	elog(DEBUG_elog_output,
		 "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
		 Np->sign_wrote,
		 Np->num_curr,
		 Np->number_p,
		 Np->inout);
#endif
	/* num_in is set again below only if a digit or zero is written */
	Np->num_in = false;

	/*
	 * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
	 * handle "9.9" --> " .1"
	 */
	if (Np->sign_wrote == false &&
		(Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
		(IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
	{
		if (IS_LSIGN(Np->Num))
		{
			/* a post-positioned locale sign is written at the end instead */
			if (Np->Num->lsign == NUM_LSIGN_PRE)
			{
				if (Np->sign == '-')
					strcpy(Np->inout_p, Np->L_negative_sign);
				else
					strcpy(Np->inout_p, Np->L_positive_sign);
				Np->inout_p += strlen(Np->inout_p);
				Np->sign_wrote = true;
			}
		}
		else if (IS_BRACKET(Np->Num))
		{
			*Np->inout_p = Np->sign == '+' ? ' ' : '<';
			++Np->inout_p;
			Np->sign_wrote = true;
		}
		else if (Np->sign == '+')
		{
			/* a positive sign is shown as a blank, omitted in fill mode */
			if (!IS_FILLMODE(Np->Num))
			{
				*Np->inout_p = ' '; /* Write + */
				++Np->inout_p;
			}
			Np->sign_wrote = true;
		}
		else if (Np->sign == '-')
		{						/* Write - */
			*Np->inout_p = '-';
			++Np->inout_p;
			Np->sign_wrote = true;
		}
	}


	/*
	 * digits / FM / Zero / Dec. point
	 */
	if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
	{
		if (Np->num_curr < Np->out_pre_spaces &&
			(Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
		{
			/*
			 * Write blank space
			 */
			if (!IS_FILLMODE(Np->Num))
			{
				*Np->inout_p = ' '; /* Write ' ' */
				++Np->inout_p;
			}
		}
		else if (IS_ZERO(Np->Num) &&
				 Np->num_curr < Np->out_pre_spaces &&
				 Np->Num->zero_start <= Np->num_curr)
		{
			/*
			 * Write ZERO
			 */
			*Np->inout_p = '0'; /* Write '0' */
			++Np->inout_p;
			Np->num_in = true;
		}
		else
		{
			/*
			 * Write Decimal point
			 */
			if (*Np->number_p == '.')
			{
				if (!Np->last_relevant || *Np->last_relevant != '.')
				{
					strcpy(Np->inout_p, Np->decimal);	/* Write DEC/D */
					Np->inout_p += strlen(Np->inout_p);
				}

				/*
				 * Ora 'n' -- FM9.9 --> 'n.'
				 */
				else if (IS_FILLMODE(Np->Num) &&
						 Np->last_relevant && *Np->last_relevant == '.')
				{
					strcpy(Np->inout_p, Np->decimal);	/* Write DEC/D */
					Np->inout_p += strlen(Np->inout_p);
				}
			}
			else
			{
				/*
				 * Write Digits
				 *
				 * Nothing is written for a position past last_relevant,
				 * unless the format node is NUM_0.
				 */
				if (Np->last_relevant && Np->number_p > Np->last_relevant &&
					id != NUM_0)
					;

				/*
				 * '0.1' -- 9.9 --> '  .1'
				 */
				else if (IS_PREDEC_SPACE(Np))
				{
					if (!IS_FILLMODE(Np->Num))
					{
						*Np->inout_p = ' ';
						++Np->inout_p;
					}

					/*
					 * '0' -- FM9.9 --> '0.'
					 */
					else if (Np->last_relevant && *Np->last_relevant == '.')
					{
						*Np->inout_p = '0';
						++Np->inout_p;
					}
				}
				else
				{
					*Np->inout_p = *Np->number_p;	/* Write DIGIT */
					++Np->inout_p;
					Np->num_in = true;
				}
			}
			/* do no exceed string length */
			if (*Np->number_p)
				++Np->number_p;
		}

		/*
		 * "end" is compared against num_curr + 1 below to decide whether the
		 * closing bracket / locale post-sign is written on this call.
		 */
		end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);

		/*
		 * When number_p has reached last_relevant, end is set to num_curr, so
		 * the comparison below cannot succeed on this call.
		 */
		if (Np->last_relevant && Np->last_relevant == Np->number_p)
			end = Np->num_curr;

		if (Np->num_curr + 1 == end)
		{
			if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
			{
				*Np->inout_p = Np->sign == '+' ? ' ' : '>';
				++Np->inout_p;
			}
			else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
			{
				if (Np->sign == '-')
					strcpy(Np->inout_p, Np->L_negative_sign);
				else
					strcpy(Np->inout_p, Np->L_positive_sign);
				Np->inout_p += strlen(Np->inout_p);
			}
		}
	}

	++Np->num_curr;
}
5560 
5561 /*
5562  * Skip over "n" input characters, but only if they aren't numeric data
5563  */
5564 static void
NUM_eat_non_data_chars(NUMProc * Np,int n,int input_len)5565 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
5566 {
5567 	while (n-- > 0)
5568 	{
5569 		if (OVERLOAD_TEST)
5570 			break;				/* end of input */
5571 		if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
5572 			break;				/* it's a data character */
5573 		Np->inout_p += pg_mblen(Np->inout_p);
5574 	}
5575 }
5576 
/*
 * NUM_processor: walk a parsed number format picture and either build the
 * to_char() output text or extract a number string for to_number().
 *
 * node:	 array of format nodes, terminated by a NODE_TYPE_END entry
 * Num:		 description of the format; modified here (zero_start, flag,
 *			 lsign, pre, post, ...), and on the to_number path Num->post is
 *			 set to the number of post-decimal digits actually read
 * inout:	 to_char: buffer the output is written to;
 *			 to_number: the input text (input_len bytes)
 * number:	 to_char: the number string to be formatted;
 *			 to_number: buffer that receives the sign character followed by
 *			 the digits read
 * input_len: input length in bytes; consulted only on the to_number path
 * to_char_out_pre_spaces: to_char only; stored as Np->out_pre_spaces
 * sign:	 to_char only; stored as Np->sign and compared with '+' / '-'
 * is_to_char: selects the direction of processing
 * collid:	 not referenced in this function's body
 *
 * Returns inout for to_char and number for to_number, NUL-terminated in
 * both cases.  For "EEEE" formats, number is simply copied to inout.
 * Raises ERRCODE_FEATURE_NOT_SUPPORTED for "EEEE" or "RN" on input.
 */
static char *
NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
			  char *number, int input_len, int to_char_out_pre_spaces,
			  int sign, bool is_to_char, Oid collid)
{
	FormatNode *n;
	NUMProc		_Np,
			   *Np = &_Np;
	const char *pattern;
	int			pattern_len;

	MemSet(Np, 0, sizeof(NUMProc));

	Np->Num = Num;
	Np->is_to_char = is_to_char;
	Np->number = number;
	Np->inout = inout;
	Np->last_relevant = NULL;
	Np->read_post = 0;
	Np->read_pre = 0;
	Np->read_dec = false;

	/* a nonzero zero_start is decremented by one before it is used below */
	if (Np->Num->zero_start)
		--Np->Num->zero_start;

	if (IS_EEEE(Np->Num))
	{
		if (!Np->is_to_char)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("\"EEEE\" not supported for input")));
		/* the caller-supplied number string is emitted unchanged */
		return strcpy(inout, number);
	}

	/*
	 * Roman correction
	 *
	 * Clears all sign/precision settings and all flags other than fill mode,
	 * then sets the Roman flag.
	 */
	if (IS_ROMAN(Np->Num))
	{
		if (!Np->is_to_char)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("\"RN\" not supported for input")));

		Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
			Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;

		if (IS_FILLMODE(Np->Num))
		{
			Np->Num->flag = 0;
			Np->Num->flag |= NUM_F_FILLMODE;
		}
		else
			Np->Num->flag = 0;
		Np->Num->flag |= NUM_F_ROMAN;
	}

	/*
	 * Sign
	 */
	if (is_to_char)
	{
		Np->sign = sign;

		/* MI/PL/SG - write sign itself and not in number */
		if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
		{
			if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
				Np->sign_wrote = false; /* need sign */
			else
				Np->sign_wrote = true;	/* needn't sign */
		}
		else
		{
			if (Np->sign != '-')
			{
				if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
					Np->Num->flag &= ~NUM_F_BRACKET;
				if (IS_MINUS(Np->Num))
					Np->Num->flag &= ~NUM_F_MINUS;
			}
			else if (Np->sign != '+' && IS_PLUS(Np->Num))
				Np->Num->flag &= ~NUM_F_PLUS;

			if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
				Np->sign_wrote = true;	/* needn't sign */
			else
				Np->sign_wrote = false; /* need sign */

			if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
				Np->Num->lsign = NUM_LSIGN_POST;
		}
	}
	else
		Np->sign = false;

	/*
	 * Count
	 */
	Np->num_count = Np->Num->post + Np->Num->pre - 1;

	if (is_to_char)
	{
		Np->out_pre_spaces = to_char_out_pre_spaces;

		if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
		{
			Np->last_relevant = get_last_relevant_decnum(Np->number);

			/*
			 * If any '0' specifiers are present, make sure we don't strip
			 * those digits.
			 */
			if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
			{
				char	   *last_zero;

				last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
				if (Np->last_relevant < last_zero)
					Np->last_relevant = last_zero;
			}
		}

		if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
			++Np->num_count;
	}
	else
	{
		Np->out_pre_spaces = 0;
		*Np->number = ' ';		/* sign space */
		*(Np->number + 1) = '\0';
	}

	Np->num_in = 0;
	Np->num_curr = 0;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output,
		 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
		 Np->sign,
		 Np->number,
		 Np->Num->pre,
		 Np->Num->post,
		 Np->num_count,
		 Np->out_pre_spaces,
		 Np->sign_wrote ? "Yes" : "No",
		 IS_ZERO(Np->Num) ? "Yes" : "No",
		 Np->Num->zero_start,
		 Np->Num->zero_end,
		 Np->last_relevant ? Np->last_relevant : "<not set>",
		 IS_BRACKET(Np->Num) ? "Yes" : "No",
		 IS_PLUS(Np->Num) ? "Yes" : "No",
		 IS_MINUS(Np->Num) ? "Yes" : "No",
		 IS_FILLMODE(Np->Num) ? "Yes" : "No",
		 IS_ROMAN(Np->Num) ? "Yes" : "No",
		 IS_EEEE(Np->Num) ? "Yes" : "No"
		);
#endif

	/*
	 * Locale
	 */
	NUM_prepare_locale(Np);

	/*
	 * Processor direct cycle
	 */
	if (Np->is_to_char)
		Np->number_p = Np->number;
	else
		Np->number_p = Np->number + 1;	/* first char is space for sign */

	for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
	{
		if (!Np->is_to_char)
		{
			/*
			 * Check at least one byte remains to be scanned.  (In actions
			 * below, must use AMOUNT_TEST if we want to read more bytes than
			 * that.)
			 */
			if (OVERLOAD_TEST)
				break;
		}

		/*
		 * Format pictures actions
		 */
		if (n->type == NODE_TYPE_ACTION)
		{
			/*
			 * Create/read digit/zero/blank/sign/special-case
			 *
			 * 'NUM_S' note: The locale sign is anchored to number and we
			 * read/write it when we work with first or last number
			 * (NUM_0/NUM_9).  This is why NUM_S is missing in switch().
			 *
			 * Notice the "Np->inout_p++" at the bottom of the loop.  This is
			 * why most of the actions advance inout_p one less than you might
			 * expect.  In cases where we don't want that increment to happen,
			 * a switch case ends with "continue" not "break".
			 */
			switch (n->key->id)
			{
				case NUM_9:
				case NUM_0:
				case NUM_DEC:
				case NUM_D:
					if (Np->is_to_char)
					{
						/* the helper advances inout_p itself */
						NUM_numpart_to_char(Np, n->key->id);
						continue;	/* for() */
					}
					else
					{
						NUM_numpart_from_char(Np, n->key->id, input_len);
						break;	/* switch() case: */
					}

				case NUM_COMMA:
					if (Np->is_to_char)
					{
						/* no digit written yet: blank instead of ',' */
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
							else
								*Np->inout_p = ' ';
						}
						else
							*Np->inout_p = ',';
					}
					else
					{
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
						}
						/* consume an input byte only if it really is ',' */
						if (*Np->inout_p != ',')
							continue;
					}
					break;

				case NUM_G:
					pattern = Np->L_thousands_sep;
					pattern_len = strlen(pattern);
					if (Np->is_to_char)
					{
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
							else
							{
								/* just in case there are MB chars */
								pattern_len = pg_mbstrlen(pattern);
								memset(Np->inout_p, ' ', pattern_len);
								Np->inout_p += pattern_len - 1;
							}
						}
						else
						{
							strcpy(Np->inout_p, pattern);
							Np->inout_p += pattern_len - 1;
						}
					}
					else
					{
						if (!Np->num_in)
						{
							if (IS_FILLMODE(Np->Num))
								continue;
						}

						/*
						 * Because L_thousands_sep typically contains data
						 * characters (either '.' or ','), we can't use
						 * NUM_eat_non_data_chars here.  Instead skip only if
						 * the input matches L_thousands_sep.
						 */
						if (AMOUNT_TEST(pattern_len) &&
							strncmp(Np->inout_p, pattern, pattern_len) == 0)
							Np->inout_p += pattern_len - 1;
						else
							continue;
					}
					break;

				case NUM_L:
					pattern = Np->L_currency_symbol;
					if (Np->is_to_char)
					{
						strcpy(Np->inout_p, pattern);

						/*
						 * NOTE(review): strlen() returns an unsigned value, so
						 * for an empty pattern "strlen(pattern) - 1" wraps
						 * around; confirm an empty currency symbol is handled
						 * as intended here.
						 */
						Np->inout_p += strlen(pattern) - 1;
					}
					else
					{
						NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
						continue;
					}
					break;

				case NUM_RN:
					/* fill mode: no padding; otherwise right-justify in 15 */
					if (IS_FILLMODE(Np->Num))
					{
						strcpy(Np->inout_p, Np->number_p);
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					else
					{
						sprintf(Np->inout_p, "%15s", Np->number_p);
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					break;

				case NUM_rn:
					if (IS_FILLMODE(Np->Num))
					{
						strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					else
					{
						sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
						Np->inout_p += strlen(Np->inout_p) - 1;
					}
					break;

				case NUM_th:
					/* no ordinal suffix for these cases */
					if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
						Np->sign == '-' || IS_DECIMAL(Np->Num))
						continue;

					if (Np->is_to_char)
					{
						strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
						Np->inout_p += 1;
					}
					else
					{
						/* All variants of 'th' occupy 2 characters */
						NUM_eat_non_data_chars(Np, 2, input_len);
						continue;
					}
					break;

				case NUM_TH:
					if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
						Np->sign == '-' || IS_DECIMAL(Np->Num))
						continue;

					if (Np->is_to_char)
					{
						strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
						Np->inout_p += 1;
					}
					else
					{
						/* All variants of 'TH' occupy 2 characters */
						NUM_eat_non_data_chars(Np, 2, input_len);
						continue;
					}
					break;

				case NUM_MI:
					if (Np->is_to_char)
					{
						if (Np->sign == '-')
							*Np->inout_p = '-';
						else if (IS_FILLMODE(Np->Num))
							continue;
						else
							*Np->inout_p = ' ';
					}
					else
					{
						if (*Np->inout_p == '-')
							*Np->number = '-';
						else
						{
							NUM_eat_non_data_chars(Np, 1, input_len);
							continue;
						}
					}
					break;

				case NUM_PL:
					if (Np->is_to_char)
					{
						if (Np->sign == '+')
							*Np->inout_p = '+';
						else if (IS_FILLMODE(Np->Num))
							continue;
						else
							*Np->inout_p = ' ';
					}
					else
					{
						if (*Np->inout_p == '+')
							*Np->number = '+';
						else
						{
							NUM_eat_non_data_chars(Np, 1, input_len);
							continue;
						}
					}
					break;

				case NUM_SG:
					if (Np->is_to_char)
						*Np->inout_p = Np->sign;
					else
					{
						if (*Np->inout_p == '-')
							*Np->number = '-';
						else if (*Np->inout_p == '+')
							*Np->number = '+';
						else
						{
							NUM_eat_non_data_chars(Np, 1, input_len);
							continue;
						}
					}
					break;

				default:
					/* any other key neither writes nor consumes anything */
					continue;
					break;
			}
		}
		else
		{
			/*
			 * In TO_CHAR, non-pattern characters in the format are copied to
			 * the output.  In TO_NUMBER, we skip one input character for each
			 * non-pattern format character, whether or not it matches the
			 * format character.
			 */
			if (Np->is_to_char)
			{
				strcpy(Np->inout_p, n->character);
				Np->inout_p += strlen(Np->inout_p);
			}
			else
			{
				Np->inout_p += pg_mblen(Np->inout_p);
			}
			continue;
		}
		Np->inout_p++;
	}

	if (Np->is_to_char)
	{
		*Np->inout_p = '\0';
		return Np->inout;
	}
	else
	{
		/* drop a trailing decimal point, otherwise just terminate */
		if (*(Np->number_p - 1) == '.')
			*(Np->number_p - 1) = '\0';
		else
			*Np->number_p = '\0';

		/*
		 * Correction - precision of dec. number
		 */
		Np->Num->post = Np->read_post;

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
#endif
		return Np->number;
	}
}
6053 
/* ----------
 * MACRO: Start part of NUM - for all NUM's to_char variants
 *
 * Shared prologue, kept as a macro to avoid repeating the same code in
 * every to_char variant.  The enclosing function must declare "fmt",
 * "result", "format", "Num" and "shouldFree".
 *
 * If the format text is empty, or its length is at or above
 * (INT_MAX - VARHDRSZ) / NUM_MAX_ITEM_SIZ, the enclosing function returns
 * an empty text value.  Otherwise a zero-filled result buffer of
 * len * NUM_MAX_ITEM_SIZ + 1 + VARHDRSZ bytes is allocated and the parsed
 * format is obtained from NUM_cache().
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int len = VARSIZE_ANY_EXHDR(fmt); \
	if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ)		\
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	result	= (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ);	\
	format	= NUM_cache(len, &Num, fmt, &shouldFree);		\
} while (0)
6067 
/* ----------
 * MACRO: Finish part of NUM
 *
 * Shared epilogue for the to_char variants.  The enclosing function must
 * declare "format", "Num", "result", "numstr", "out_pre_spaces", "sign"
 * and "shouldFree".
 *
 * Runs NUM_processor() in to_char mode (input_len 0, is_to_char true) to
 * write the formatted text into the data area of "result", frees "format"
 * if "shouldFree" is set, and then sets the size of "result" from the
 * length of the NUL-terminated output.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int		len; \
									\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
									\
	if (shouldFree)					\
		pfree(format);				\
									\
	/*								\
	 * Convert null-terminated representation of result to standard text. \
	 * The result is usually much bigger than it needs to be, but there \
	 * seems little point in realloc'ing it smaller. \
	 */								\
	len = strlen(VARDATA(result));	\
	SET_VARSIZE(result, len + VARHDRSZ); \
} while (0)
6089 
6090 /* -------------------
6091  * NUMERIC to_number() (convert string to numeric)
6092  * -------------------
6093  */
6094 Datum
numeric_to_number(PG_FUNCTION_ARGS)6095 numeric_to_number(PG_FUNCTION_ARGS)
6096 {
6097 	text	   *value = PG_GETARG_TEXT_PP(0);
6098 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6099 	NUMDesc		Num;
6100 	Datum		result;
6101 	FormatNode *format;
6102 	char	   *numstr;
6103 	bool		shouldFree;
6104 	int			len = 0;
6105 	int			scale,
6106 				precision;
6107 
6108 	len = VARSIZE_ANY_EXHDR(fmt);
6109 
6110 	if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
6111 		PG_RETURN_NULL();
6112 
6113 	format = NUM_cache(len, &Num, fmt, &shouldFree);
6114 
6115 	numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
6116 
6117 	NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
6118 				  VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
6119 
6120 	scale = Num.post;
6121 	precision = Num.pre + Num.multi + scale;
6122 
6123 	if (shouldFree)
6124 		pfree(format);
6125 
6126 	result = DirectFunctionCall3(numeric_in,
6127 								 CStringGetDatum(numstr),
6128 								 ObjectIdGetDatum(InvalidOid),
6129 								 Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
6130 
6131 	if (IS_MULTI(&Num))
6132 	{
6133 		Numeric		x;
6134 		Numeric		a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
6135 															Int32GetDatum(10)));
6136 		Numeric		b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
6137 															Int32GetDatum(-Num.multi)));
6138 
6139 		x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6140 												NumericGetDatum(a),
6141 												NumericGetDatum(b)));
6142 		result = DirectFunctionCall2(numeric_mul,
6143 									 result,
6144 									 NumericGetDatum(x));
6145 	}
6146 
6147 	pfree(numstr);
6148 	return result;
6149 }
6150 
6151 /* ------------------
6152  * NUMERIC to_char()
6153  * ------------------
6154  */
6155 Datum
numeric_to_char(PG_FUNCTION_ARGS)6156 numeric_to_char(PG_FUNCTION_ARGS)
6157 {
6158 	Numeric		value = PG_GETARG_NUMERIC(0);
6159 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6160 	NUMDesc		Num;
6161 	FormatNode *format;
6162 	text	   *result;
6163 	bool		shouldFree;
6164 	int			out_pre_spaces = 0,
6165 				sign = 0;
6166 	char	   *numstr,
6167 			   *orgnum,
6168 			   *p;
6169 	Numeric		x;
6170 
6171 	NUM_TOCHAR_prepare;
6172 
6173 	/*
6174 	 * On DateType depend part (numeric)
6175 	 */
6176 	if (IS_ROMAN(&Num))
6177 	{
6178 		x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6179 												NumericGetDatum(value),
6180 												Int32GetDatum(0)));
6181 		numstr = orgnum =
6182 			int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
6183 														   NumericGetDatum(x))));
6184 	}
6185 	else if (IS_EEEE(&Num))
6186 	{
6187 		orgnum = numeric_out_sci(value, Num.post);
6188 
6189 		/*
6190 		 * numeric_out_sci() does not emit a sign for positive numbers.  We
6191 		 * need to add a space in this case so that positive and negative
6192 		 * numbers are aligned.  We also have to do the right thing for NaN.
6193 		 */
6194 		if (strcmp(orgnum, "NaN") == 0)
6195 		{
6196 			/*
6197 			 * Allow 6 characters for the leading sign, the decimal point,
6198 			 * "e", the exponent's sign and two exponent digits.
6199 			 */
6200 			numstr = (char *) palloc(Num.pre + Num.post + 7);
6201 			fill_str(numstr, '#', Num.pre + Num.post + 6);
6202 			*numstr = ' ';
6203 			*(numstr + Num.pre + 1) = '.';
6204 		}
6205 		else if (*orgnum != '-')
6206 		{
6207 			numstr = (char *) palloc(strlen(orgnum) + 2);
6208 			*numstr = ' ';
6209 			strcpy(numstr + 1, orgnum);
6210 		}
6211 		else
6212 		{
6213 			numstr = orgnum;
6214 		}
6215 	}
6216 	else
6217 	{
6218 		int			numstr_pre_len;
6219 		Numeric		val = value;
6220 
6221 		if (IS_MULTI(&Num))
6222 		{
6223 			Numeric		a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
6224 																Int32GetDatum(10)));
6225 			Numeric		b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
6226 																Int32GetDatum(Num.multi)));
6227 
6228 			x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6229 													NumericGetDatum(a),
6230 													NumericGetDatum(b)));
6231 			val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
6232 													  NumericGetDatum(value),
6233 													  NumericGetDatum(x)));
6234 			Num.pre += Num.multi;
6235 		}
6236 
6237 		x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6238 												NumericGetDatum(val),
6239 												Int32GetDatum(Num.post)));
6240 		orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
6241 													 NumericGetDatum(x)));
6242 
6243 		if (*orgnum == '-')
6244 		{
6245 			sign = '-';
6246 			numstr = orgnum + 1;
6247 		}
6248 		else
6249 		{
6250 			sign = '+';
6251 			numstr = orgnum;
6252 		}
6253 
6254 		if ((p = strchr(numstr, '.')))
6255 			numstr_pre_len = p - numstr;
6256 		else
6257 			numstr_pre_len = strlen(numstr);
6258 
6259 		/* needs padding? */
6260 		if (numstr_pre_len < Num.pre)
6261 			out_pre_spaces = Num.pre - numstr_pre_len;
6262 		/* overflowed prefix digit format? */
6263 		else if (numstr_pre_len > Num.pre)
6264 		{
6265 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6266 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6267 			*(numstr + Num.pre) = '.';
6268 		}
6269 	}
6270 
6271 	NUM_TOCHAR_finish;
6272 	PG_RETURN_TEXT_P(result);
6273 }
6274 
6275 /* ---------------
6276  * INT4 to_char()
6277  * ---------------
6278  */
6279 Datum
int4_to_char(PG_FUNCTION_ARGS)6280 int4_to_char(PG_FUNCTION_ARGS)
6281 {
6282 	int32		value = PG_GETARG_INT32(0);
6283 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6284 	NUMDesc		Num;
6285 	FormatNode *format;
6286 	text	   *result;
6287 	bool		shouldFree;
6288 	int			out_pre_spaces = 0,
6289 				sign = 0;
6290 	char	   *numstr,
6291 			   *orgnum;
6292 
6293 	NUM_TOCHAR_prepare;
6294 
6295 	/*
6296 	 * On DateType depend part (int32)
6297 	 */
6298 	if (IS_ROMAN(&Num))
6299 		numstr = orgnum = int_to_roman(value);
6300 	else if (IS_EEEE(&Num))
6301 	{
6302 		/* we can do it easily because float8 won't lose any precision */
6303 		float8		val = (float8) value;
6304 
6305 		orgnum = (char *) psprintf("%+.*e", Num.post, val);
6306 
6307 		/*
6308 		 * Swap a leading positive sign for a space.
6309 		 */
6310 		if (*orgnum == '+')
6311 			*orgnum = ' ';
6312 
6313 		numstr = orgnum;
6314 	}
6315 	else
6316 	{
6317 		int			numstr_pre_len;
6318 
6319 		if (IS_MULTI(&Num))
6320 		{
6321 			orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6322 														 Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
6323 			Num.pre += Num.multi;
6324 		}
6325 		else
6326 		{
6327 			orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6328 														 Int32GetDatum(value)));
6329 		}
6330 
6331 		if (*orgnum == '-')
6332 		{
6333 			sign = '-';
6334 			orgnum++;
6335 		}
6336 		else
6337 			sign = '+';
6338 
6339 		numstr_pre_len = strlen(orgnum);
6340 
6341 		/* post-decimal digits?  Pad out with zeros. */
6342 		if (Num.post)
6343 		{
6344 			numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6345 			strcpy(numstr, orgnum);
6346 			*(numstr + numstr_pre_len) = '.';
6347 			memset(numstr + numstr_pre_len + 1, '0', Num.post);
6348 			*(numstr + numstr_pre_len + Num.post + 1) = '\0';
6349 		}
6350 		else
6351 			numstr = orgnum;
6352 
6353 		/* needs padding? */
6354 		if (numstr_pre_len < Num.pre)
6355 			out_pre_spaces = Num.pre - numstr_pre_len;
6356 		/* overflowed prefix digit format? */
6357 		else if (numstr_pre_len > Num.pre)
6358 		{
6359 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6360 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6361 			*(numstr + Num.pre) = '.';
6362 		}
6363 	}
6364 
6365 	NUM_TOCHAR_finish;
6366 	PG_RETURN_TEXT_P(result);
6367 }
6368 
6369 /* ---------------
6370  * INT8 to_char()
6371  * ---------------
6372  */
6373 Datum
int8_to_char(PG_FUNCTION_ARGS)6374 int8_to_char(PG_FUNCTION_ARGS)
6375 {
6376 	int64		value = PG_GETARG_INT64(0);
6377 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6378 	NUMDesc		Num;
6379 	FormatNode *format;
6380 	text	   *result;
6381 	bool		shouldFree;
6382 	int			out_pre_spaces = 0,
6383 				sign = 0;
6384 	char	   *numstr,
6385 			   *orgnum;
6386 
6387 	NUM_TOCHAR_prepare;
6388 
6389 	/*
6390 	 * On DateType depend part (int32)
6391 	 */
6392 	if (IS_ROMAN(&Num))
6393 	{
6394 		/* Currently don't support int8 conversion to roman... */
6395 		numstr = orgnum = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value))));
6396 	}
6397 	else if (IS_EEEE(&Num))
6398 	{
6399 		/* to avoid loss of precision, must go via numeric not float8 */
6400 		Numeric		val;
6401 
6402 		val = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
6403 												  Int64GetDatum(value)));
6404 		orgnum = numeric_out_sci(val, Num.post);
6405 
6406 		/*
6407 		 * numeric_out_sci() does not emit a sign for positive numbers.  We
6408 		 * need to add a space in this case so that positive and negative
6409 		 * numbers are aligned.  We don't have to worry about NaN here.
6410 		 */
6411 		if (*orgnum != '-')
6412 		{
6413 			numstr = (char *) palloc(strlen(orgnum) + 2);
6414 			*numstr = ' ';
6415 			strcpy(numstr + 1, orgnum);
6416 		}
6417 		else
6418 		{
6419 			numstr = orgnum;
6420 		}
6421 	}
6422 	else
6423 	{
6424 		int			numstr_pre_len;
6425 
6426 		if (IS_MULTI(&Num))
6427 		{
6428 			double		multi = pow((double) 10, (double) Num.multi);
6429 
6430 			value = DatumGetInt64(DirectFunctionCall2(int8mul,
6431 													  Int64GetDatum(value),
6432 													  DirectFunctionCall1(dtoi8,
6433 																		  Float8GetDatum(multi))));
6434 			Num.pre += Num.multi;
6435 		}
6436 
6437 		orgnum = DatumGetCString(DirectFunctionCall1(int8out,
6438 													 Int64GetDatum(value)));
6439 
6440 		if (*orgnum == '-')
6441 		{
6442 			sign = '-';
6443 			orgnum++;
6444 		}
6445 		else
6446 			sign = '+';
6447 
6448 		numstr_pre_len = strlen(orgnum);
6449 
6450 		/* post-decimal digits?  Pad out with zeros. */
6451 		if (Num.post)
6452 		{
6453 			numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6454 			strcpy(numstr, orgnum);
6455 			*(numstr + numstr_pre_len) = '.';
6456 			memset(numstr + numstr_pre_len + 1, '0', Num.post);
6457 			*(numstr + numstr_pre_len + Num.post + 1) = '\0';
6458 		}
6459 		else
6460 			numstr = orgnum;
6461 
6462 		/* needs padding? */
6463 		if (numstr_pre_len < Num.pre)
6464 			out_pre_spaces = Num.pre - numstr_pre_len;
6465 		/* overflowed prefix digit format? */
6466 		else if (numstr_pre_len > Num.pre)
6467 		{
6468 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6469 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6470 			*(numstr + Num.pre) = '.';
6471 		}
6472 	}
6473 
6474 	NUM_TOCHAR_finish;
6475 	PG_RETURN_TEXT_P(result);
6476 }
6477 
6478 /* -----------------
6479  * FLOAT4 to_char()
6480  * -----------------
6481  */
6482 Datum
float4_to_char(PG_FUNCTION_ARGS)6483 float4_to_char(PG_FUNCTION_ARGS)
6484 {
6485 	float4		value = PG_GETARG_FLOAT4(0);
6486 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6487 	NUMDesc		Num;
6488 	FormatNode *format;
6489 	text	   *result;
6490 	bool		shouldFree;
6491 	int			out_pre_spaces = 0,
6492 				sign = 0;
6493 	char	   *numstr,
6494 			   *orgnum,
6495 			   *p;
6496 
6497 	NUM_TOCHAR_prepare;
6498 
6499 	if (IS_ROMAN(&Num))
6500 		numstr = orgnum = int_to_roman((int) rint(value));
6501 	else if (IS_EEEE(&Num))
6502 	{
6503 		if (isnan(value) || isinf(value))
6504 		{
6505 			/*
6506 			 * Allow 6 characters for the leading sign, the decimal point,
6507 			 * "e", the exponent's sign and two exponent digits.
6508 			 */
6509 			numstr = (char *) palloc(Num.pre + Num.post + 7);
6510 			fill_str(numstr, '#', Num.pre + Num.post + 6);
6511 			*numstr = ' ';
6512 			*(numstr + Num.pre + 1) = '.';
6513 		}
6514 		else
6515 		{
6516 			numstr = orgnum = psprintf("%+.*e", Num.post, value);
6517 
6518 			/*
6519 			 * Swap a leading positive sign for a space.
6520 			 */
6521 			if (*orgnum == '+')
6522 				*orgnum = ' ';
6523 
6524 			numstr = orgnum;
6525 		}
6526 	}
6527 	else
6528 	{
6529 		float4		val = value;
6530 		int			numstr_pre_len;
6531 
6532 		if (IS_MULTI(&Num))
6533 		{
6534 			float		multi = pow((double) 10, (double) Num.multi);
6535 
6536 			val = value * multi;
6537 			Num.pre += Num.multi;
6538 		}
6539 
6540 		orgnum = (char *) psprintf("%.0f", fabs(val));
6541 		numstr_pre_len = strlen(orgnum);
6542 
6543 		/* adjust post digits to fit max float digits */
6544 		if (numstr_pre_len >= FLT_DIG)
6545 			Num.post = 0;
6546 		else if (numstr_pre_len + Num.post > FLT_DIG)
6547 			Num.post = FLT_DIG - numstr_pre_len;
6548 		orgnum = psprintf("%.*f", Num.post, val);
6549 
6550 		if (*orgnum == '-')
6551 		{						/* < 0 */
6552 			sign = '-';
6553 			numstr = orgnum + 1;
6554 		}
6555 		else
6556 		{
6557 			sign = '+';
6558 			numstr = orgnum;
6559 		}
6560 
6561 		if ((p = strchr(numstr, '.')))
6562 			numstr_pre_len = p - numstr;
6563 		else
6564 			numstr_pre_len = strlen(numstr);
6565 
6566 		/* needs padding? */
6567 		if (numstr_pre_len < Num.pre)
6568 			out_pre_spaces = Num.pre - numstr_pre_len;
6569 		/* overflowed prefix digit format? */
6570 		else if (numstr_pre_len > Num.pre)
6571 		{
6572 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6573 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6574 			*(numstr + Num.pre) = '.';
6575 		}
6576 	}
6577 
6578 	NUM_TOCHAR_finish;
6579 	PG_RETURN_TEXT_P(result);
6580 }
6581 
6582 /* -----------------
6583  * FLOAT8 to_char()
6584  * -----------------
6585  */
6586 Datum
float8_to_char(PG_FUNCTION_ARGS)6587 float8_to_char(PG_FUNCTION_ARGS)
6588 {
6589 	float8		value = PG_GETARG_FLOAT8(0);
6590 	text	   *fmt = PG_GETARG_TEXT_PP(1);
6591 	NUMDesc		Num;
6592 	FormatNode *format;
6593 	text	   *result;
6594 	bool		shouldFree;
6595 	int			out_pre_spaces = 0,
6596 				sign = 0;
6597 	char	   *numstr,
6598 			   *orgnum,
6599 			   *p;
6600 
6601 	NUM_TOCHAR_prepare;
6602 
6603 	if (IS_ROMAN(&Num))
6604 		numstr = orgnum = int_to_roman((int) rint(value));
6605 	else if (IS_EEEE(&Num))
6606 	{
6607 		if (isnan(value) || isinf(value))
6608 		{
6609 			/*
6610 			 * Allow 6 characters for the leading sign, the decimal point,
6611 			 * "e", the exponent's sign and two exponent digits.
6612 			 */
6613 			numstr = (char *) palloc(Num.pre + Num.post + 7);
6614 			fill_str(numstr, '#', Num.pre + Num.post + 6);
6615 			*numstr = ' ';
6616 			*(numstr + Num.pre + 1) = '.';
6617 		}
6618 		else
6619 		{
6620 			numstr = orgnum = (char *) psprintf("%+.*e", Num.post, value);
6621 
6622 			/*
6623 			 * Swap a leading positive sign for a space.
6624 			 */
6625 			if (*orgnum == '+')
6626 				*orgnum = ' ';
6627 
6628 			numstr = orgnum;
6629 		}
6630 	}
6631 	else
6632 	{
6633 		float8		val = value;
6634 		int			numstr_pre_len;
6635 
6636 		if (IS_MULTI(&Num))
6637 		{
6638 			double		multi = pow((double) 10, (double) Num.multi);
6639 
6640 			val = value * multi;
6641 			Num.pre += Num.multi;
6642 		}
6643 		orgnum = psprintf("%.0f", fabs(val));
6644 		numstr_pre_len = strlen(orgnum);
6645 
6646 		/* adjust post digits to fit max double digits */
6647 		if (numstr_pre_len >= DBL_DIG)
6648 			Num.post = 0;
6649 		else if (numstr_pre_len + Num.post > DBL_DIG)
6650 			Num.post = DBL_DIG - numstr_pre_len;
6651 		orgnum = psprintf("%.*f", Num.post, val);
6652 
6653 		if (*orgnum == '-')
6654 		{						/* < 0 */
6655 			sign = '-';
6656 			numstr = orgnum + 1;
6657 		}
6658 		else
6659 		{
6660 			sign = '+';
6661 			numstr = orgnum;
6662 		}
6663 
6664 		if ((p = strchr(numstr, '.')))
6665 			numstr_pre_len = p - numstr;
6666 		else
6667 			numstr_pre_len = strlen(numstr);
6668 
6669 		/* needs padding? */
6670 		if (numstr_pre_len < Num.pre)
6671 			out_pre_spaces = Num.pre - numstr_pre_len;
6672 		/* overflowed prefix digit format? */
6673 		else if (numstr_pre_len > Num.pre)
6674 		{
6675 			numstr = (char *) palloc(Num.pre + Num.post + 2);
6676 			fill_str(numstr, '#', Num.pre + Num.post + 1);
6677 			*(numstr + Num.pre) = '.';
6678 		}
6679 	}
6680 
6681 	NUM_TOCHAR_finish;
6682 	PG_RETURN_TEXT_P(result);
6683 }
6684