/* -----------------------------------------------------------------------
 * formatting.c
 *
 * src/backend/utils/adt/formatting.c
 *
 *
 *	 Portions Copyright (c) 1999-2020, PostgreSQL Global Development Group
 *
 *
 *	 TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
 *
 *	 The PostgreSQL routines for timestamp/int/float/numeric formatting,
 *	 inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
 *
 *
 *	 Cache & Memory:
 *	The routines keep their own internal cache of format pictures.
 *
 *	The cache uses a static buffer and is persistent across transactions.  If
 *	a format picture is bigger than the cache buffer, the parser is called
 *	every time.
 *
 *	 NOTE for Number version:
 *	Everything in this version is implemented as keywords (suffixes are
 *	not used), because a format picture describes only *one* item (a
 *	number).  This differs from the timestamp version, where each keyword
 *	can have a suffix.
 *
 *	 NOTE for Timestamp routines:
 *	This module does *not* use the POSIX 'struct tm' type, but rather the
 *	PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
 *	year is the full year number, *not* an offset from 1900.
 *	The module supports AD / BC / AM / PM.
34 * 35 * Supported types for to_char(): 36 * 37 * Timestamp, Numeric, int4, int8, float4, float8 38 * 39 * Supported types for reverse conversion: 40 * 41 * Timestamp - to_timestamp() 42 * Date - to_date() 43 * Numeric - to_number() 44 * 45 * 46 * Karel Zak 47 * 48 * TODO 49 * - better number building (formatting) / parsing, now it isn't 50 * ideal code 51 * - use Assert() 52 * - add support for roman number to standard number conversion 53 * - add support for number spelling 54 * - add support for string to string formatting (we must be better 55 * than Oracle :-), 56 * to_char('Hello', 'X X X X X') -> 'H e l l o' 57 * 58 * ----------------------------------------------------------------------- 59 */ 60 61 #ifdef DEBUG_TO_FROM_CHAR 62 #define DEBUG_elog_output DEBUG3 63 #endif 64 65 #include "postgres.h" 66 67 #include <ctype.h> 68 #include <unistd.h> 69 #include <math.h> 70 #include <float.h> 71 #include <limits.h> 72 73 /* 74 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems 75 * declare them in <wchar.h>, so include that too. 76 */ 77 #include <wchar.h> 78 #ifdef HAVE_WCTYPE_H 79 #include <wctype.h> 80 #endif 81 82 #ifdef USE_ICU 83 #include <unicode/ustring.h> 84 #endif 85 86 #include "catalog/pg_collation.h" 87 #include "catalog/pg_type.h" 88 #include "mb/pg_wchar.h" 89 #include "parser/scansup.h" 90 #include "utils/builtins.h" 91 #include "utils/date.h" 92 #include "utils/datetime.h" 93 #include "utils/float.h" 94 #include "utils/formatting.h" 95 #include "utils/int8.h" 96 #include "utils/memutils.h" 97 #include "utils/numeric.h" 98 #include "utils/pg_locale.h" 99 100 /* ---------- 101 * Convenience macros for error handling 102 * ---------- 103 * 104 * Two macros below help to handle errors in functions that take 105 * 'bool *have_error' argument. When this argument is not NULL, it's expected 106 * that function will suppress ereports when possible. Instead it should 107 * return some default value and set *have_error flag. 
108 * 109 * RETURN_ERROR() macro intended to wrap ereport() calls. When have_error 110 * function argument is not NULL, then instead of ereport'ing we set 111 * *have_error flag and go to on_error label. It's supposed that jump 112 * resources will be freed and some 'default' value returned. 113 * 114 * CHECK_ERROR() jumps on_error label when *have_error flag is defined and set. 115 * It's supposed to be used for immediate exit from the function on error 116 * after call of another function with 'bool *have_error' argument. 117 */ 118 #define RETURN_ERROR(throw_error) \ 119 do { \ 120 if (have_error) \ 121 { \ 122 *have_error = true; \ 123 goto on_error; \ 124 } \ 125 else \ 126 { \ 127 throw_error; \ 128 } \ 129 } while (0) 130 131 #define CHECK_ERROR \ 132 do { \ 133 if (have_error && *have_error) \ 134 goto on_error; \ 135 } while (0) 136 137 /* ---------- 138 * Routines flags 139 * ---------- 140 */ 141 #define DCH_FLAG 0x1 /* DATE-TIME flag */ 142 #define NUM_FLAG 0x2 /* NUMBER flag */ 143 #define STD_FLAG 0x4 /* STANDARD flag */ 144 145 /* ---------- 146 * KeyWord Index (ascii from position 32 (' ') to 126 (~)) 147 * ---------- 148 */ 149 #define KeyWord_INDEX_SIZE ('~' - ' ') 150 #define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1) 151 152 /* ---------- 153 * Maximal length of one node 154 * ---------- 155 */ 156 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */ 157 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */ 158 159 160 /* ---------- 161 * Format parser structs 162 * ---------- 163 */ 164 typedef struct 165 { 166 const char *name; /* suffix string */ 167 int len, /* suffix length */ 168 id, /* used in node->suffix */ 169 type; /* prefix / postfix */ 170 } KeySuffix; 171 172 /* ---------- 173 * FromCharDateMode 174 * ---------- 175 * 176 * This value is used to nominate one of several distinct (and mutually 177 * exclusive) date conventions that a keyword can belong to. 
178 */ 179 typedef enum 180 { 181 FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */ 182 FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */ 183 FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */ 184 } FromCharDateMode; 185 186 typedef struct 187 { 188 const char *name; 189 int len; 190 int id; 191 bool is_digit; 192 FromCharDateMode date_mode; 193 } KeyWord; 194 195 typedef struct 196 { 197 uint8 type; /* NODE_TYPE_XXX, see below */ 198 char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */ 199 uint8 suffix; /* keyword prefix/suffix code, if any */ 200 const KeyWord *key; /* if type is ACTION */ 201 } FormatNode; 202 203 #define NODE_TYPE_END 1 204 #define NODE_TYPE_ACTION 2 205 #define NODE_TYPE_CHAR 3 206 #define NODE_TYPE_SEPARATOR 4 207 #define NODE_TYPE_SPACE 5 208 209 #define SUFFTYPE_PREFIX 1 210 #define SUFFTYPE_POSTFIX 2 211 212 #define CLOCK_24_HOUR 0 213 #define CLOCK_12_HOUR 1 214 215 216 /* ---------- 217 * Full months 218 * ---------- 219 */ 220 static const char *const months_full[] = { 221 "January", "February", "March", "April", "May", "June", "July", 222 "August", "September", "October", "November", "December", NULL 223 }; 224 225 static const char *const days_short[] = { 226 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL 227 }; 228 229 /* ---------- 230 * AD / BC 231 * ---------- 232 * There is no 0 AD. Years go from 1 BC to 1 AD, so we make it 233 * positive and map year == -1 to year zero, and shift all negative 234 * years up one. For interval years, we just return the year. 235 */ 236 #define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year))) 237 238 #define A_D_STR "A.D." 239 #define a_d_STR "a.d." 240 #define AD_STR "AD" 241 #define ad_STR "ad" 242 243 #define B_C_STR "B.C." 244 #define b_c_STR "b.c." 245 #define BC_STR "BC" 246 #define bc_STR "bc" 247 248 /* 249 * AD / BC strings for seq_search. 
250 * 251 * These are given in two variants, a long form with periods and a standard 252 * form without. 253 * 254 * The array is laid out such that matches for AD have an even index, and 255 * matches for BC have an odd index. So the boolean value for BC is given by 256 * taking the array index of the match, modulo 2. 257 */ 258 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL}; 259 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL}; 260 261 /* ---------- 262 * AM / PM 263 * ---------- 264 */ 265 #define A_M_STR "A.M." 266 #define a_m_STR "a.m." 267 #define AM_STR "AM" 268 #define am_STR "am" 269 270 #define P_M_STR "P.M." 271 #define p_m_STR "p.m." 272 #define PM_STR "PM" 273 #define pm_STR "pm" 274 275 /* 276 * AM / PM strings for seq_search. 277 * 278 * These are given in two variants, a long form with periods and a standard 279 * form without. 280 * 281 * The array is laid out such that matches for AM have an even index, and 282 * matches for PM have an odd index. So the boolean value for PM is given by 283 * taking the array index of the match, modulo 2. 
284 */ 285 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL}; 286 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL}; 287 288 /* ---------- 289 * Months in roman-numeral 290 * (Must be in reverse order for seq_search (in FROM_CHAR), because 291 * 'VIII' must have higher precedence than 'V') 292 * ---------- 293 */ 294 static const char *const rm_months_upper[] = 295 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL}; 296 297 static const char *const rm_months_lower[] = 298 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL}; 299 300 /* ---------- 301 * Roman numbers 302 * ---------- 303 */ 304 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL}; 305 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL}; 306 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL}; 307 308 /* ---------- 309 * Ordinal postfixes 310 * ---------- 311 */ 312 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL}; 313 static const char *const numth[] = {"st", "nd", "rd", "th", NULL}; 314 315 /* ---------- 316 * Flags & Options: 317 * ---------- 318 */ 319 #define TH_UPPER 1 320 #define TH_LOWER 2 321 322 /* ---------- 323 * Number description struct 324 * ---------- 325 */ 326 typedef struct 327 { 328 int pre, /* (count) numbers before decimal */ 329 post, /* (count) numbers after decimal */ 330 lsign, /* want locales sign */ 331 flag, /* number parameters */ 332 pre_lsign_num, /* tmp value for lsign */ 333 multi, /* multiplier for 'V' */ 334 zero_start, /* position of first zero */ 335 zero_end, /* position of last zero */ 336 need_locale; /* needs it locale */ 337 } NUMDesc; 338 339 /* ---------- 340 * Flags for NUMBER version 341 * ---------- 342 */ 343 #define NUM_F_DECIMAL (1 << 1) 344 #define NUM_F_LDECIMAL (1 << 2) 
345 #define NUM_F_ZERO (1 << 3) 346 #define NUM_F_BLANK (1 << 4) 347 #define NUM_F_FILLMODE (1 << 5) 348 #define NUM_F_LSIGN (1 << 6) 349 #define NUM_F_BRACKET (1 << 7) 350 #define NUM_F_MINUS (1 << 8) 351 #define NUM_F_PLUS (1 << 9) 352 #define NUM_F_ROMAN (1 << 10) 353 #define NUM_F_MULTI (1 << 11) 354 #define NUM_F_PLUS_POST (1 << 12) 355 #define NUM_F_MINUS_POST (1 << 13) 356 #define NUM_F_EEEE (1 << 14) 357 358 #define NUM_LSIGN_PRE (-1) 359 #define NUM_LSIGN_POST 1 360 #define NUM_LSIGN_NONE 0 361 362 /* ---------- 363 * Tests 364 * ---------- 365 */ 366 #define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL) 367 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL) 368 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO) 369 #define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK) 370 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE) 371 #define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET) 372 #define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS) 373 #define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN) 374 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS) 375 #define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN) 376 #define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI) 377 #define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE) 378 379 /* ---------- 380 * Format picture cache 381 * 382 * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long; 383 * likewise number format pictures up to NUM_CACHE_SIZE bytes long. 384 * 385 * For simplicity, the cache entries are fixed-size, so they allow for the 386 * worst case of a FormatNode for each byte in the picture string. 387 * 388 * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and 389 * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that 390 * we don't waste too much space by palloc'ing them individually. Be sure 391 * to adjust those macros if you add fields to those structs. 392 * 393 * The max number of entries in each cache is DCH_CACHE_ENTRIES 394 * resp. NUM_CACHE_ENTRIES. 
395 * ---------- 396 */ 397 #define DCH_CACHE_OVERHEAD \ 398 MAXALIGN(sizeof(bool) + sizeof(int)) 399 #define NUM_CACHE_OVERHEAD \ 400 MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc)) 401 402 #define DCH_CACHE_SIZE \ 403 ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1) 404 #define NUM_CACHE_SIZE \ 405 ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1) 406 407 #define DCH_CACHE_ENTRIES 20 408 #define NUM_CACHE_ENTRIES 20 409 410 typedef struct 411 { 412 FormatNode format[DCH_CACHE_SIZE + 1]; 413 char str[DCH_CACHE_SIZE + 1]; 414 bool std; 415 bool valid; 416 int age; 417 } DCHCacheEntry; 418 419 typedef struct 420 { 421 FormatNode format[NUM_CACHE_SIZE + 1]; 422 char str[NUM_CACHE_SIZE + 1]; 423 bool valid; 424 int age; 425 NUMDesc Num; 426 } NUMCacheEntry; 427 428 /* global cache for date/time format pictures */ 429 static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES]; 430 static int n_DCHCache = 0; /* current number of entries */ 431 static int DCHCounter = 0; /* aging-event counter */ 432 433 /* global cache for number format pictures */ 434 static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES]; 435 static int n_NUMCache = 0; /* current number of entries */ 436 static int NUMCounter = 0; /* aging-event counter */ 437 438 /* ---------- 439 * For char->date/time conversion 440 * ---------- 441 */ 442 typedef struct 443 { 444 FromCharDateMode mode; 445 int hh, 446 pm, 447 mi, 448 ss, 449 ssss, 450 d, /* stored as 1-7, Sunday = 1, 0 means missing */ 451 dd, 452 ddd, 453 mm, 454 ms, 455 year, 456 bc, 457 ww, 458 w, 459 cc, 460 j, 461 us, 462 yysz, /* is it YY or YYYY ? */ 463 clock, /* 12 or 24 hour clock? 
*/ 464 tzsign, /* +1, -1 or 0 if timezone info is absent */ 465 tzh, 466 tzm, 467 ff; /* fractional precision */ 468 } TmFromChar; 469 470 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar)) 471 472 /* ---------- 473 * Debug 474 * ---------- 475 */ 476 #ifdef DEBUG_TO_FROM_CHAR 477 #define DEBUG_TMFC(_X) \ 478 elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \ 479 (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \ 480 (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \ 481 (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \ 482 (_X)->yysz, (_X)->clock) 483 #define DEBUG_TM(_X) \ 484 elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\ 485 (_X)->tm_sec, (_X)->tm_year,\ 486 (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\ 487 (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon) 488 #else 489 #define DEBUG_TMFC(_X) 490 #define DEBUG_TM(_X) 491 #endif 492 493 /* ---------- 494 * Datetime to char conversion 495 * ---------- 496 */ 497 typedef struct TmToChar 498 { 499 struct pg_tm tm; /* classic 'tm' struct */ 500 fsec_t fsec; /* fractional seconds */ 501 const char *tzn; /* timezone */ 502 } TmToChar; 503 504 #define tmtcTm(_X) (&(_X)->tm) 505 #define tmtcTzn(_X) ((_X)->tzn) 506 #define tmtcFsec(_X) ((_X)->fsec) 507 508 #define ZERO_tm(_X) \ 509 do { \ 510 (_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \ 511 (_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \ 512 (_X)->tm_mday = (_X)->tm_mon = 1; \ 513 (_X)->tm_zone = NULL; \ 514 } while(0) 515 516 #define ZERO_tmtc(_X) \ 517 do { \ 518 ZERO_tm( tmtcTm(_X) ); \ 519 tmtcFsec(_X) = 0; \ 520 tmtcTzn(_X) = NULL; \ 521 } while(0) 522 523 /* 524 * to_char(time) appears to to_char() as an interval, so this check 525 * is really for interval and time data types. 
526 */ 527 #define INVALID_FOR_INTERVAL \ 528 do { \ 529 if (is_interval) \ 530 ereport(ERROR, \ 531 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \ 532 errmsg("invalid format specification for an interval value"), \ 533 errhint("Intervals are not tied to specific calendar dates."))); \ 534 } while(0) 535 536 /***************************************************************************** 537 * KeyWord definitions 538 *****************************************************************************/ 539 540 /* ---------- 541 * Suffixes (FormatNode.suffix is an OR of these codes) 542 * ---------- 543 */ 544 #define DCH_S_FM 0x01 545 #define DCH_S_TH 0x02 546 #define DCH_S_th 0x04 547 #define DCH_S_SP 0x08 548 #define DCH_S_TM 0x10 549 550 /* ---------- 551 * Suffix tests 552 * ---------- 553 */ 554 #define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0) 555 #define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0) 556 #define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0) 557 #define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER) 558 559 /* Oracle toggles FM behavior, we don't; see docs. */ 560 #define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0) 561 #define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0) 562 #define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0) 563 564 /* ---------- 565 * Suffixes definition for DATE-TIME TO/FROM CHAR 566 * ---------- 567 */ 568 #define TM_SUFFIX_LEN 2 569 570 static const KeySuffix DCH_suff[] = { 571 {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX}, 572 {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX}, 573 {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX}, 574 {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX}, 575 {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX}, 576 {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX}, 577 {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX}, 578 /* last */ 579 {NULL, 0, 0, 0} 580 }; 581 582 583 /* ---------- 584 * Format-pictures (KeyWord). 
585 * 586 * The KeyWord field; alphabetic sorted, *BUT* strings alike is sorted 587 * complicated -to-> easy: 588 * 589 * (example: "DDD","DD","Day","D" ) 590 * 591 * (this specific sort needs the algorithm for sequential search for strings, 592 * which not has exact end; -> How keyword is in "HH12blabla" ? - "HH" 593 * or "HH12"? You must first try "HH12", because "HH" is in string, but 594 * it is not good. 595 * 596 * (!) 597 * - Position for the keyword is similar as position in the enum DCH/NUM_poz. 598 * (!) 599 * 600 * For fast search is used the 'int index[]', index is ascii table from position 601 * 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII 602 * position or -1 if char is not used in the KeyWord. Search example for 603 * string "MM": 604 * 1) see in index to index['M' - 32], 605 * 2) take keywords position (enum DCH_MI) from index 606 * 3) run sequential search in keywords[] from this position 607 * 608 * ---------- 609 */ 610 611 typedef enum 612 { 613 DCH_A_D, 614 DCH_A_M, 615 DCH_AD, 616 DCH_AM, 617 DCH_B_C, 618 DCH_BC, 619 DCH_CC, 620 DCH_DAY, 621 DCH_DDD, 622 DCH_DD, 623 DCH_DY, 624 DCH_Day, 625 DCH_Dy, 626 DCH_D, 627 DCH_FF1, 628 DCH_FF2, 629 DCH_FF3, 630 DCH_FF4, 631 DCH_FF5, 632 DCH_FF6, 633 DCH_FX, /* global suffix */ 634 DCH_HH24, 635 DCH_HH12, 636 DCH_HH, 637 DCH_IDDD, 638 DCH_ID, 639 DCH_IW, 640 DCH_IYYY, 641 DCH_IYY, 642 DCH_IY, 643 DCH_I, 644 DCH_J, 645 DCH_MI, 646 DCH_MM, 647 DCH_MONTH, 648 DCH_MON, 649 DCH_MS, 650 DCH_Month, 651 DCH_Mon, 652 DCH_OF, 653 DCH_P_M, 654 DCH_PM, 655 DCH_Q, 656 DCH_RM, 657 DCH_SSSSS, 658 DCH_SSSS, 659 DCH_SS, 660 DCH_TZH, 661 DCH_TZM, 662 DCH_TZ, 663 DCH_US, 664 DCH_WW, 665 DCH_W, 666 DCH_Y_YYY, 667 DCH_YYYY, 668 DCH_YYY, 669 DCH_YY, 670 DCH_Y, 671 DCH_a_d, 672 DCH_a_m, 673 DCH_ad, 674 DCH_am, 675 DCH_b_c, 676 DCH_bc, 677 DCH_cc, 678 DCH_day, 679 DCH_ddd, 680 DCH_dd, 681 DCH_dy, 682 DCH_d, 683 DCH_ff1, 684 DCH_ff2, 685 DCH_ff3, 686 DCH_ff4, 687 DCH_ff5, 688 DCH_ff6, 689 DCH_fx, 690 
DCH_hh24, 691 DCH_hh12, 692 DCH_hh, 693 DCH_iddd, 694 DCH_id, 695 DCH_iw, 696 DCH_iyyy, 697 DCH_iyy, 698 DCH_iy, 699 DCH_i, 700 DCH_j, 701 DCH_mi, 702 DCH_mm, 703 DCH_month, 704 DCH_mon, 705 DCH_ms, 706 DCH_p_m, 707 DCH_pm, 708 DCH_q, 709 DCH_rm, 710 DCH_sssss, 711 DCH_ssss, 712 DCH_ss, 713 DCH_tz, 714 DCH_us, 715 DCH_ww, 716 DCH_w, 717 DCH_y_yyy, 718 DCH_yyyy, 719 DCH_yyy, 720 DCH_yy, 721 DCH_y, 722 723 /* last */ 724 _DCH_last_ 725 } DCH_poz; 726 727 typedef enum 728 { 729 NUM_COMMA, 730 NUM_DEC, 731 NUM_0, 732 NUM_9, 733 NUM_B, 734 NUM_C, 735 NUM_D, 736 NUM_E, 737 NUM_FM, 738 NUM_G, 739 NUM_L, 740 NUM_MI, 741 NUM_PL, 742 NUM_PR, 743 NUM_RN, 744 NUM_SG, 745 NUM_SP, 746 NUM_S, 747 NUM_TH, 748 NUM_V, 749 NUM_b, 750 NUM_c, 751 NUM_d, 752 NUM_e, 753 NUM_fm, 754 NUM_g, 755 NUM_l, 756 NUM_mi, 757 NUM_pl, 758 NUM_pr, 759 NUM_rn, 760 NUM_sg, 761 NUM_sp, 762 NUM_s, 763 NUM_th, 764 NUM_v, 765 766 /* last */ 767 _NUM_last_ 768 } NUM_poz; 769 770 /* ---------- 771 * KeyWords for DATE-TIME version 772 * ---------- 773 */ 774 static const KeyWord DCH_keywords[] = { 775 /* name, len, id, is_digit, date_mode */ 776 {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */ 777 {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE}, 778 {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE}, 779 {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE}, 780 {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */ 781 {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE}, 782 {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */ 783 {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */ 784 {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN}, 785 {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN}, 786 {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE}, 787 {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE}, 788 {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE}, 789 {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN}, 790 {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */ 791 {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE}, 
792 {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE}, 793 {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE}, 794 {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE}, 795 {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE}, 796 {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, 797 {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */ 798 {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE}, 799 {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE}, 800 {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */ 801 {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK}, 802 {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK}, 803 {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK}, 804 {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK}, 805 {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK}, 806 {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK}, 807 {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */ 808 {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */ 809 {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN}, 810 {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN}, 811 {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN}, 812 {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE}, 813 {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN}, 814 {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN}, 815 {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */ 816 {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */ 817 {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE}, 818 {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */ 819 {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */ 820 {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */ 821 {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, 822 {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE}, 823 {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */ 824 {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE}, 825 {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE}, 826 {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */ 827 {"WW", 2, DCH_WW, true, 
FROM_CHAR_DATE_GREGORIAN}, /* W */ 828 {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN}, 829 {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */ 830 {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN}, 831 {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN}, 832 {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN}, 833 {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN}, 834 {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */ 835 {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE}, 836 {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE}, 837 {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE}, 838 {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */ 839 {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE}, 840 {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */ 841 {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */ 842 {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN}, 843 {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN}, 844 {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE}, 845 {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN}, 846 {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */ 847 {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE}, 848 {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE}, 849 {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE}, 850 {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE}, 851 {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE}, 852 {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, 853 {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */ 854 {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE}, 855 {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE}, 856 {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */ 857 {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK}, 858 {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK}, 859 {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK}, 860 {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK}, 861 {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK}, 862 {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK}, 863 {"j", 1, DCH_J, 
true, FROM_CHAR_DATE_NONE}, /* j */ 864 {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */ 865 {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN}, 866 {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN}, 867 {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN}, 868 {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE}, 869 {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */ 870 {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE}, 871 {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */ 872 {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */ 873 {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */ 874 {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, 875 {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE}, 876 {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */ 877 {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */ 878 {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */ 879 {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN}, 880 {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */ 881 {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN}, 882 {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN}, 883 {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN}, 884 {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN}, 885 886 /* last */ 887 {NULL, 0, 0, 0, 0} 888 }; 889 890 /* ---------- 891 * KeyWords for NUMBER version 892 * 893 * The is_digit and date_mode fields are not relevant here. 894 * ---------- 895 */ 896 static const KeyWord NUM_keywords[] = { 897 /* name, len, id is in Index */ 898 {",", 1, NUM_COMMA}, /* , */ 899 {".", 1, NUM_DEC}, /* . 
*/ 900 {"0", 1, NUM_0}, /* 0 */ 901 {"9", 1, NUM_9}, /* 9 */ 902 {"B", 1, NUM_B}, /* B */ 903 {"C", 1, NUM_C}, /* C */ 904 {"D", 1, NUM_D}, /* D */ 905 {"EEEE", 4, NUM_E}, /* E */ 906 {"FM", 2, NUM_FM}, /* F */ 907 {"G", 1, NUM_G}, /* G */ 908 {"L", 1, NUM_L}, /* L */ 909 {"MI", 2, NUM_MI}, /* M */ 910 {"PL", 2, NUM_PL}, /* P */ 911 {"PR", 2, NUM_PR}, 912 {"RN", 2, NUM_RN}, /* R */ 913 {"SG", 2, NUM_SG}, /* S */ 914 {"SP", 2, NUM_SP}, 915 {"S", 1, NUM_S}, 916 {"TH", 2, NUM_TH}, /* T */ 917 {"V", 1, NUM_V}, /* V */ 918 {"b", 1, NUM_B}, /* b */ 919 {"c", 1, NUM_C}, /* c */ 920 {"d", 1, NUM_D}, /* d */ 921 {"eeee", 4, NUM_E}, /* e */ 922 {"fm", 2, NUM_FM}, /* f */ 923 {"g", 1, NUM_G}, /* g */ 924 {"l", 1, NUM_L}, /* l */ 925 {"mi", 2, NUM_MI}, /* m */ 926 {"pl", 2, NUM_PL}, /* p */ 927 {"pr", 2, NUM_PR}, 928 {"rn", 2, NUM_rn}, /* r */ 929 {"sg", 2, NUM_SG}, /* s */ 930 {"sp", 2, NUM_SP}, 931 {"s", 1, NUM_S}, 932 {"th", 2, NUM_th}, /* t */ 933 {"v", 1, NUM_V}, /* v */ 934 935 /* last */ 936 {NULL, 0, 0} 937 }; 938 939 940 /* ---------- 941 * KeyWords index for DATE-TIME version 942 * ---------- 943 */ 944 static const int DCH_index[KeyWord_INDEX_SIZE] = { 945 /* 946 0 1 2 3 4 5 6 7 8 9 947 */ 948 /*---- first 0..31 chars are skipped ----*/ 949 950 -1, -1, -1, -1, -1, -1, -1, -1, 951 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 952 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 953 -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1, 954 DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF, 955 DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY, 956 -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc, 957 DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi, 958 -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww, 959 -1, DCH_y_yyy, -1, -1, -1, -1 960 961 /*---- chars over 126 are skipped ----*/ 962 }; 963 964 /* ---------- 965 * KeyWords index for NUMBER version 966 * ---------- 967 */ 968 static 
const int NUM_index[KeyWord_INDEX_SIZE] = { 969 /* 970 0 1 2 3 4 5 6 7 8 9 971 */ 972 /*---- first 0..31 chars are skipped ----*/ 973 974 -1, -1, -1, -1, -1, -1, -1, -1, 975 -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1, 976 -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1, 977 -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E, 978 NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1, 979 NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1, 980 -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c, 981 NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi, 982 -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1, 983 -1, -1, -1, -1, -1, -1 984 985 /*---- chars over 126 are skipped ----*/ 986 }; 987 988 /* ---------- 989 * Number processor struct 990 * ---------- 991 */ 992 typedef struct NUMProc 993 { 994 bool is_to_char; 995 NUMDesc *Num; /* number description */ 996 997 int sign, /* '-' or '+' */ 998 sign_wrote, /* was sign write */ 999 num_count, /* number of write digits */ 1000 num_in, /* is inside number */ 1001 num_curr, /* current position in number */ 1002 out_pre_spaces, /* spaces before first digit */ 1003 1004 read_dec, /* to_number - was read dec. point */ 1005 read_post, /* to_number - number of dec. digit */ 1006 read_pre; /* to_number - number non-dec. 
digit */ 1007 1008 char *number, /* string with number */ 1009 *number_p, /* pointer to current number position */ 1010 *inout, /* in / out buffer */ 1011 *inout_p, /* pointer to current inout position */ 1012 *last_relevant, /* last relevant number after decimal point */ 1013 1014 *L_negative_sign, /* Locale */ 1015 *L_positive_sign, 1016 *decimal, 1017 *L_thousands_sep, 1018 *L_currency_symbol; 1019 } NUMProc; 1020 1021 /* Return flags for DCH_from_char() */ 1022 #define DCH_DATED 0x01 1023 #define DCH_TIMED 0x02 1024 #define DCH_ZONED 0x04 1025 1026 /* ---------- 1027 * Functions 1028 * ---------- 1029 */ 1030 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw, 1031 const int *index); 1032 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type); 1033 static bool is_separator_char(const char *str); 1034 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); 1035 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, 1036 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num); 1037 1038 static void DCH_to_char(FormatNode *node, bool is_interval, 1039 TmToChar *in, char *out, Oid collid); 1040 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, 1041 Oid collid, bool std, bool *have_error); 1042 1043 #ifdef DEBUG_TO_FROM_CHAR 1044 static void dump_index(const KeyWord *k, const int *index); 1045 static void dump_node(FormatNode *node, int max); 1046 #endif 1047 1048 static const char *get_th(char *num, int type); 1049 static char *str_numth(char *dest, char *num, int type); 1050 static int adjust_partial_year_to_2020(int year); 1051 static int strspace_len(const char *str); 1052 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, 1053 bool *have_error); 1054 static void from_char_set_int(int *dest, const int value, const FormatNode *node, 1055 bool *have_error); 1056 static int from_char_parse_int_len(int *dest, const char 
**src, const int len, 1057 FormatNode *node, bool *have_error); 1058 static int from_char_parse_int(int *dest, const char **src, FormatNode *node, 1059 bool *have_error); 1060 static int seq_search_ascii(const char *name, const char *const *array, int *len); 1061 static int seq_search_localized(const char *name, char **array, int *len, 1062 Oid collid); 1063 static int from_char_seq_search(int *dest, const char **src, 1064 const char *const *array, 1065 char **localized_array, Oid collid, 1066 FormatNode *node, bool *have_error); 1067 static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std, 1068 struct pg_tm *tm, fsec_t *fsec, int *fprec, 1069 uint32 *flags, bool *have_error); 1070 static char *fill_str(char *str, int c, int max); 1071 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); 1072 static char *int_to_roman(int number); 1073 static void NUM_prepare_locale(NUMProc *Np); 1074 static char *get_last_relevant_decnum(char *num); 1075 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len); 1076 static void NUM_numpart_to_char(NUMProc *Np, int id); 1077 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, 1078 char *number, int input_len, int to_char_out_pre_spaces, 1079 int sign, bool is_to_char, Oid collid); 1080 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std); 1081 static DCHCacheEntry *DCH_cache_search(const char *str, bool std); 1082 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std); 1083 static NUMCacheEntry *NUM_cache_getnew(const char *str); 1084 static NUMCacheEntry *NUM_cache_search(const char *str); 1085 static NUMCacheEntry *NUM_cache_fetch(const char *str); 1086 1087 1088 /* ---------- 1089 * Fast sequential search, use index for data selection which 1090 * go to seq. 
cycle (it is very fast for unwanted strings) 1091 * (can't be used binary search in format parsing) 1092 * ---------- 1093 */ 1094 static const KeyWord * 1095 index_seq_search(const char *str, const KeyWord *kw, const int *index) 1096 { 1097 int poz; 1098 1099 if (!KeyWord_INDEX_FILTER(*str)) 1100 return NULL; 1101 1102 if ((poz = *(index + (*str - ' '))) > -1) 1103 { 1104 const KeyWord *k = kw + poz; 1105 1106 do 1107 { 1108 if (strncmp(str, k->name, k->len) == 0) 1109 return k; 1110 k++; 1111 if (!k->name) 1112 return NULL; 1113 } while (*str == *k->name); 1114 } 1115 return NULL; 1116 } 1117 1118 static const KeySuffix * 1119 suff_search(const char *str, const KeySuffix *suf, int type) 1120 { 1121 const KeySuffix *s; 1122 1123 for (s = suf; s->name != NULL; s++) 1124 { 1125 if (s->type != type) 1126 continue; 1127 1128 if (strncmp(str, s->name, s->len) == 0) 1129 return s; 1130 } 1131 return NULL; 1132 } 1133 1134 static bool 1135 is_separator_char(const char *str) 1136 { 1137 /* ASCII printable character, but not letter or digit */ 1138 return (*str > 0x20 && *str < 0x7F && 1139 !(*str >= 'A' && *str <= 'Z') && 1140 !(*str >= 'a' && *str <= 'z') && 1141 !(*str >= '0' && *str <= '9')); 1142 } 1143 1144 /* ---------- 1145 * Prepare NUMDesc (number description struct) via FormatNode struct 1146 * ---------- 1147 */ 1148 static void 1149 NUMDesc_prepare(NUMDesc *num, FormatNode *n) 1150 { 1151 if (n->type != NODE_TYPE_ACTION) 1152 return; 1153 1154 if (IS_EEEE(num) && n->key->id != NUM_E) 1155 ereport(ERROR, 1156 (errcode(ERRCODE_SYNTAX_ERROR), 1157 errmsg("\"EEEE\" must be the last pattern used"))); 1158 1159 switch (n->key->id) 1160 { 1161 case NUM_9: 1162 if (IS_BRACKET(num)) 1163 ereport(ERROR, 1164 (errcode(ERRCODE_SYNTAX_ERROR), 1165 errmsg("\"9\" must be ahead of \"PR\""))); 1166 if (IS_MULTI(num)) 1167 { 1168 ++num->multi; 1169 break; 1170 } 1171 if (IS_DECIMAL(num)) 1172 ++num->post; 1173 else 1174 ++num->pre; 1175 break; 1176 1177 case NUM_0: 1178 if 
(IS_BRACKET(num)) 1179 ereport(ERROR, 1180 (errcode(ERRCODE_SYNTAX_ERROR), 1181 errmsg("\"0\" must be ahead of \"PR\""))); 1182 if (!IS_ZERO(num) && !IS_DECIMAL(num)) 1183 { 1184 num->flag |= NUM_F_ZERO; 1185 num->zero_start = num->pre + 1; 1186 } 1187 if (!IS_DECIMAL(num)) 1188 ++num->pre; 1189 else 1190 ++num->post; 1191 1192 num->zero_end = num->pre + num->post; 1193 break; 1194 1195 case NUM_B: 1196 if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num))) 1197 num->flag |= NUM_F_BLANK; 1198 break; 1199 1200 case NUM_D: 1201 num->flag |= NUM_F_LDECIMAL; 1202 num->need_locale = true; 1203 /* FALLTHROUGH */ 1204 case NUM_DEC: 1205 if (IS_DECIMAL(num)) 1206 ereport(ERROR, 1207 (errcode(ERRCODE_SYNTAX_ERROR), 1208 errmsg("multiple decimal points"))); 1209 if (IS_MULTI(num)) 1210 ereport(ERROR, 1211 (errcode(ERRCODE_SYNTAX_ERROR), 1212 errmsg("cannot use \"V\" and decimal point together"))); 1213 num->flag |= NUM_F_DECIMAL; 1214 break; 1215 1216 case NUM_FM: 1217 num->flag |= NUM_F_FILLMODE; 1218 break; 1219 1220 case NUM_S: 1221 if (IS_LSIGN(num)) 1222 ereport(ERROR, 1223 (errcode(ERRCODE_SYNTAX_ERROR), 1224 errmsg("cannot use \"S\" twice"))); 1225 if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num)) 1226 ereport(ERROR, 1227 (errcode(ERRCODE_SYNTAX_ERROR), 1228 errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together"))); 1229 if (!IS_DECIMAL(num)) 1230 { 1231 num->lsign = NUM_LSIGN_PRE; 1232 num->pre_lsign_num = num->pre; 1233 num->need_locale = true; 1234 num->flag |= NUM_F_LSIGN; 1235 } 1236 else if (num->lsign == NUM_LSIGN_NONE) 1237 { 1238 num->lsign = NUM_LSIGN_POST; 1239 num->need_locale = true; 1240 num->flag |= NUM_F_LSIGN; 1241 } 1242 break; 1243 1244 case NUM_MI: 1245 if (IS_LSIGN(num)) 1246 ereport(ERROR, 1247 (errcode(ERRCODE_SYNTAX_ERROR), 1248 errmsg("cannot use \"S\" and \"MI\" together"))); 1249 num->flag |= NUM_F_MINUS; 1250 if (IS_DECIMAL(num)) 1251 num->flag |= NUM_F_MINUS_POST; 1252 break; 1253 1254 case NUM_PL: 1255 if (IS_LSIGN(num)) 
1256 ereport(ERROR, 1257 (errcode(ERRCODE_SYNTAX_ERROR), 1258 errmsg("cannot use \"S\" and \"PL\" together"))); 1259 num->flag |= NUM_F_PLUS; 1260 if (IS_DECIMAL(num)) 1261 num->flag |= NUM_F_PLUS_POST; 1262 break; 1263 1264 case NUM_SG: 1265 if (IS_LSIGN(num)) 1266 ereport(ERROR, 1267 (errcode(ERRCODE_SYNTAX_ERROR), 1268 errmsg("cannot use \"S\" and \"SG\" together"))); 1269 num->flag |= NUM_F_MINUS; 1270 num->flag |= NUM_F_PLUS; 1271 break; 1272 1273 case NUM_PR: 1274 if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num)) 1275 ereport(ERROR, 1276 (errcode(ERRCODE_SYNTAX_ERROR), 1277 errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together"))); 1278 num->flag |= NUM_F_BRACKET; 1279 break; 1280 1281 case NUM_rn: 1282 case NUM_RN: 1283 num->flag |= NUM_F_ROMAN; 1284 break; 1285 1286 case NUM_L: 1287 case NUM_G: 1288 num->need_locale = true; 1289 break; 1290 1291 case NUM_V: 1292 if (IS_DECIMAL(num)) 1293 ereport(ERROR, 1294 (errcode(ERRCODE_SYNTAX_ERROR), 1295 errmsg("cannot use \"V\" and decimal point together"))); 1296 num->flag |= NUM_F_MULTI; 1297 break; 1298 1299 case NUM_E: 1300 if (IS_EEEE(num)) 1301 ereport(ERROR, 1302 (errcode(ERRCODE_SYNTAX_ERROR), 1303 errmsg("cannot use \"EEEE\" twice"))); 1304 if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) || 1305 IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) || 1306 IS_ROMAN(num) || IS_MULTI(num)) 1307 ereport(ERROR, 1308 (errcode(ERRCODE_SYNTAX_ERROR), 1309 errmsg("\"EEEE\" is incompatible with other formats"), 1310 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns."))); 1311 num->flag |= NUM_F_EEEE; 1312 break; 1313 } 1314 } 1315 1316 /* ---------- 1317 * Format parser, search small keywords and keyword's suffixes, and make 1318 * format-node tree. 
1319 * 1320 * for DATE-TIME & NUMBER version 1321 * ---------- 1322 */ 1323 static void 1324 parse_format(FormatNode *node, const char *str, const KeyWord *kw, 1325 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num) 1326 { 1327 FormatNode *n; 1328 1329 #ifdef DEBUG_TO_FROM_CHAR 1330 elog(DEBUG_elog_output, "to_char/number(): run parser"); 1331 #endif 1332 1333 n = node; 1334 1335 while (*str) 1336 { 1337 int suffix = 0; 1338 const KeySuffix *s; 1339 1340 /* 1341 * Prefix 1342 */ 1343 if ((flags & DCH_FLAG) && 1344 (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL) 1345 { 1346 suffix |= s->id; 1347 if (s->len) 1348 str += s->len; 1349 } 1350 1351 /* 1352 * Keyword 1353 */ 1354 if (*str && (n->key = index_seq_search(str, kw, index)) != NULL) 1355 { 1356 n->type = NODE_TYPE_ACTION; 1357 n->suffix = suffix; 1358 if (n->key->len) 1359 str += n->key->len; 1360 1361 /* 1362 * NUM version: Prepare global NUMDesc struct 1363 */ 1364 if (flags & NUM_FLAG) 1365 NUMDesc_prepare(Num, n); 1366 1367 /* 1368 * Postfix 1369 */ 1370 if ((flags & DCH_FLAG) && *str && 1371 (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL) 1372 { 1373 n->suffix |= s->id; 1374 if (s->len) 1375 str += s->len; 1376 } 1377 1378 n++; 1379 } 1380 else if (*str) 1381 { 1382 int chlen; 1383 1384 if ((flags & STD_FLAG) && *str != '"') 1385 { 1386 /* 1387 * Standard mode, allow only following separators: "-./,':; ". 1388 * However, we support double quotes even in standard mode 1389 * (see below). This is our extension of standard mode. 
1390 */ 1391 if (strchr("-./,':; ", *str) == NULL) 1392 ereport(ERROR, 1393 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 1394 errmsg("invalid datetime format separator: \"%s\"", 1395 pnstrdup(str, pg_mblen(str))))); 1396 1397 if (*str == ' ') 1398 n->type = NODE_TYPE_SPACE; 1399 else 1400 n->type = NODE_TYPE_SEPARATOR; 1401 1402 n->character[0] = *str; 1403 n->character[1] = '\0'; 1404 n->key = NULL; 1405 n->suffix = 0; 1406 n++; 1407 str++; 1408 } 1409 else if (*str == '"') 1410 { 1411 /* 1412 * Process double-quoted literal string, if any 1413 */ 1414 str++; 1415 while (*str) 1416 { 1417 if (*str == '"') 1418 { 1419 str++; 1420 break; 1421 } 1422 /* backslash quotes the next character, if any */ 1423 if (*str == '\\' && *(str + 1)) 1424 str++; 1425 chlen = pg_mblen(str); 1426 n->type = NODE_TYPE_CHAR; 1427 memcpy(n->character, str, chlen); 1428 n->character[chlen] = '\0'; 1429 n->key = NULL; 1430 n->suffix = 0; 1431 n++; 1432 str += chlen; 1433 } 1434 } 1435 else 1436 { 1437 /* 1438 * Outside double-quoted strings, backslash is only special if 1439 * it immediately precedes a double quote. 1440 */ 1441 if (*str == '\\' && *(str + 1) == '"') 1442 str++; 1443 chlen = pg_mblen(str); 1444 1445 if ((flags & DCH_FLAG) && is_separator_char(str)) 1446 n->type = NODE_TYPE_SEPARATOR; 1447 else if (isspace((unsigned char) *str)) 1448 n->type = NODE_TYPE_SPACE; 1449 else 1450 n->type = NODE_TYPE_CHAR; 1451 1452 memcpy(n->character, str, chlen); 1453 n->character[chlen] = '\0'; 1454 n->key = NULL; 1455 n->suffix = 0; 1456 n++; 1457 str += chlen; 1458 } 1459 } 1460 } 1461 1462 n->type = NODE_TYPE_END; 1463 n->suffix = 0; 1464 } 1465 1466 /* ---------- 1467 * DEBUG: Dump the FormatNode Tree (debug) 1468 * ---------- 1469 */ 1470 #ifdef DEBUG_TO_FROM_CHAR 1471 1472 #define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " ")) 1473 #define DUMP_FM(_suf) (S_FM(_suf) ? 
"FM" : " ") 1474 1475 static void 1476 dump_node(FormatNode *node, int max) 1477 { 1478 FormatNode *n; 1479 int a; 1480 1481 elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT"); 1482 1483 for (a = 0, n = node; a <= max; n++, a++) 1484 { 1485 if (n->type == NODE_TYPE_ACTION) 1486 elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)", 1487 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix)); 1488 else if (n->type == NODE_TYPE_CHAR) 1489 elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'", 1490 a, n->character); 1491 else if (n->type == NODE_TYPE_END) 1492 { 1493 elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a); 1494 return; 1495 } 1496 else 1497 elog(DEBUG_elog_output, "%d:\t unknown NODE!", a); 1498 } 1499 } 1500 #endif /* DEBUG */ 1501 1502 /***************************************************************************** 1503 * Private utils 1504 *****************************************************************************/ 1505 1506 /* ---------- 1507 * Return ST/ND/RD/TH for simple (1..9) numbers 1508 * type --> 0 upper, 1 lower 1509 * ---------- 1510 */ 1511 static const char * 1512 get_th(char *num, int type) 1513 { 1514 int len = strlen(num), 1515 last, 1516 seclast; 1517 1518 last = *(num + (len - 1)); 1519 if (!isdigit((unsigned char) last)) 1520 ereport(ERROR, 1521 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1522 errmsg("\"%s\" is not a number", num))); 1523 1524 /* 1525 * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get 1526 * 'ST/st', 'ND/nd', 'RD/rd', respectively 1527 */ 1528 if ((len > 1) && ((seclast = num[len - 2]) == '1')) 1529 last = 0; 1530 1531 switch (last) 1532 { 1533 case '1': 1534 if (type == TH_UPPER) 1535 return numTH[0]; 1536 return numth[0]; 1537 case '2': 1538 if (type == TH_UPPER) 1539 return numTH[1]; 1540 return numth[1]; 1541 case '3': 1542 if (type == TH_UPPER) 1543 return numTH[2]; 1544 return numth[2]; 1545 default: 1546 if (type == TH_UPPER) 1547 return numTH[3]; 1548 return numth[3]; 1549 } 
1550 } 1551 1552 /* ---------- 1553 * Convert string-number to ordinal string-number 1554 * type --> 0 upper, 1 lower 1555 * ---------- 1556 */ 1557 static char * 1558 str_numth(char *dest, char *num, int type) 1559 { 1560 if (dest != num) 1561 strcpy(dest, num); 1562 strcat(dest, get_th(num, type)); 1563 return dest; 1564 } 1565 1566 /***************************************************************************** 1567 * upper/lower/initcap functions 1568 *****************************************************************************/ 1569 1570 #ifdef USE_ICU 1571 1572 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity, 1573 const UChar *src, int32_t srcLength, 1574 const char *locale, 1575 UErrorCode *pErrorCode); 1576 1577 static int32_t 1578 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, 1579 UChar **buff_dest, UChar *buff_source, int32_t len_source) 1580 { 1581 UErrorCode status; 1582 int32_t len_dest; 1583 1584 len_dest = len_source; /* try first with same length */ 1585 *buff_dest = palloc(len_dest * sizeof(**buff_dest)); 1586 status = U_ZERO_ERROR; 1587 len_dest = func(*buff_dest, len_dest, buff_source, len_source, 1588 mylocale->info.icu.locale, &status); 1589 if (status == U_BUFFER_OVERFLOW_ERROR) 1590 { 1591 /* try again with adjusted length */ 1592 pfree(*buff_dest); 1593 *buff_dest = palloc(len_dest * sizeof(**buff_dest)); 1594 status = U_ZERO_ERROR; 1595 len_dest = func(*buff_dest, len_dest, buff_source, len_source, 1596 mylocale->info.icu.locale, &status); 1597 } 1598 if (U_FAILURE(status)) 1599 ereport(ERROR, 1600 (errmsg("case conversion failed: %s", u_errorName(status)))); 1601 return len_dest; 1602 } 1603 1604 static int32_t 1605 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, 1606 const UChar *src, int32_t srcLength, 1607 const char *locale, 1608 UErrorCode *pErrorCode) 1609 { 1610 return u_strToTitle(dest, destCapacity, src, srcLength, 1611 NULL, locale, pErrorCode); 1612 } 1613 1614 #endif /* USE_ICU */ 
1615 1616 /* 1617 * If the system provides the needed functions for wide-character manipulation 1618 * (which are all standardized by C99), then we implement upper/lower/initcap 1619 * using wide-character functions, if necessary. Otherwise we use the 1620 * traditional <ctype.h> functions, which of course will not work as desired 1621 * in multibyte character sets. Note that in either case we are effectively 1622 * assuming that the database character encoding matches the encoding implied 1623 * by LC_CTYPE. 1624 * 1625 * If the system provides locale_t and associated functions (which are 1626 * standardized by Open Group's XBD), we can support collations that are 1627 * neither default nor C. The code is written to handle both combinations 1628 * of have-wide-characters and have-locale_t, though it's rather unlikely 1629 * a platform would have the latter without the former. 1630 */ 1631 1632 /* 1633 * collation-aware, wide-character-aware lower function 1634 * 1635 * We pass the number of bytes so we can pass varlena and char* 1636 * to this function. The result is a palloc'd, null-terminated string. 1637 */ 1638 char * 1639 str_tolower(const char *buff, size_t nbytes, Oid collid) 1640 { 1641 char *result; 1642 1643 if (!buff) 1644 return NULL; 1645 1646 /* C/POSIX collations use this path regardless of database encoding */ 1647 if (lc_ctype_is_c(collid)) 1648 { 1649 result = asc_tolower(buff, nbytes); 1650 } 1651 else 1652 { 1653 pg_locale_t mylocale = 0; 1654 1655 if (collid != DEFAULT_COLLATION_OID) 1656 { 1657 if (!OidIsValid(collid)) 1658 { 1659 /* 1660 * This typically means that the parser could not resolve a 1661 * conflict of implicit collations, so report it that way. 
1662 */ 1663 ereport(ERROR, 1664 (errcode(ERRCODE_INDETERMINATE_COLLATION), 1665 errmsg("could not determine which collation to use for %s function", 1666 "lower()"), 1667 errhint("Use the COLLATE clause to set the collation explicitly."))); 1668 } 1669 mylocale = pg_newlocale_from_collation(collid); 1670 } 1671 1672 #ifdef USE_ICU 1673 if (mylocale && mylocale->provider == COLLPROVIDER_ICU) 1674 { 1675 int32_t len_uchar; 1676 int32_t len_conv; 1677 UChar *buff_uchar; 1678 UChar *buff_conv; 1679 1680 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); 1681 len_conv = icu_convert_case(u_strToLower, mylocale, 1682 &buff_conv, buff_uchar, len_uchar); 1683 icu_from_uchar(&result, buff_conv, len_conv); 1684 pfree(buff_uchar); 1685 pfree(buff_conv); 1686 } 1687 else 1688 #endif 1689 { 1690 if (pg_database_encoding_max_length() > 1) 1691 { 1692 wchar_t *workspace; 1693 size_t curr_char; 1694 size_t result_size; 1695 1696 /* Overflow paranoia */ 1697 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) 1698 ereport(ERROR, 1699 (errcode(ERRCODE_OUT_OF_MEMORY), 1700 errmsg("out of memory"))); 1701 1702 /* Output workspace cannot have more codes than input bytes */ 1703 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); 1704 1705 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); 1706 1707 for (curr_char = 0; workspace[curr_char] != 0; curr_char++) 1708 { 1709 #ifdef HAVE_LOCALE_T 1710 if (mylocale) 1711 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); 1712 else 1713 #endif 1714 workspace[curr_char] = towlower(workspace[curr_char]); 1715 } 1716 1717 /* 1718 * Make result large enough; case change might change number 1719 * of bytes 1720 */ 1721 result_size = curr_char * pg_database_encoding_max_length() + 1; 1722 result = palloc(result_size); 1723 1724 wchar2char(result, workspace, result_size, mylocale); 1725 pfree(workspace); 1726 } 1727 else 1728 { 1729 char *p; 1730 1731 result = pnstrdup(buff, nbytes); 1732 1733 /* 1734 * Note: 
we assume that tolower_l() will not be so broken as 1735 * to need an isupper_l() guard test. When using the default 1736 * collation, we apply the traditional Postgres behavior that 1737 * forces ASCII-style treatment of I/i, but in non-default 1738 * collations you get exactly what the collation says. 1739 */ 1740 for (p = result; *p; p++) 1741 { 1742 #ifdef HAVE_LOCALE_T 1743 if (mylocale) 1744 *p = tolower_l((unsigned char) *p, mylocale->info.lt); 1745 else 1746 #endif 1747 *p = pg_tolower((unsigned char) *p); 1748 } 1749 } 1750 } 1751 } 1752 1753 return result; 1754 } 1755 1756 /* 1757 * collation-aware, wide-character-aware upper function 1758 * 1759 * We pass the number of bytes so we can pass varlena and char* 1760 * to this function. The result is a palloc'd, null-terminated string. 1761 */ 1762 char * 1763 str_toupper(const char *buff, size_t nbytes, Oid collid) 1764 { 1765 char *result; 1766 1767 if (!buff) 1768 return NULL; 1769 1770 /* C/POSIX collations use this path regardless of database encoding */ 1771 if (lc_ctype_is_c(collid)) 1772 { 1773 result = asc_toupper(buff, nbytes); 1774 } 1775 else 1776 { 1777 pg_locale_t mylocale = 0; 1778 1779 if (collid != DEFAULT_COLLATION_OID) 1780 { 1781 if (!OidIsValid(collid)) 1782 { 1783 /* 1784 * This typically means that the parser could not resolve a 1785 * conflict of implicit collations, so report it that way. 
1786 */ 1787 ereport(ERROR, 1788 (errcode(ERRCODE_INDETERMINATE_COLLATION), 1789 errmsg("could not determine which collation to use for %s function", 1790 "upper()"), 1791 errhint("Use the COLLATE clause to set the collation explicitly."))); 1792 } 1793 mylocale = pg_newlocale_from_collation(collid); 1794 } 1795 1796 #ifdef USE_ICU 1797 if (mylocale && mylocale->provider == COLLPROVIDER_ICU) 1798 { 1799 int32_t len_uchar, 1800 len_conv; 1801 UChar *buff_uchar; 1802 UChar *buff_conv; 1803 1804 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); 1805 len_conv = icu_convert_case(u_strToUpper, mylocale, 1806 &buff_conv, buff_uchar, len_uchar); 1807 icu_from_uchar(&result, buff_conv, len_conv); 1808 pfree(buff_uchar); 1809 pfree(buff_conv); 1810 } 1811 else 1812 #endif 1813 { 1814 if (pg_database_encoding_max_length() > 1) 1815 { 1816 wchar_t *workspace; 1817 size_t curr_char; 1818 size_t result_size; 1819 1820 /* Overflow paranoia */ 1821 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) 1822 ereport(ERROR, 1823 (errcode(ERRCODE_OUT_OF_MEMORY), 1824 errmsg("out of memory"))); 1825 1826 /* Output workspace cannot have more codes than input bytes */ 1827 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); 1828 1829 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); 1830 1831 for (curr_char = 0; workspace[curr_char] != 0; curr_char++) 1832 { 1833 #ifdef HAVE_LOCALE_T 1834 if (mylocale) 1835 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); 1836 else 1837 #endif 1838 workspace[curr_char] = towupper(workspace[curr_char]); 1839 } 1840 1841 /* 1842 * Make result large enough; case change might change number 1843 * of bytes 1844 */ 1845 result_size = curr_char * pg_database_encoding_max_length() + 1; 1846 result = palloc(result_size); 1847 1848 wchar2char(result, workspace, result_size, mylocale); 1849 pfree(workspace); 1850 } 1851 else 1852 { 1853 char *p; 1854 1855 result = pnstrdup(buff, nbytes); 1856 1857 /* 1858 * Note: we 
assume that toupper_l() will not be so broken as 1859 * to need an islower_l() guard test. When using the default 1860 * collation, we apply the traditional Postgres behavior that 1861 * forces ASCII-style treatment of I/i, but in non-default 1862 * collations you get exactly what the collation says. 1863 */ 1864 for (p = result; *p; p++) 1865 { 1866 #ifdef HAVE_LOCALE_T 1867 if (mylocale) 1868 *p = toupper_l((unsigned char) *p, mylocale->info.lt); 1869 else 1870 #endif 1871 *p = pg_toupper((unsigned char) *p); 1872 } 1873 } 1874 } 1875 } 1876 1877 return result; 1878 } 1879 1880 /* 1881 * collation-aware, wide-character-aware initcap function 1882 * 1883 * We pass the number of bytes so we can pass varlena and char* 1884 * to this function. The result is a palloc'd, null-terminated string. 1885 */ 1886 char * 1887 str_initcap(const char *buff, size_t nbytes, Oid collid) 1888 { 1889 char *result; 1890 int wasalnum = false; 1891 1892 if (!buff) 1893 return NULL; 1894 1895 /* C/POSIX collations use this path regardless of database encoding */ 1896 if (lc_ctype_is_c(collid)) 1897 { 1898 result = asc_initcap(buff, nbytes); 1899 } 1900 else 1901 { 1902 pg_locale_t mylocale = 0; 1903 1904 if (collid != DEFAULT_COLLATION_OID) 1905 { 1906 if (!OidIsValid(collid)) 1907 { 1908 /* 1909 * This typically means that the parser could not resolve a 1910 * conflict of implicit collations, so report it that way. 
1911 */ 1912 ereport(ERROR, 1913 (errcode(ERRCODE_INDETERMINATE_COLLATION), 1914 errmsg("could not determine which collation to use for %s function", 1915 "initcap()"), 1916 errhint("Use the COLLATE clause to set the collation explicitly."))); 1917 } 1918 mylocale = pg_newlocale_from_collation(collid); 1919 } 1920 1921 #ifdef USE_ICU 1922 if (mylocale && mylocale->provider == COLLPROVIDER_ICU) 1923 { 1924 int32_t len_uchar, 1925 len_conv; 1926 UChar *buff_uchar; 1927 UChar *buff_conv; 1928 1929 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); 1930 len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, 1931 &buff_conv, buff_uchar, len_uchar); 1932 icu_from_uchar(&result, buff_conv, len_conv); 1933 pfree(buff_uchar); 1934 pfree(buff_conv); 1935 } 1936 else 1937 #endif 1938 { 1939 if (pg_database_encoding_max_length() > 1) 1940 { 1941 wchar_t *workspace; 1942 size_t curr_char; 1943 size_t result_size; 1944 1945 /* Overflow paranoia */ 1946 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) 1947 ereport(ERROR, 1948 (errcode(ERRCODE_OUT_OF_MEMORY), 1949 errmsg("out of memory"))); 1950 1951 /* Output workspace cannot have more codes than input bytes */ 1952 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); 1953 1954 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); 1955 1956 for (curr_char = 0; workspace[curr_char] != 0; curr_char++) 1957 { 1958 #ifdef HAVE_LOCALE_T 1959 if (mylocale) 1960 { 1961 if (wasalnum) 1962 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); 1963 else 1964 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); 1965 wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); 1966 } 1967 else 1968 #endif 1969 { 1970 if (wasalnum) 1971 workspace[curr_char] = towlower(workspace[curr_char]); 1972 else 1973 workspace[curr_char] = towupper(workspace[curr_char]); 1974 wasalnum = iswalnum(workspace[curr_char]); 1975 } 1976 } 1977 1978 /* 1979 * Make result large enough; case 
change might change number 1980 * of bytes 1981 */ 1982 result_size = curr_char * pg_database_encoding_max_length() + 1; 1983 result = palloc(result_size); 1984 1985 wchar2char(result, workspace, result_size, mylocale); 1986 pfree(workspace); 1987 } 1988 else 1989 { 1990 char *p; 1991 1992 result = pnstrdup(buff, nbytes); 1993 1994 /* 1995 * Note: we assume that toupper_l()/tolower_l() will not be so 1996 * broken as to need guard tests. When using the default 1997 * collation, we apply the traditional Postgres behavior that 1998 * forces ASCII-style treatment of I/i, but in non-default 1999 * collations you get exactly what the collation says. 2000 */ 2001 for (p = result; *p; p++) 2002 { 2003 #ifdef HAVE_LOCALE_T 2004 if (mylocale) 2005 { 2006 if (wasalnum) 2007 *p = tolower_l((unsigned char) *p, mylocale->info.lt); 2008 else 2009 *p = toupper_l((unsigned char) *p, mylocale->info.lt); 2010 wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); 2011 } 2012 else 2013 #endif 2014 { 2015 if (wasalnum) 2016 *p = pg_tolower((unsigned char) *p); 2017 else 2018 *p = pg_toupper((unsigned char) *p); 2019 wasalnum = isalnum((unsigned char) *p); 2020 } 2021 } 2022 } 2023 } 2024 } 2025 2026 return result; 2027 } 2028 2029 /* 2030 * ASCII-only lower function 2031 * 2032 * We pass the number of bytes so we can pass varlena and char* 2033 * to this function. The result is a palloc'd, null-terminated string. 2034 */ 2035 char * 2036 asc_tolower(const char *buff, size_t nbytes) 2037 { 2038 char *result; 2039 char *p; 2040 2041 if (!buff) 2042 return NULL; 2043 2044 result = pnstrdup(buff, nbytes); 2045 2046 for (p = result; *p; p++) 2047 *p = pg_ascii_tolower((unsigned char) *p); 2048 2049 return result; 2050 } 2051 2052 /* 2053 * ASCII-only upper function 2054 * 2055 * We pass the number of bytes so we can pass varlena and char* 2056 * to this function. The result is a palloc'd, null-terminated string. 
2057 */ 2058 char * 2059 asc_toupper(const char *buff, size_t nbytes) 2060 { 2061 char *result; 2062 char *p; 2063 2064 if (!buff) 2065 return NULL; 2066 2067 result = pnstrdup(buff, nbytes); 2068 2069 for (p = result; *p; p++) 2070 *p = pg_ascii_toupper((unsigned char) *p); 2071 2072 return result; 2073 } 2074 2075 /* 2076 * ASCII-only initcap function 2077 * 2078 * We pass the number of bytes so we can pass varlena and char* 2079 * to this function. The result is a palloc'd, null-terminated string. 2080 */ 2081 char * 2082 asc_initcap(const char *buff, size_t nbytes) 2083 { 2084 char *result; 2085 char *p; 2086 int wasalnum = false; 2087 2088 if (!buff) 2089 return NULL; 2090 2091 result = pnstrdup(buff, nbytes); 2092 2093 for (p = result; *p; p++) 2094 { 2095 char c; 2096 2097 if (wasalnum) 2098 *p = c = pg_ascii_tolower((unsigned char) *p); 2099 else 2100 *p = c = pg_ascii_toupper((unsigned char) *p); 2101 /* we don't trust isalnum() here */ 2102 wasalnum = ((c >= 'A' && c <= 'Z') || 2103 (c >= 'a' && c <= 'z') || 2104 (c >= '0' && c <= '9')); 2105 } 2106 2107 return result; 2108 } 2109 2110 /* convenience routines for when the input is null-terminated */ 2111 2112 static char * 2113 str_tolower_z(const char *buff, Oid collid) 2114 { 2115 return str_tolower(buff, strlen(buff), collid); 2116 } 2117 2118 static char * 2119 str_toupper_z(const char *buff, Oid collid) 2120 { 2121 return str_toupper(buff, strlen(buff), collid); 2122 } 2123 2124 static char * 2125 str_initcap_z(const char *buff, Oid collid) 2126 { 2127 return str_initcap(buff, strlen(buff), collid); 2128 } 2129 2130 static char * 2131 asc_tolower_z(const char *buff) 2132 { 2133 return asc_tolower(buff, strlen(buff)); 2134 } 2135 2136 static char * 2137 asc_toupper_z(const char *buff) 2138 { 2139 return asc_toupper(buff, strlen(buff)); 2140 } 2141 2142 /* asc_initcap_z is not currently needed */ 2143 2144 2145 /* ---------- 2146 * Skip TM / th in FROM_CHAR 2147 * 2148 * If S_THth is on, skip two 
chars, assuming there are two available 2149 * ---------- 2150 */ 2151 #define SKIP_THth(ptr, _suf) \ 2152 do { \ 2153 if (S_THth(_suf)) \ 2154 { \ 2155 if (*(ptr)) (ptr) += pg_mblen(ptr); \ 2156 if (*(ptr)) (ptr) += pg_mblen(ptr); \ 2157 } \ 2158 } while (0) 2159 2160 2161 #ifdef DEBUG_TO_FROM_CHAR 2162 /* ----------- 2163 * DEBUG: Call for debug and for index checking; (Show ASCII char 2164 * and defined keyword for each used position 2165 * ---------- 2166 */ 2167 static void 2168 dump_index(const KeyWord *k, const int *index) 2169 { 2170 int i, 2171 count = 0, 2172 free_i = 0; 2173 2174 elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:"); 2175 2176 for (i = 0; i < KeyWord_INDEX_SIZE; i++) 2177 { 2178 if (index[i] != -1) 2179 { 2180 elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name); 2181 count++; 2182 } 2183 else 2184 { 2185 free_i++; 2186 elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]); 2187 } 2188 } 2189 elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d", 2190 count, free_i); 2191 } 2192 #endif /* DEBUG */ 2193 2194 /* ---------- 2195 * Return true if next format picture is not digit value 2196 * ---------- 2197 */ 2198 static bool 2199 is_next_separator(FormatNode *n) 2200 { 2201 if (n->type == NODE_TYPE_END) 2202 return false; 2203 2204 if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix)) 2205 return true; 2206 2207 /* 2208 * Next node 2209 */ 2210 n++; 2211 2212 /* end of format string is treated like a non-digit separator */ 2213 if (n->type == NODE_TYPE_END) 2214 return true; 2215 2216 if (n->type == NODE_TYPE_ACTION) 2217 { 2218 if (n->key->is_digit) 2219 return false; 2220 2221 return true; 2222 } 2223 else if (n->character[1] == '\0' && 2224 isdigit((unsigned char) n->character[0])) 2225 return false; 2226 2227 return true; /* some non-digit input (separator) */ 2228 } 2229 2230 2231 static int 2232 adjust_partial_year_to_2020(int year) 2233 { 2234 /* 2235 * Adjust all dates toward 2020; 
this is effectively what happens when we 2236 * assume '70' is 1970 and '69' is 2069. 2237 */ 2238 /* Force 0-69 into the 2000's */ 2239 if (year < 70) 2240 return year + 2000; 2241 /* Force 70-99 into the 1900's */ 2242 else if (year < 100) 2243 return year + 1900; 2244 /* Force 100-519 into the 2000's */ 2245 else if (year < 520) 2246 return year + 2000; 2247 /* Force 520-999 into the 1000's */ 2248 else if (year < 1000) 2249 return year + 1000; 2250 else 2251 return year; 2252 } 2253 2254 2255 static int 2256 strspace_len(const char *str) 2257 { 2258 int len = 0; 2259 2260 while (*str && isspace((unsigned char) *str)) 2261 { 2262 str++; 2263 len++; 2264 } 2265 return len; 2266 } 2267 2268 /* 2269 * Set the date mode of a from-char conversion. 2270 * 2271 * Puke if the date mode has already been set, and the caller attempts to set 2272 * it to a conflicting mode. 2273 * 2274 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set. 2275 */ 2276 static void 2277 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error) 2278 { 2279 if (mode != FROM_CHAR_DATE_NONE) 2280 { 2281 if (tmfc->mode == FROM_CHAR_DATE_NONE) 2282 tmfc->mode = mode; 2283 else if (tmfc->mode != mode) 2284 RETURN_ERROR(ereport(ERROR, 2285 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 2286 errmsg("invalid combination of date conventions"), 2287 errhint("Do not mix Gregorian and ISO week date " 2288 "conventions in a formatting template.")))); 2289 } 2290 2291 on_error: 2292 return; 2293 } 2294 2295 /* 2296 * Set the integer pointed to by 'dest' to the given value. 2297 * 2298 * Puke if the destination integer has previously been set to some other 2299 * non-zero value. 2300 * 2301 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set. 
 */
static void
from_char_set_int(int *dest, const int value, const FormatNode *node,
				  bool *have_error)
{
	/* zero is treated as "not yet set", so only a different non-zero value conflicts */
	if (*dest != 0 && *dest != value)
		RETURN_ERROR(ereport(ERROR,
							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
							  errmsg("conflicting values for \"%s\" field in "
									 "formatting string",
									 node->key->name),
							  errdetail("This value contradicts a previous setting "
										"for the same field type."))));
	*dest = value;

on_error:
	return;
}

/*
 * Read a single integer from the source string, into the int pointed to by
 * 'dest'. If 'dest' is NULL, the result is discarded.
 *
 * In fixed-width mode (the node does not have the FM suffix), consume at most
 * 'len' characters.  However, any leading whitespace isn't counted in 'len'.
 *
 * We use strtol() to recover the integer value from the source string, in
 * accordance with the given FormatNode.
 *
 * If the conversion completes successfully, src will have been advanced to
 * point at the character immediately following the last character used in the
 * conversion.
 *
 * Return the number of characters consumed, measured from the original value
 * of *src (so skipped leading whitespace is included in the count).
 *
 * Note that from_char_parse_int() provides a more convenient wrapper where
 * the length of the field is the same as the length of the format keyword (as
 * with DD and MI).
 *
 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
 * and -1 is returned.
 */
static int
from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
						bool *have_error)
{
	long		result;
	char		copy[DCH_MAX_ITEM_SIZ + 1];
	const char *init = *src;	/* starting position, before whitespace skip */
	int			used;

	/*
	 * Skip any whitespace before parsing the integer.
	 */
	*src += strspace_len(*src);

	Assert(len <= DCH_MAX_ITEM_SIZ);

	/*
	 * Take a bounded copy of the field; it is parsed in the fixed-width
	 * branch and quoted in error messages.
	 *
	 * NOTE(review): strlcpy() conventionally returns the length of the
	 * source string rather than the number of bytes copied, so 'used' may
	 * exceed 'len' here -- the "used < len" test below only relies on it
	 * being smaller when the source is too short.  Confirm against the
	 * strlcpy in use.
	 */
	used = (int) strlcpy(copy, *src, len + 1);

	if (S_FM(node->suffix) || is_next_separator(node))
	{
		/*
		 * This node is in Fill Mode, or the next node is known to be a
		 * non-digit value, so we just slurp as many characters as we can get.
		 */
		char	   *endptr;

		/* parse from 'init'; strtol() skips leading whitespace on its own */
		errno = 0;
		result = strtol(init, &endptr, 10);
		*src = endptr;
	}
	else
	{
		/*
		 * We need to pull exactly the number of characters given in 'len' out
		 * of the string, and convert those.
		 */
		char	   *last;

		if (used < len)
			RETURN_ERROR(ereport(ERROR,
								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
								  errmsg("source string too short for \"%s\" "
										 "formatting field",
										 node->key->name),
								  errdetail("Field requires %d characters, "
											"but only %d remain.",
											len, used),
								  errhint("If your source string is not fixed-width, "
										  "try using the \"FM\" modifier."))));

		/* parse only the bounded copy, so at most 'len' characters are used */
		errno = 0;
		result = strtol(copy, &last, 10);
		used = last - copy;

		/* some, but not all, of the field's characters were numeric */
		if (used > 0 && used < len)
			RETURN_ERROR(ereport(ERROR,
								 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
								  errmsg("invalid value \"%s\" for \"%s\"",
										 copy, node->key->name),
								  errdetail("Field requires %d characters, "
											"but only %d could be parsed.",
											len, used),
								  errhint("If your source string is not fixed-width, "
										  "try using the \"FM\" modifier."))));

		*src += used;
	}

	/*
	 * Nothing consumed at all means there was no integer to read.
	 *
	 * NOTE(review): in the fixed-width branch *src has already been advanced
	 * past any leading whitespace, so this test can only fire there when no
	 * whitespace was skipped -- confirm that is intended.
	 */
	if (*src == init)
		RETURN_ERROR(ereport(ERROR,
							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
							  errmsg("invalid value \"%s\" for \"%s\"",
									 copy, node->key->name),
							  errdetail("Value must be an integer."))));

	/* errno was cleared just before strtol(); also reject values that don't fit in int */
	if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
		RETURN_ERROR(ereport(ERROR,
							 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
							  errmsg("value for \"%s\" in source string is out of range",
									 node->key->name),
							  errdetail("Value must be in the range %d to %d.",
										INT_MIN, INT_MAX))));

	if (dest != NULL)
	{
		/* store the value, failing if it conflicts with an earlier setting */
		from_char_set_int(dest, (int) result, node, have_error);
		CHECK_ERROR;
	}

	return *src - init;

on_error:
	return -1;
}

/*
 * Call from_char_parse_int_len(), using the length of the format keyword as
 * the expected length of the field.
 *
 * Don't call this function if the field differs in length from the format
 * keyword (as with HH24; the keyword length is 4, but the field length is 2).
 * In such cases, call from_char_parse_int_len() instead to specify the
 * required length explicitly.
 *
 * Arguments and return value are passed straight through to/from
 * from_char_parse_int_len().
 */
static int
from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
{
	return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
}

/*
 * Sequentially search null-terminated "array" for a case-insensitive match
 * to the initial character(s) of "name".
 *
 * Returns array index of match, or -1 for no match.
 *
 * *len is set to the length of the match, or 0 for no match.
 *
 * Case-insensitivity is defined per pg_ascii_tolower, so this is only
 * suitable for comparisons to ASCII strings.
2463 */ 2464 static int 2465 seq_search_ascii(const char *name, const char *const *array, int *len) 2466 { 2467 unsigned char firstc; 2468 const char *const *a; 2469 2470 *len = 0; 2471 2472 /* empty string can't match anything */ 2473 if (!*name) 2474 return -1; 2475 2476 /* we handle first char specially to gain some speed */ 2477 firstc = pg_ascii_tolower((unsigned char) *name); 2478 2479 for (a = array; *a != NULL; a++) 2480 { 2481 const char *p; 2482 const char *n; 2483 2484 /* compare first chars */ 2485 if (pg_ascii_tolower((unsigned char) **a) != firstc) 2486 continue; 2487 2488 /* compare rest of string */ 2489 for (p = *a + 1, n = name + 1;; p++, n++) 2490 { 2491 /* return success if we matched whole array entry */ 2492 if (*p == '\0') 2493 { 2494 *len = n - name; 2495 return a - array; 2496 } 2497 /* else, must have another character in "name" ... */ 2498 if (*n == '\0') 2499 break; 2500 /* ... and it must match */ 2501 if (pg_ascii_tolower((unsigned char) *p) != 2502 pg_ascii_tolower((unsigned char) *n)) 2503 break; 2504 } 2505 } 2506 2507 return -1; 2508 } 2509 2510 /* 2511 * Sequentially search an array of possibly non-English words for 2512 * a case-insensitive match to the initial character(s) of "name". 2513 * 2514 * This has the same API as seq_search_ascii(), but we use a more general 2515 * case-folding transformation to achieve case-insensitivity. Case folding 2516 * is done per the rules of the collation identified by "collid". 2517 * 2518 * The array is treated as const, but we don't declare it that way because 2519 * the arrays exported by pg_locale.c aren't const. 
2520 */ 2521 static int 2522 seq_search_localized(const char *name, char **array, int *len, Oid collid) 2523 { 2524 char **a; 2525 char *upper_name; 2526 char *lower_name; 2527 2528 *len = 0; 2529 2530 /* empty string can't match anything */ 2531 if (!*name) 2532 return -1; 2533 2534 /* 2535 * The case-folding processing done below is fairly expensive, so before 2536 * doing that, make a quick pass to see if there is an exact match. 2537 */ 2538 for (a = array; *a != NULL; a++) 2539 { 2540 int element_len = strlen(*a); 2541 2542 if (strncmp(name, *a, element_len) == 0) 2543 { 2544 *len = element_len; 2545 return a - array; 2546 } 2547 } 2548 2549 /* 2550 * Fold to upper case, then to lower case, so that we can match reliably 2551 * even in languages in which case conversions are not injective. 2552 */ 2553 upper_name = str_toupper(unconstify(char *, name), strlen(name), collid); 2554 lower_name = str_tolower(upper_name, strlen(upper_name), collid); 2555 pfree(upper_name); 2556 2557 for (a = array; *a != NULL; a++) 2558 { 2559 char *upper_element; 2560 char *lower_element; 2561 int element_len; 2562 2563 /* Likewise upper/lower-case array element */ 2564 upper_element = str_toupper(*a, strlen(*a), collid); 2565 lower_element = str_tolower(upper_element, strlen(upper_element), 2566 collid); 2567 pfree(upper_element); 2568 element_len = strlen(lower_element); 2569 2570 /* Match? */ 2571 if (strncmp(lower_name, lower_element, element_len) == 0) 2572 { 2573 *len = element_len; 2574 pfree(lower_element); 2575 pfree(lower_name); 2576 return a - array; 2577 } 2578 pfree(lower_element); 2579 } 2580 2581 pfree(lower_name); 2582 return -1; 2583 } 2584 2585 /* 2586 * Perform a sequential search in 'array' (or 'localized_array', if that's 2587 * not NULL) for an entry matching the first character(s) of the 'src' 2588 * string case-insensitively. 
2589 * 2590 * The 'array' is presumed to be English words (all-ASCII), but 2591 * if 'localized_array' is supplied, that might be non-English 2592 * so we need a more expensive case-folding transformation 2593 * (which will follow the rules of the collation 'collid'). 2594 * 2595 * If a match is found, copy the array index of the match into the integer 2596 * pointed to by 'dest', advance 'src' to the end of the part of the string 2597 * which matched, and return the number of characters consumed. 2598 * 2599 * If the string doesn't match, throw an error if 'have_error' is NULL, 2600 * otherwise set '*have_error' and return -1. 2601 * 2602 * 'node' is used only for error reports: node->key->name identifies the 2603 * field type we were searching for. 2604 */ 2605 static int 2606 from_char_seq_search(int *dest, const char **src, const char *const *array, 2607 char **localized_array, Oid collid, 2608 FormatNode *node, bool *have_error) 2609 { 2610 int len; 2611 2612 if (localized_array == NULL) 2613 *dest = seq_search_ascii(*src, array, &len); 2614 else 2615 *dest = seq_search_localized(*src, localized_array, &len, collid); 2616 2617 if (len <= 0) 2618 { 2619 /* 2620 * In the error report, truncate the string at the next whitespace (if 2621 * any) to avoid including irrelevant data. 2622 */ 2623 char *copy = pstrdup(*src); 2624 char *c; 2625 2626 for (c = copy; *c; c++) 2627 { 2628 if (scanner_isspace(*c)) 2629 { 2630 *c = '\0'; 2631 break; 2632 } 2633 } 2634 2635 RETURN_ERROR(ereport(ERROR, 2636 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 2637 errmsg("invalid value \"%s\" for \"%s\"", 2638 copy, node->key->name), 2639 errdetail("The given value did not match any of " 2640 "the allowed values for this field.")))); 2641 } 2642 *src += len; 2643 return len; 2644 2645 on_error: 2646 return -1; 2647 } 2648 2649 /* ---------- 2650 * Process a TmToChar struct as denoted by a list of FormatNodes. 2651 * The formatted data is written to the string pointed to by 'out'. 
 * ----------
 */
/*
 * Additional notes:
 *
 * 'node' is walked until a NODE_TYPE_END entry is reached.  Non-action nodes
 * are copied to the output literally; action nodes are dispatched on
 * n->key->id.  Every case appends at 's' and then advances 's' past what it
 * wrote; the terminating NUL is stored once after the loop.
 *
 * No output-size checks are made against 'out' here.
 * NOTE(review): presumably the caller sizes 'out' for the worst case --
 * confirm at the call sites.
 *
 * 'is_interval' is consulted directly for CC and passed to ADJUST_YEAR for
 * the year fields.  'collid' is used only for the TM (localized) variants.
 */
static void
DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
{
	FormatNode *n;
	char	   *s;				/* current write position within 'out' */
	struct pg_tm *tm = &in->tm;
	int			i;				/* scratch value for century / year math */

	/* cache localized days and months */
	cache_locale_time();

	s = out;
	for (n = node; n->type != NODE_TYPE_END; n++)
	{
		/* anything that is not an action node is emitted verbatim */
		if (n->type != NODE_TYPE_ACTION)
		{
			strcpy(s, n->character);
			s += strlen(s);
			continue;
		}

		switch (n->key->id)
		{
				/*
				 * Meridiem indicators: the second half of the day selects
				 * the PM string, otherwise the AM string.  The four groups
				 * differ only in which pair of strings they use.
				 */
			case DCH_A_M:
			case DCH_P_M:
				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
					   ? P_M_STR : A_M_STR);
				s += strlen(s);
				break;
			case DCH_AM:
			case DCH_PM:
				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
					   ? PM_STR : AM_STR);
				s += strlen(s);
				break;
			case DCH_a_m:
			case DCH_p_m:
				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
					   ? p_m_STR : a_m_STR);
				s += strlen(s);
				break;
			case DCH_am:
			case DCH_pm:
				strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
					   ? pm_STR : am_STR);
				s += strlen(s);
				break;
			case DCH_HH:
			case DCH_HH12:

				/*
				 * display time as shown on a 12-hour clock, even for
				 * intervals
				 *
				 * Width is 0 with FM, else 2 (3 for a negative hour, leaving
				 * room for the sign); an hour that is a multiple of half a
				 * day prints as HOURS_PER_DAY / 2 rather than 0.
				 */
				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
						tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
						tm->tm_hour % (HOURS_PER_DAY / 2));
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;

				/*
				 * HH24 / MI / SS: zero-padded to width 2 (3 when negative),
				 * unpadded with FM; optional TH/th suffix appended in place.
				 */
			case DCH_HH24:
				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
						tm->tm_hour);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_MI:
				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
						tm->tm_min);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_SS:
				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
						tm->tm_sec);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;

				/*
				 * Helper for the fractional-second cases below: print the
				 * (already scaled) fraction with the given format, apply an
				 * optional TH/th suffix, and advance 's'.  Note that it
				 * expands to several statements, so it must be used as a
				 * full statement inside a case, as it is here.
				 */
#define DCH_to_char_fsec(frac_fmt, frac_val) \
				sprintf(s, frac_fmt, (int) (frac_val)); \
				if (S_THth(n->suffix)) \
					str_numth(s, s, S_TH_TYPE(n->suffix)); \
				s += strlen(s)

			case DCH_FF1:		/* tenth of second */
				DCH_to_char_fsec("%01d", in->fsec / 100000);
				break;
			case DCH_FF2:		/* hundredth of second */
				DCH_to_char_fsec("%02d", in->fsec / 10000);
				break;
			case DCH_FF3:
			case DCH_MS:		/* millisecond */
				DCH_to_char_fsec("%03d", in->fsec / 1000);
				break;
			case DCH_FF4:		/* tenth of a millisecond */
				DCH_to_char_fsec("%04d", in->fsec / 100);
				break;
			case DCH_FF5:		/* hundredth of a millisecond */
				DCH_to_char_fsec("%05d", in->fsec / 10);
				break;
			case DCH_FF6:
			case DCH_US:		/* microsecond */
				DCH_to_char_fsec("%06d", in->fsec);
				break;
#undef DCH_to_char_fsec
			case DCH_SSSS:
				/* seconds computed from the hour, minute and second fields */
				sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
						tm->tm_min * SECS_PER_MINUTE +
						tm->tm_sec);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_tz:
				INVALID_FOR_INTERVAL;
				/* emits nothing when no zone name is available */
				if (tmtcTzn(in))
				{
					/* We assume here that timezone names aren't localized */
					char	   *p = asc_tolower_z(tmtcTzn(in));

					strcpy(s, p);
					pfree(p);
					s += strlen(s);
				}
				break;
			case DCH_TZ:
				INVALID_FOR_INTERVAL;
				/* emits nothing when no zone name is available */
				if (tmtcTzn(in))
				{
					strcpy(s, tmtcTzn(in));
					s += strlen(s);
				}
				break;
			case DCH_TZH:
				INVALID_FOR_INTERVAL;
				/* sign taken from tm_gmtoff, then the whole-hours part of its magnitude */
				sprintf(s, "%c%02d",
						(tm->tm_gmtoff >= 0) ? '+' : '-',
						abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
				s += strlen(s);
				break;
			case DCH_TZM:
				INVALID_FOR_INTERVAL;
				/* minutes left over after removing whole hours from |tm_gmtoff| */
				sprintf(s, "%02d",
						(abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
				s += strlen(s);
				break;
			case DCH_OF:
				INVALID_FOR_INTERVAL;
				/* signed hours, followed by ":MM" only when the offset is not a whole hour */
				sprintf(s, "%c%0*d",
						(tm->tm_gmtoff >= 0) ? '+' : '-',
						S_FM(n->suffix) ? 0 : 2,
						abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
				s += strlen(s);
				if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
				{
					sprintf(s, ":%02d",
							(abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
					s += strlen(s);
				}
				break;

				/*
				 * Era indicators: a year <= 0 selects the BC string,
				 * otherwise the AD string.
				 */
			case DCH_A_D:
			case DCH_B_C:
				INVALID_FOR_INTERVAL;
				strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
				s += strlen(s);
				break;
			case DCH_AD:
			case DCH_BC:
				INVALID_FOR_INTERVAL;
				strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
				s += strlen(s);
				break;
			case DCH_a_d:
			case DCH_b_c:
				INVALID_FOR_INTERVAL;
				strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
				s += strlen(s);
				break;
			case DCH_ad:
			case DCH_bc:
				INVALID_FOR_INTERVAL;
				strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
				s += strlen(s);
				break;

				/*
				 * Month names.  All six cases emit nothing when tm_mon is
				 * zero.  With the TM suffix the localized name is
				 * case-converted per 'collid' and rejected if it is longer
				 * than (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ
				 * bytes.  Without TM, the full names are left-justified in
				 * a 9-character field unless FM is given; the abbreviated
				 * names are copied as-is.
				 *
				 * NOTE(review): the strings returned by the str_*_z() and
				 * asc_*_z() helpers in these cases are not pfree'd here;
				 * presumably they are reclaimed with the current memory
				 * context -- confirm.
				 */
			case DCH_MONTH:
				INVALID_FOR_INTERVAL;
				if (!tm->tm_mon)
					break;
				if (S_TM(n->suffix))
				{
					char	   *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
							asc_toupper_z(months_full[tm->tm_mon - 1]));
				s += strlen(s);
				break;
			case DCH_Month:
				INVALID_FOR_INTERVAL;
				if (!tm->tm_mon)
					break;
				if (S_TM(n->suffix))
				{
					char	   *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
							months_full[tm->tm_mon - 1]);
				s += strlen(s);
				break;
			case DCH_month:
				INVALID_FOR_INTERVAL;
				if (!tm->tm_mon)
					break;
				if (S_TM(n->suffix))
				{
					char	   *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
							asc_tolower_z(months_full[tm->tm_mon - 1]));
				s += strlen(s);
				break;
			case DCH_MON:
				INVALID_FOR_INTERVAL;
				if (!tm->tm_mon)
					break;
				if (S_TM(n->suffix))
				{
					char	   *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
				s += strlen(s);
				break;
			case DCH_Mon:
				INVALID_FOR_INTERVAL;
				if (!tm->tm_mon)
					break;
				if (S_TM(n->suffix))
				{
					char	   *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					strcpy(s, months[tm->tm_mon - 1]);
				s += strlen(s);
				break;
			case DCH_mon:
				INVALID_FOR_INTERVAL;
				if (!tm->tm_mon)
					break;
				if (S_TM(n->suffix))
				{
					char	   *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
				s += strlen(s);
				break;
			case DCH_MM:
				/* numeric month: width 2 (3 if negative) unless FM */
				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
						tm->tm_mon);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;

				/*
				 * Day names, indexed by tm_wday.  Same TM / FM / padding
				 * rules as the month-name cases above.
				 */
			case DCH_DAY:
				INVALID_FOR_INTERVAL;
				if (S_TM(n->suffix))
				{
					char	   *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
							asc_toupper_z(days[tm->tm_wday]));
				s += strlen(s);
				break;
			case DCH_Day:
				INVALID_FOR_INTERVAL;
				if (S_TM(n->suffix))
				{
					char	   *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
							days[tm->tm_wday]);
				s += strlen(s);
				break;
			case DCH_day:
				INVALID_FOR_INTERVAL;
				if (S_TM(n->suffix))
				{
					char	   *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
							asc_tolower_z(days[tm->tm_wday]));
				s += strlen(s);
				break;
			case DCH_DY:
				INVALID_FOR_INTERVAL;
				if (S_TM(n->suffix))
				{
					char	   *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
				s += strlen(s);
				break;
			case DCH_Dy:
				INVALID_FOR_INTERVAL;
				if (S_TM(n->suffix))
				{
					char	   *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					strcpy(s, days_short[tm->tm_wday]);
				s += strlen(s);
				break;
			case DCH_dy:
				INVALID_FOR_INTERVAL;
				if (S_TM(n->suffix))
				{
					char	   *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);

					if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
						strcpy(s, str);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("localized string format value too long")));
				}
				else
					strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
				s += strlen(s);
				break;
			case DCH_DDD:
			case DCH_IDDD:
				/* DDD prints tm_yday; IDDD prints date2isoyearday() of the date */
				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
						(n->key->id == DCH_DDD) ?
						tm->tm_yday :
						date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_DD:
				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_D:
				INVALID_FOR_INTERVAL;
				/* tm_wday shifted up by one */
				sprintf(s, "%d", tm->tm_wday + 1);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_ID:
				INVALID_FOR_INTERVAL;
				/* tm_wday, except that 0 is printed as 7 */
				sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_WW:
				/* week number derived from tm_yday in 7-day groups */
				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
						(tm->tm_yday - 1) / 7 + 1);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_IW:
				sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
						date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_Q:
				/* emits nothing when tm_mon is zero */
				if (!tm->tm_mon)
					break;
				sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_CC:
				if (is_interval)	/* straight calculation */
					i = tm->tm_year / 100;
				else
				{
					if (tm->tm_year > 0)
						/* Century 20 == 1901 - 2000 */
						i = (tm->tm_year - 1) / 100 + 1;
					else
						/* Century 6BC == 600BC - 501BC */
						i = tm->tm_year / 100 - 1;
				}
				/* pad only values that fit in two digits; print wider ones as-is */
				if (i <= 99 && i >= -99)
					sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
				else
					sprintf(s, "%d", i);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_Y_YYY:
				/* thousands, a comma, then the remaining three digits */
				i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
				sprintf(s, "%d,%03d", i,
						ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;

				/*
				 * Year fields.  In each pair the first keyword prints the
				 * adjusted tm_year and the second prints the adjusted
				 * date2isoyear() value; the shorter forms keep only the
				 * low-order digits via the modulo.  The field width is
				 * always chosen from the sign of the adjusted tm_year, one
				 * wider when that is negative.
				 */
			case DCH_YYYY:
			case DCH_IYYY:
				sprintf(s, "%0*d",
						S_FM(n->suffix) ? 0 :
						(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
						(n->key->id == DCH_YYYY ?
						 ADJUST_YEAR(tm->tm_year, is_interval) :
						 ADJUST_YEAR(date2isoyear(tm->tm_year,
												  tm->tm_mon,
												  tm->tm_mday),
									 is_interval)));
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_YYY:
			case DCH_IYY:
				sprintf(s, "%0*d",
						S_FM(n->suffix) ? 0 :
						(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
						(n->key->id == DCH_YYY ?
						 ADJUST_YEAR(tm->tm_year, is_interval) :
						 ADJUST_YEAR(date2isoyear(tm->tm_year,
												  tm->tm_mon,
												  tm->tm_mday),
									 is_interval)) % 1000);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_YY:
			case DCH_IY:
				sprintf(s, "%0*d",
						S_FM(n->suffix) ? 0 :
						(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
						(n->key->id == DCH_YY ?
						 ADJUST_YEAR(tm->tm_year, is_interval) :
						 ADJUST_YEAR(date2isoyear(tm->tm_year,
												  tm->tm_mon,
												  tm->tm_mday),
									 is_interval)) % 100);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_Y:
			case DCH_I:
				sprintf(s, "%1d",
						(n->key->id == DCH_Y ?
						 ADJUST_YEAR(tm->tm_year, is_interval) :
						 ADJUST_YEAR(date2isoyear(tm->tm_year,
												  tm->tm_mon,
												  tm->tm_mday),
									 is_interval)) % 10);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_RM:
				/* FALLTHROUGH */
			case DCH_rm:

				/*
				 * For intervals, values like '12 month' will be reduced to 0
				 * month and some years.  These should be processed.
				 */
				if (!tm->tm_mon && !tm->tm_year)
					break;
				else
				{
					int			mon = 0;

					/* local table pointer; hides any outer 'months' within this block */
					const char *const *months;

					if (n->key->id == DCH_RM)
						months = rm_months_upper;
					else
						months = rm_months_lower;

					/*
					 * Compute the position in the roman-numeral array.  Note
					 * that the contents of the array are reversed, December
					 * being first and January last.
					 */
					if (tm->tm_mon == 0)
					{
						/*
						 * This case is special, and tracks the case of full
						 * interval years.
						 */
						mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
					}
					else if (tm->tm_mon < 0)
					{
						/*
						 * Negative case.  In this case, the calculation is
						 * reversed, where -1 means December, -2 November,
						 * etc.
						 */
						mon = -1 * (tm->tm_mon + 1);
					}
					else
					{
						/*
						 * Common case, with a strictly positive value.  The
						 * position in the array matches with the value of
						 * tm_mon.
						 */
						mon = MONTHS_PER_YEAR - tm->tm_mon;
					}

					/* left-justified in a 4-character field unless FM */
					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
							months[mon]);
					s += strlen(s);
				}
				break;
			case DCH_W:
				/* week of month derived from tm_mday in 7-day groups */
				sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
			case DCH_J:
				/* value returned by date2j() for the date fields */
				sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
				if (S_THth(n->suffix))
					str_numth(s, s, S_TH_TYPE(n->suffix));
				s += strlen(s);
				break;
		}
	}

	/* terminate the output string */
	*s = '\0';
}

/*
 * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
 * The TmFromChar struct pointed to by 'out' is populated with the results.
 *
 * 'collid' identifies the collation to use, if needed.
 * 'std' specifies standard parsing mode.
 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
 *
 * Note: we currently don't have any to_interval() function, so there
 * is no need here for INVALID_FOR_INTERVAL checks.
 */
static void
DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
			  Oid collid, bool std, bool *have_error)
{
	FormatNode *n;
	const char *s;
	int			len,
				value;
	bool		fx_mode = std;

	/* number of extra skipped characters (more than given in format string) */
	int			extra_skip = 0;

	/* cache localized days and months */
	cache_locale_time();

	for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
	{
		/*
		 * Ignore spaces at the beginning of the string and before fields when
		 * not in FX (fixed width) mode.
3317 */ 3318 if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) && 3319 (n->type == NODE_TYPE_ACTION || n == node)) 3320 { 3321 while (*s != '\0' && isspace((unsigned char) *s)) 3322 { 3323 s++; 3324 extra_skip++; 3325 } 3326 } 3327 3328 if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) 3329 { 3330 if (std) 3331 { 3332 /* 3333 * Standard mode requires strict matching between format 3334 * string separators/spaces and input string. 3335 */ 3336 Assert(n->character[0] && !n->character[1]); 3337 3338 if (*s == n->character[0]) 3339 s++; 3340 else 3341 RETURN_ERROR(ereport(ERROR, 3342 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 3343 errmsg("unmatched format separator \"%c\"", 3344 n->character[0])))); 3345 } 3346 else if (!fx_mode) 3347 { 3348 /* 3349 * In non FX (fixed format) mode one format string space or 3350 * separator match to one space or separator in input string. 3351 * Or match nothing if there is no space or separator in the 3352 * current position of input string. 3353 */ 3354 extra_skip--; 3355 if (isspace((unsigned char) *s) || is_separator_char(s)) 3356 { 3357 s++; 3358 extra_skip++; 3359 } 3360 } 3361 else 3362 { 3363 /* 3364 * In FX mode, on format string space or separator we consume 3365 * exactly one character from input string. Notice we don't 3366 * insist that the consumed character match the format's 3367 * character. 3368 */ 3369 s += pg_mblen(s); 3370 } 3371 continue; 3372 } 3373 else if (n->type != NODE_TYPE_ACTION) 3374 { 3375 /* 3376 * Text character, so consume one character from input string. 3377 * Notice we don't insist that the consumed character match the 3378 * format's character. 3379 */ 3380 if (!fx_mode) 3381 { 3382 /* 3383 * In non FX mode we might have skipped some extra characters 3384 * (more than specified in format string) before. In this 3385 * case we don't skip input string character, because it might 3386 * be part of field. 
3387 */ 3388 if (extra_skip > 0) 3389 extra_skip--; 3390 else 3391 s += pg_mblen(s); 3392 } 3393 else 3394 { 3395 int chlen = pg_mblen(s); 3396 3397 /* 3398 * Standard mode requires strict match of format characters. 3399 */ 3400 if (std && n->type == NODE_TYPE_CHAR && 3401 strncmp(s, n->character, chlen) != 0) 3402 RETURN_ERROR(ereport(ERROR, 3403 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 3404 errmsg("unmatched format character \"%s\"", 3405 n->character)))); 3406 3407 s += chlen; 3408 } 3409 continue; 3410 } 3411 3412 from_char_set_mode(out, n->key->date_mode, have_error); 3413 CHECK_ERROR; 3414 3415 switch (n->key->id) 3416 { 3417 case DCH_FX: 3418 fx_mode = true; 3419 break; 3420 case DCH_A_M: 3421 case DCH_P_M: 3422 case DCH_a_m: 3423 case DCH_p_m: 3424 from_char_seq_search(&value, &s, ampm_strings_long, 3425 NULL, InvalidOid, 3426 n, have_error); 3427 CHECK_ERROR; 3428 from_char_set_int(&out->pm, value % 2, n, have_error); 3429 CHECK_ERROR; 3430 out->clock = CLOCK_12_HOUR; 3431 break; 3432 case DCH_AM: 3433 case DCH_PM: 3434 case DCH_am: 3435 case DCH_pm: 3436 from_char_seq_search(&value, &s, ampm_strings, 3437 NULL, InvalidOid, 3438 n, have_error); 3439 CHECK_ERROR; 3440 from_char_set_int(&out->pm, value % 2, n, have_error); 3441 CHECK_ERROR; 3442 out->clock = CLOCK_12_HOUR; 3443 break; 3444 case DCH_HH: 3445 case DCH_HH12: 3446 from_char_parse_int_len(&out->hh, &s, 2, n, have_error); 3447 CHECK_ERROR; 3448 out->clock = CLOCK_12_HOUR; 3449 SKIP_THth(s, n->suffix); 3450 break; 3451 case DCH_HH24: 3452 from_char_parse_int_len(&out->hh, &s, 2, n, have_error); 3453 CHECK_ERROR; 3454 SKIP_THth(s, n->suffix); 3455 break; 3456 case DCH_MI: 3457 from_char_parse_int(&out->mi, &s, n, have_error); 3458 CHECK_ERROR; 3459 SKIP_THth(s, n->suffix); 3460 break; 3461 case DCH_SS: 3462 from_char_parse_int(&out->ss, &s, n, have_error); 3463 CHECK_ERROR; 3464 SKIP_THth(s, n->suffix); 3465 break; 3466 case DCH_MS: /* millisecond */ 3467 len = from_char_parse_int_len(&out->ms, 
&s, 3, n, have_error); 3468 CHECK_ERROR; 3469 3470 /* 3471 * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25 3472 */ 3473 out->ms *= len == 1 ? 100 : 3474 len == 2 ? 10 : 1; 3475 3476 SKIP_THth(s, n->suffix); 3477 break; 3478 case DCH_FF1: 3479 case DCH_FF2: 3480 case DCH_FF3: 3481 case DCH_FF4: 3482 case DCH_FF5: 3483 case DCH_FF6: 3484 out->ff = n->key->id - DCH_FF1 + 1; 3485 /* fall through */ 3486 case DCH_US: /* microsecond */ 3487 len = from_char_parse_int_len(&out->us, &s, 3488 n->key->id == DCH_US ? 6 : 3489 out->ff, n, have_error); 3490 CHECK_ERROR; 3491 3492 out->us *= len == 1 ? 100000 : 3493 len == 2 ? 10000 : 3494 len == 3 ? 1000 : 3495 len == 4 ? 100 : 3496 len == 5 ? 10 : 1; 3497 3498 SKIP_THth(s, n->suffix); 3499 break; 3500 case DCH_SSSS: 3501 from_char_parse_int(&out->ssss, &s, n, have_error); 3502 CHECK_ERROR; 3503 SKIP_THth(s, n->suffix); 3504 break; 3505 case DCH_tz: 3506 case DCH_TZ: 3507 case DCH_OF: 3508 RETURN_ERROR(ereport(ERROR, 3509 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 3510 errmsg("formatting field \"%s\" is only supported in to_char", 3511 n->key->name)))); 3512 CHECK_ERROR; 3513 break; 3514 case DCH_TZH: 3515 3516 /* 3517 * Value of TZH might be negative. And the issue is that we 3518 * might swallow minus sign as the separator. So, if we have 3519 * skipped more characters than specified in the format 3520 * string, then we consider prepending last skipped minus to 3521 * TZH. 3522 */ 3523 if (*s == '+' || *s == '-' || *s == ' ') 3524 { 3525 out->tzsign = *s == '-' ? 
-1 : +1; 3526 s++; 3527 } 3528 else 3529 { 3530 if (extra_skip > 0 && *(s - 1) == '-') 3531 out->tzsign = -1; 3532 else 3533 out->tzsign = +1; 3534 } 3535 3536 from_char_parse_int_len(&out->tzh, &s, 2, n, have_error); 3537 CHECK_ERROR; 3538 break; 3539 case DCH_TZM: 3540 /* assign positive timezone sign if TZH was not seen before */ 3541 if (!out->tzsign) 3542 out->tzsign = +1; 3543 from_char_parse_int_len(&out->tzm, &s, 2, n, have_error); 3544 CHECK_ERROR; 3545 break; 3546 case DCH_A_D: 3547 case DCH_B_C: 3548 case DCH_a_d: 3549 case DCH_b_c: 3550 from_char_seq_search(&value, &s, adbc_strings_long, 3551 NULL, InvalidOid, 3552 n, have_error); 3553 CHECK_ERROR; 3554 from_char_set_int(&out->bc, value % 2, n, have_error); 3555 CHECK_ERROR; 3556 break; 3557 case DCH_AD: 3558 case DCH_BC: 3559 case DCH_ad: 3560 case DCH_bc: 3561 from_char_seq_search(&value, &s, adbc_strings, 3562 NULL, InvalidOid, 3563 n, have_error); 3564 CHECK_ERROR; 3565 from_char_set_int(&out->bc, value % 2, n, have_error); 3566 CHECK_ERROR; 3567 break; 3568 case DCH_MONTH: 3569 case DCH_Month: 3570 case DCH_month: 3571 from_char_seq_search(&value, &s, months_full, 3572 S_TM(n->suffix) ? localized_full_months : NULL, 3573 collid, 3574 n, have_error); 3575 CHECK_ERROR; 3576 from_char_set_int(&out->mm, value + 1, n, have_error); 3577 CHECK_ERROR; 3578 break; 3579 case DCH_MON: 3580 case DCH_Mon: 3581 case DCH_mon: 3582 from_char_seq_search(&value, &s, months, 3583 S_TM(n->suffix) ? localized_abbrev_months : NULL, 3584 collid, 3585 n, have_error); 3586 CHECK_ERROR; 3587 from_char_set_int(&out->mm, value + 1, n, have_error); 3588 CHECK_ERROR; 3589 break; 3590 case DCH_MM: 3591 from_char_parse_int(&out->mm, &s, n, have_error); 3592 CHECK_ERROR; 3593 SKIP_THth(s, n->suffix); 3594 break; 3595 case DCH_DAY: 3596 case DCH_Day: 3597 case DCH_day: 3598 from_char_seq_search(&value, &s, days, 3599 S_TM(n->suffix) ? 
localized_full_days : NULL, 3600 collid, 3601 n, have_error); 3602 CHECK_ERROR; 3603 from_char_set_int(&out->d, value, n, have_error); 3604 CHECK_ERROR; 3605 out->d++; 3606 break; 3607 case DCH_DY: 3608 case DCH_Dy: 3609 case DCH_dy: 3610 from_char_seq_search(&value, &s, days_short, 3611 S_TM(n->suffix) ? localized_abbrev_days : NULL, 3612 collid, 3613 n, have_error); 3614 CHECK_ERROR; 3615 from_char_set_int(&out->d, value, n, have_error); 3616 CHECK_ERROR; 3617 out->d++; 3618 break; 3619 case DCH_DDD: 3620 from_char_parse_int(&out->ddd, &s, n, have_error); 3621 CHECK_ERROR; 3622 SKIP_THth(s, n->suffix); 3623 break; 3624 case DCH_IDDD: 3625 from_char_parse_int_len(&out->ddd, &s, 3, n, have_error); 3626 CHECK_ERROR; 3627 SKIP_THth(s, n->suffix); 3628 break; 3629 case DCH_DD: 3630 from_char_parse_int(&out->dd, &s, n, have_error); 3631 CHECK_ERROR; 3632 SKIP_THth(s, n->suffix); 3633 break; 3634 case DCH_D: 3635 from_char_parse_int(&out->d, &s, n, have_error); 3636 CHECK_ERROR; 3637 SKIP_THth(s, n->suffix); 3638 break; 3639 case DCH_ID: 3640 from_char_parse_int_len(&out->d, &s, 1, n, have_error); 3641 CHECK_ERROR; 3642 /* Shift numbering to match Gregorian where Sunday = 1 */ 3643 if (++out->d > 7) 3644 out->d = 1; 3645 SKIP_THth(s, n->suffix); 3646 break; 3647 case DCH_WW: 3648 case DCH_IW: 3649 from_char_parse_int(&out->ww, &s, n, have_error); 3650 CHECK_ERROR; 3651 SKIP_THth(s, n->suffix); 3652 break; 3653 case DCH_Q: 3654 3655 /* 3656 * We ignore 'Q' when converting to date because it is unclear 3657 * which date in the quarter to use, and some people specify 3658 * both quarter and month, so if it was honored it might 3659 * conflict with the supplied month. That is also why we don't 3660 * throw an error. 3661 * 3662 * We still parse the source string for an integer, but it 3663 * isn't stored anywhere in 'out'. 
3664 */ 3665 from_char_parse_int((int *) NULL, &s, n, have_error); 3666 CHECK_ERROR; 3667 SKIP_THth(s, n->suffix); 3668 break; 3669 case DCH_CC: 3670 from_char_parse_int(&out->cc, &s, n, have_error); 3671 CHECK_ERROR; 3672 SKIP_THth(s, n->suffix); 3673 break; 3674 case DCH_Y_YYY: 3675 { 3676 int matched, 3677 years, 3678 millennia, 3679 nch; 3680 3681 matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch); 3682 if (matched < 2) 3683 RETURN_ERROR(ereport(ERROR, 3684 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 3685 errmsg("invalid input string for \"Y,YYY\"")))); 3686 years += (millennia * 1000); 3687 from_char_set_int(&out->year, years, n, have_error); 3688 CHECK_ERROR; 3689 out->yysz = 4; 3690 s += nch; 3691 SKIP_THth(s, n->suffix); 3692 } 3693 break; 3694 case DCH_YYYY: 3695 case DCH_IYYY: 3696 from_char_parse_int(&out->year, &s, n, have_error); 3697 CHECK_ERROR; 3698 out->yysz = 4; 3699 SKIP_THth(s, n->suffix); 3700 break; 3701 case DCH_YYY: 3702 case DCH_IYY: 3703 len = from_char_parse_int(&out->year, &s, n, have_error); 3704 CHECK_ERROR; 3705 if (len < 4) 3706 out->year = adjust_partial_year_to_2020(out->year); 3707 out->yysz = 3; 3708 SKIP_THth(s, n->suffix); 3709 break; 3710 case DCH_YY: 3711 case DCH_IY: 3712 len = from_char_parse_int(&out->year, &s, n, have_error); 3713 CHECK_ERROR; 3714 if (len < 4) 3715 out->year = adjust_partial_year_to_2020(out->year); 3716 out->yysz = 2; 3717 SKIP_THth(s, n->suffix); 3718 break; 3719 case DCH_Y: 3720 case DCH_I: 3721 len = from_char_parse_int(&out->year, &s, n, have_error); 3722 CHECK_ERROR; 3723 if (len < 4) 3724 out->year = adjust_partial_year_to_2020(out->year); 3725 out->yysz = 1; 3726 SKIP_THth(s, n->suffix); 3727 break; 3728 case DCH_RM: 3729 case DCH_rm: 3730 from_char_seq_search(&value, &s, rm_months_lower, 3731 NULL, InvalidOid, 3732 n, have_error); 3733 CHECK_ERROR; 3734 from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, 3735 n, have_error); 3736 CHECK_ERROR; 3737 break; 3738 case DCH_W: 3739 
from_char_parse_int(&out->w, &s, n, have_error); 3740 CHECK_ERROR; 3741 SKIP_THth(s, n->suffix); 3742 break; 3743 case DCH_J: 3744 from_char_parse_int(&out->j, &s, n, have_error); 3745 CHECK_ERROR; 3746 SKIP_THth(s, n->suffix); 3747 break; 3748 } 3749 3750 /* Ignore all spaces after fields */ 3751 if (!fx_mode) 3752 { 3753 extra_skip = 0; 3754 while (*s != '\0' && isspace((unsigned char) *s)) 3755 { 3756 s++; 3757 extra_skip++; 3758 } 3759 } 3760 } 3761 3762 /* 3763 * Standard parsing mode doesn't allow unmatched format patterns or 3764 * trailing characters in the input string. 3765 */ 3766 if (std) 3767 { 3768 if (n->type != NODE_TYPE_END) 3769 RETURN_ERROR(ereport(ERROR, 3770 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 3771 errmsg("input string is too short for datetime format")))); 3772 3773 while (*s != '\0' && isspace((unsigned char) *s)) 3774 s++; 3775 3776 if (*s != '\0') 3777 RETURN_ERROR(ereport(ERROR, 3778 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 3779 errmsg("trailing characters remain in input string " 3780 "after datetime format")))); 3781 } 3782 3783 on_error: 3784 return; 3785 } 3786 3787 /* 3788 * The invariant for DCH cache entry management is that DCHCounter is equal 3789 * to the maximum age value among the existing entries, and we increment it 3790 * whenever an access occurs. If we approach overflow, deal with that by 3791 * halving all the age values, so that we retain a fairly accurate idea of 3792 * which entries are oldest. 3793 */ 3794 static inline void 3795 DCH_prevent_counter_overflow(void) 3796 { 3797 if (DCHCounter >= (INT_MAX - 1)) 3798 { 3799 for (int i = 0; i < n_DCHCache; i++) 3800 DCHCache[i]->age >>= 1; 3801 DCHCounter >>= 1; 3802 } 3803 } 3804 3805 /* 3806 * Get mask of date/time/zone components present in format nodes. 3807 * 3808 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set. 
3809 */ 3810 static int 3811 DCH_datetime_type(FormatNode *node, bool *have_error) 3812 { 3813 FormatNode *n; 3814 int flags = 0; 3815 3816 for (n = node; n->type != NODE_TYPE_END; n++) 3817 { 3818 if (n->type != NODE_TYPE_ACTION) 3819 continue; 3820 3821 switch (n->key->id) 3822 { 3823 case DCH_FX: 3824 break; 3825 case DCH_A_M: 3826 case DCH_P_M: 3827 case DCH_a_m: 3828 case DCH_p_m: 3829 case DCH_AM: 3830 case DCH_PM: 3831 case DCH_am: 3832 case DCH_pm: 3833 case DCH_HH: 3834 case DCH_HH12: 3835 case DCH_HH24: 3836 case DCH_MI: 3837 case DCH_SS: 3838 case DCH_MS: /* millisecond */ 3839 case DCH_US: /* microsecond */ 3840 case DCH_FF1: 3841 case DCH_FF2: 3842 case DCH_FF3: 3843 case DCH_FF4: 3844 case DCH_FF5: 3845 case DCH_FF6: 3846 case DCH_SSSS: 3847 flags |= DCH_TIMED; 3848 break; 3849 case DCH_tz: 3850 case DCH_TZ: 3851 case DCH_OF: 3852 RETURN_ERROR(ereport(ERROR, 3853 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 3854 errmsg("formatting field \"%s\" is only supported in to_char", 3855 n->key->name)))); 3856 flags |= DCH_ZONED; 3857 break; 3858 case DCH_TZH: 3859 case DCH_TZM: 3860 flags |= DCH_ZONED; 3861 break; 3862 case DCH_A_D: 3863 case DCH_B_C: 3864 case DCH_a_d: 3865 case DCH_b_c: 3866 case DCH_AD: 3867 case DCH_BC: 3868 case DCH_ad: 3869 case DCH_bc: 3870 case DCH_MONTH: 3871 case DCH_Month: 3872 case DCH_month: 3873 case DCH_MON: 3874 case DCH_Mon: 3875 case DCH_mon: 3876 case DCH_MM: 3877 case DCH_DAY: 3878 case DCH_Day: 3879 case DCH_day: 3880 case DCH_DY: 3881 case DCH_Dy: 3882 case DCH_dy: 3883 case DCH_DDD: 3884 case DCH_IDDD: 3885 case DCH_DD: 3886 case DCH_D: 3887 case DCH_ID: 3888 case DCH_WW: 3889 case DCH_Q: 3890 case DCH_CC: 3891 case DCH_Y_YYY: 3892 case DCH_YYYY: 3893 case DCH_IYYY: 3894 case DCH_YYY: 3895 case DCH_IYY: 3896 case DCH_YY: 3897 case DCH_IY: 3898 case DCH_Y: 3899 case DCH_I: 3900 case DCH_RM: 3901 case DCH_rm: 3902 case DCH_W: 3903 case DCH_J: 3904 flags |= DCH_DATED; 3905 break; 3906 } 3907 } 3908 3909 on_error: 3910 return 
flags; 3911 } 3912 3913 /* select a DCHCacheEntry to hold the given format picture */ 3914 static DCHCacheEntry * 3915 DCH_cache_getnew(const char *str, bool std) 3916 { 3917 DCHCacheEntry *ent; 3918 3919 /* Ensure we can advance DCHCounter below */ 3920 DCH_prevent_counter_overflow(); 3921 3922 /* 3923 * If cache is full, remove oldest entry (or recycle first not-valid one) 3924 */ 3925 if (n_DCHCache >= DCH_CACHE_ENTRIES) 3926 { 3927 DCHCacheEntry *old = DCHCache[0]; 3928 3929 #ifdef DEBUG_TO_FROM_CHAR 3930 elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache); 3931 #endif 3932 if (old->valid) 3933 { 3934 for (int i = 1; i < DCH_CACHE_ENTRIES; i++) 3935 { 3936 ent = DCHCache[i]; 3937 if (!ent->valid) 3938 { 3939 old = ent; 3940 break; 3941 } 3942 if (ent->age < old->age) 3943 old = ent; 3944 } 3945 } 3946 #ifdef DEBUG_TO_FROM_CHAR 3947 elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age); 3948 #endif 3949 old->valid = false; 3950 StrNCpy(old->str, str, DCH_CACHE_SIZE + 1); 3951 old->age = (++DCHCounter); 3952 /* caller is expected to fill format, then set valid */ 3953 return old; 3954 } 3955 else 3956 { 3957 #ifdef DEBUG_TO_FROM_CHAR 3958 elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache); 3959 #endif 3960 Assert(DCHCache[n_DCHCache] == NULL); 3961 DCHCache[n_DCHCache] = ent = (DCHCacheEntry *) 3962 MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry)); 3963 ent->valid = false; 3964 StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1); 3965 ent->std = std; 3966 ent->age = (++DCHCounter); 3967 /* caller is expected to fill format, then set valid */ 3968 ++n_DCHCache; 3969 return ent; 3970 } 3971 } 3972 3973 /* look for an existing DCHCacheEntry matching the given format picture */ 3974 static DCHCacheEntry * 3975 DCH_cache_search(const char *str, bool std) 3976 { 3977 /* Ensure we can advance DCHCounter below */ 3978 DCH_prevent_counter_overflow(); 3979 3980 for (int i = 0; i < n_DCHCache; i++) 3981 { 3982 DCHCacheEntry *ent = DCHCache[i]; 3983 3984 
if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std) 3985 { 3986 ent->age = (++DCHCounter); 3987 return ent; 3988 } 3989 } 3990 3991 return NULL; 3992 } 3993 3994 /* Find or create a DCHCacheEntry for the given format picture */ 3995 static DCHCacheEntry * 3996 DCH_cache_fetch(const char *str, bool std) 3997 { 3998 DCHCacheEntry *ent; 3999 4000 if ((ent = DCH_cache_search(str, std)) == NULL) 4001 { 4002 /* 4003 * Not in the cache, must run parser and save a new format-picture to 4004 * the cache. Do not mark the cache entry valid until parsing 4005 * succeeds. 4006 */ 4007 ent = DCH_cache_getnew(str, std); 4008 4009 parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index, 4010 DCH_FLAG | (std ? STD_FLAG : 0), NULL); 4011 4012 ent->valid = true; 4013 } 4014 return ent; 4015 } 4016 4017 /* 4018 * Format a date/time or interval into a string according to fmt. 4019 * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char 4020 * for formatting. 4021 */ 4022 static text * 4023 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid) 4024 { 4025 FormatNode *format; 4026 char *fmt_str, 4027 *result; 4028 bool incache; 4029 int fmt_len; 4030 text *res; 4031 4032 /* 4033 * Convert fmt to C string 4034 */ 4035 fmt_str = text_to_cstring(fmt); 4036 fmt_len = strlen(fmt_str); 4037 4038 /* 4039 * Allocate workspace for result as C string 4040 */ 4041 result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1); 4042 *result = '\0'; 4043 4044 if (fmt_len > DCH_CACHE_SIZE) 4045 { 4046 /* 4047 * Allocate new memory if format picture is bigger than static cache 4048 * and do not use cache (call parser always) 4049 */ 4050 incache = false; 4051 4052 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); 4053 4054 parse_format(format, fmt_str, DCH_keywords, 4055 DCH_suff, DCH_index, DCH_FLAG, NULL); 4056 } 4057 else 4058 { 4059 /* 4060 * Use cache buffers 4061 */ 4062 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false); 
4063 4064 incache = true; 4065 format = ent->format; 4066 } 4067 4068 /* The real work is here */ 4069 DCH_to_char(format, is_interval, tmtc, result, collid); 4070 4071 if (!incache) 4072 pfree(format); 4073 4074 pfree(fmt_str); 4075 4076 /* convert C-string result to TEXT format */ 4077 res = cstring_to_text(result); 4078 4079 pfree(result); 4080 return res; 4081 } 4082 4083 /**************************************************************************** 4084 * Public routines 4085 ***************************************************************************/ 4086 4087 /* ------------------- 4088 * TIMESTAMP to_char() 4089 * ------------------- 4090 */ 4091 Datum 4092 timestamp_to_char(PG_FUNCTION_ARGS) 4093 { 4094 Timestamp dt = PG_GETARG_TIMESTAMP(0); 4095 text *fmt = PG_GETARG_TEXT_PP(1), 4096 *res; 4097 TmToChar tmtc; 4098 struct pg_tm *tm; 4099 int thisdate; 4100 4101 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt)) 4102 PG_RETURN_NULL(); 4103 4104 ZERO_tmtc(&tmtc); 4105 tm = tmtcTm(&tmtc); 4106 4107 if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0) 4108 ereport(ERROR, 4109 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4110 errmsg("timestamp out of range"))); 4111 4112 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday); 4113 tm->tm_wday = (thisdate + 1) % 7; 4114 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1; 4115 4116 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) 4117 PG_RETURN_NULL(); 4118 4119 PG_RETURN_TEXT_P(res); 4120 } 4121 4122 Datum 4123 timestamptz_to_char(PG_FUNCTION_ARGS) 4124 { 4125 TimestampTz dt = PG_GETARG_TIMESTAMP(0); 4126 text *fmt = PG_GETARG_TEXT_PP(1), 4127 *res; 4128 TmToChar tmtc; 4129 int tz; 4130 struct pg_tm *tm; 4131 int thisdate; 4132 4133 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt)) 4134 PG_RETURN_NULL(); 4135 4136 ZERO_tmtc(&tmtc); 4137 tm = tmtcTm(&tmtc); 4138 4139 if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0) 4140 
ereport(ERROR, 4141 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4142 errmsg("timestamp out of range"))); 4143 4144 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday); 4145 tm->tm_wday = (thisdate + 1) % 7; 4146 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1; 4147 4148 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) 4149 PG_RETURN_NULL(); 4150 4151 PG_RETURN_TEXT_P(res); 4152 } 4153 4154 4155 /* ------------------- 4156 * INTERVAL to_char() 4157 * ------------------- 4158 */ 4159 Datum 4160 interval_to_char(PG_FUNCTION_ARGS) 4161 { 4162 Interval *it = PG_GETARG_INTERVAL_P(0); 4163 text *fmt = PG_GETARG_TEXT_PP(1), 4164 *res; 4165 TmToChar tmtc; 4166 struct pg_tm *tm; 4167 4168 if (VARSIZE_ANY_EXHDR(fmt) <= 0) 4169 PG_RETURN_NULL(); 4170 4171 ZERO_tmtc(&tmtc); 4172 tm = tmtcTm(&tmtc); 4173 4174 if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0) 4175 PG_RETURN_NULL(); 4176 4177 /* wday is meaningless, yday approximates the total span in days */ 4178 tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday; 4179 4180 if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION()))) 4181 PG_RETURN_NULL(); 4182 4183 PG_RETURN_TEXT_P(res); 4184 } 4185 4186 /* --------------------- 4187 * TO_TIMESTAMP() 4188 * 4189 * Make Timestamp from date_str which is formatted at argument 'fmt' 4190 * ( to_timestamp is reverse to_char() ) 4191 * --------------------- 4192 */ 4193 Datum 4194 to_timestamp(PG_FUNCTION_ARGS) 4195 { 4196 text *date_txt = PG_GETARG_TEXT_PP(0); 4197 text *fmt = PG_GETARG_TEXT_PP(1); 4198 Oid collid = PG_GET_COLLATION(); 4199 Timestamp result; 4200 int tz; 4201 struct pg_tm tm; 4202 fsec_t fsec; 4203 int fprec; 4204 4205 do_to_timestamp(date_txt, fmt, collid, false, 4206 &tm, &fsec, &fprec, NULL, NULL); 4207 4208 /* Use the specified time zone, if any. 
*/ 4209 if (tm.tm_zone) 4210 { 4211 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz); 4212 4213 if (dterr) 4214 DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz"); 4215 } 4216 else 4217 tz = DetermineTimeZoneOffset(&tm, session_timezone); 4218 4219 if (tm2timestamp(&tm, fsec, &tz, &result) != 0) 4220 ereport(ERROR, 4221 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4222 errmsg("timestamp out of range"))); 4223 4224 /* Use the specified fractional precision, if any. */ 4225 if (fprec) 4226 AdjustTimestampForTypmod(&result, fprec); 4227 4228 PG_RETURN_TIMESTAMP(result); 4229 } 4230 4231 /* ---------- 4232 * TO_DATE 4233 * Make Date from date_str which is formatted at argument 'fmt' 4234 * ---------- 4235 */ 4236 Datum 4237 to_date(PG_FUNCTION_ARGS) 4238 { 4239 text *date_txt = PG_GETARG_TEXT_PP(0); 4240 text *fmt = PG_GETARG_TEXT_PP(1); 4241 Oid collid = PG_GET_COLLATION(); 4242 DateADT result; 4243 struct pg_tm tm; 4244 fsec_t fsec; 4245 4246 do_to_timestamp(date_txt, fmt, collid, false, 4247 &tm, &fsec, NULL, NULL, NULL); 4248 4249 /* Prevent overflow in Julian-day routines */ 4250 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) 4251 ereport(ERROR, 4252 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4253 errmsg("date out of range: \"%s\"", 4254 text_to_cstring(date_txt)))); 4255 4256 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE; 4257 4258 /* Now check for just-out-of-range dates */ 4259 if (!IS_VALID_DATE(result)) 4260 ereport(ERROR, 4261 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4262 errmsg("date out of range: \"%s\"", 4263 text_to_cstring(date_txt)))); 4264 4265 PG_RETURN_DATEADT(result); 4266 } 4267 4268 /* 4269 * Convert the 'date_txt' input to a datetime type using argument 'fmt' 4270 * as a format string. The collation 'collid' may be used for case-folding 4271 * rules in some cases. 'strict' specifies standard parsing mode. 
4272 * 4273 * The actual data type (returned in 'typid', 'typmod') is determined by 4274 * the presence of date/time/zone components in the format string. 4275 * 4276 * When timezone component is present, the corresponding offset is 4277 * returned in '*tz'. 4278 * 4279 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set 4280 * and zero value is returned. 4281 */ 4282 Datum 4283 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict, 4284 Oid *typid, int32 *typmod, int *tz, 4285 bool *have_error) 4286 { 4287 struct pg_tm tm; 4288 fsec_t fsec; 4289 int fprec; 4290 uint32 flags; 4291 4292 do_to_timestamp(date_txt, fmt, collid, strict, 4293 &tm, &fsec, &fprec, &flags, have_error); 4294 CHECK_ERROR; 4295 4296 *typmod = fprec ? fprec : -1; /* fractional part precision */ 4297 4298 if (flags & DCH_DATED) 4299 { 4300 if (flags & DCH_TIMED) 4301 { 4302 if (flags & DCH_ZONED) 4303 { 4304 TimestampTz result; 4305 4306 if (tm.tm_zone) 4307 { 4308 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz); 4309 4310 if (dterr) 4311 DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz"); 4312 } 4313 else 4314 { 4315 /* 4316 * Time zone is present in format string, but not in input 4317 * string. Assuming do_to_timestamp() triggers no error 4318 * this should be possible only in non-strict case. 
4319 */ 4320 Assert(!strict); 4321 4322 RETURN_ERROR(ereport(ERROR, 4323 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 4324 errmsg("missing time zone in input string for type timestamptz")))); 4325 } 4326 4327 if (tm2timestamp(&tm, fsec, tz, &result) != 0) 4328 RETURN_ERROR(ereport(ERROR, 4329 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4330 errmsg("timestamptz out of range")))); 4331 4332 AdjustTimestampForTypmod(&result, *typmod); 4333 4334 *typid = TIMESTAMPTZOID; 4335 return TimestampTzGetDatum(result); 4336 } 4337 else 4338 { 4339 Timestamp result; 4340 4341 if (tm2timestamp(&tm, fsec, NULL, &result) != 0) 4342 RETURN_ERROR(ereport(ERROR, 4343 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4344 errmsg("timestamp out of range")))); 4345 4346 AdjustTimestampForTypmod(&result, *typmod); 4347 4348 *typid = TIMESTAMPOID; 4349 return TimestampGetDatum(result); 4350 } 4351 } 4352 else 4353 { 4354 if (flags & DCH_ZONED) 4355 { 4356 RETURN_ERROR(ereport(ERROR, 4357 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 4358 errmsg("datetime format is zoned but not timed")))); 4359 } 4360 else 4361 { 4362 DateADT result; 4363 4364 /* Prevent overflow in Julian-day routines */ 4365 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) 4366 RETURN_ERROR(ereport(ERROR, 4367 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4368 errmsg("date out of range: \"%s\"", 4369 text_to_cstring(date_txt))))); 4370 4371 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - 4372 POSTGRES_EPOCH_JDATE; 4373 4374 /* Now check for just-out-of-range dates */ 4375 if (!IS_VALID_DATE(result)) 4376 RETURN_ERROR(ereport(ERROR, 4377 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4378 errmsg("date out of range: \"%s\"", 4379 text_to_cstring(date_txt))))); 4380 4381 *typid = DATEOID; 4382 return DateADTGetDatum(result); 4383 } 4384 } 4385 } 4386 else if (flags & DCH_TIMED) 4387 { 4388 if (flags & DCH_ZONED) 4389 { 4390 TimeTzADT *result = palloc(sizeof(TimeTzADT)); 4391 4392 if (tm.tm_zone) 4393 { 4394 int 
dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz); 4395 4396 if (dterr) 4397 RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz")); 4398 } 4399 else 4400 { 4401 /* 4402 * Time zone is present in format string, but not in input 4403 * string. Assuming do_to_timestamp() triggers no error this 4404 * should be possible only in non-strict case. 4405 */ 4406 Assert(!strict); 4407 4408 RETURN_ERROR(ereport(ERROR, 4409 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 4410 errmsg("missing time zone in input string for type timetz")))); 4411 } 4412 4413 if (tm2timetz(&tm, fsec, *tz, result) != 0) 4414 RETURN_ERROR(ereport(ERROR, 4415 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4416 errmsg("timetz out of range")))); 4417 4418 AdjustTimeForTypmod(&result->time, *typmod); 4419 4420 *typid = TIMETZOID; 4421 return TimeTzADTPGetDatum(result); 4422 } 4423 else 4424 { 4425 TimeADT result; 4426 4427 if (tm2time(&tm, fsec, &result) != 0) 4428 RETURN_ERROR(ereport(ERROR, 4429 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 4430 errmsg("time out of range")))); 4431 4432 AdjustTimeForTypmod(&result, *typmod); 4433 4434 *typid = TIMEOID; 4435 return TimeADTGetDatum(result); 4436 } 4437 } 4438 else 4439 { 4440 RETURN_ERROR(ereport(ERROR, 4441 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 4442 errmsg("datetime format is not dated and not timed")))); 4443 } 4444 4445 on_error: 4446 return (Datum) 0; 4447 } 4448 4449 /* 4450 * do_to_timestamp: shared code for to_timestamp and to_date 4451 * 4452 * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm, 4453 * fractional seconds, and fractional precision. 4454 * 4455 * 'collid' identifies the collation to use, if needed. 4456 * 'std' specifies standard parsing mode. 4457 * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags', 4458 * if that is not NULL. 4459 * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set. 
4460 * 4461 * We parse 'fmt' into a list of FormatNodes, which is then passed to 4462 * DCH_from_char to populate a TmFromChar with the parsed contents of 4463 * 'date_txt'. 4464 * 4465 * The TmFromChar is then analysed and converted into the final results in 4466 * struct 'tm', 'fsec', and 'fprec'. 4467 */ 4468 static void 4469 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std, 4470 struct pg_tm *tm, fsec_t *fsec, int *fprec, 4471 uint32 *flags, bool *have_error) 4472 { 4473 FormatNode *format = NULL; 4474 TmFromChar tmfc; 4475 int fmt_len; 4476 char *date_str; 4477 int fmask; 4478 bool incache = false; 4479 4480 Assert(tm != NULL); 4481 Assert(fsec != NULL); 4482 4483 date_str = text_to_cstring(date_txt); 4484 4485 ZERO_tmfc(&tmfc); 4486 ZERO_tm(tm); 4487 *fsec = 0; 4488 if (fprec) 4489 *fprec = 0; 4490 if (flags) 4491 *flags = 0; 4492 fmask = 0; /* bit mask for ValidateDate() */ 4493 4494 fmt_len = VARSIZE_ANY_EXHDR(fmt); 4495 4496 if (fmt_len) 4497 { 4498 char *fmt_str; 4499 4500 fmt_str = text_to_cstring(fmt); 4501 4502 if (fmt_len > DCH_CACHE_SIZE) 4503 { 4504 /* 4505 * Allocate new memory if format picture is bigger than static 4506 * cache and do not use cache (call parser always) 4507 */ 4508 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); 4509 4510 parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index, 4511 DCH_FLAG | (std ? 
STD_FLAG : 0), NULL); 4512 } 4513 else 4514 { 4515 /* 4516 * Use cache buffers 4517 */ 4518 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std); 4519 4520 incache = true; 4521 format = ent->format; 4522 } 4523 4524 #ifdef DEBUG_TO_FROM_CHAR 4525 /* dump_node(format, fmt_len); */ 4526 /* dump_index(DCH_keywords, DCH_index); */ 4527 #endif 4528 4529 DCH_from_char(format, date_str, &tmfc, collid, std, have_error); 4530 CHECK_ERROR; 4531 4532 pfree(fmt_str); 4533 4534 if (flags) 4535 *flags = DCH_datetime_type(format, have_error); 4536 4537 if (!incache) 4538 { 4539 pfree(format); 4540 format = NULL; 4541 } 4542 4543 CHECK_ERROR; 4544 } 4545 4546 DEBUG_TMFC(&tmfc); 4547 4548 /* 4549 * Convert to_date/to_timestamp input fields to standard 'tm' 4550 */ 4551 if (tmfc.ssss) 4552 { 4553 int x = tmfc.ssss; 4554 4555 tm->tm_hour = x / SECS_PER_HOUR; 4556 x %= SECS_PER_HOUR; 4557 tm->tm_min = x / SECS_PER_MINUTE; 4558 x %= SECS_PER_MINUTE; 4559 tm->tm_sec = x; 4560 } 4561 4562 if (tmfc.ss) 4563 tm->tm_sec = tmfc.ss; 4564 if (tmfc.mi) 4565 tm->tm_min = tmfc.mi; 4566 if (tmfc.hh) 4567 tm->tm_hour = tmfc.hh; 4568 4569 if (tmfc.clock == CLOCK_12_HOUR) 4570 { 4571 if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2) 4572 { 4573 RETURN_ERROR(ereport(ERROR, 4574 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 4575 errmsg("hour \"%d\" is invalid for the 12-hour clock", 4576 tm->tm_hour), 4577 errhint("Use the 24-hour clock, or give an hour between 1 and 12.")))); 4578 } 4579 4580 if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2) 4581 tm->tm_hour += HOURS_PER_DAY / 2; 4582 else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2) 4583 tm->tm_hour = 0; 4584 } 4585 4586 if (tmfc.year) 4587 { 4588 /* 4589 * If CC and YY (or Y) are provided, use YY as 2 low-order digits for 4590 * the year in the given century. Keep in mind that the 21st century 4591 * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from 4592 * 600BC to 501BC. 
4593 */ 4594 if (tmfc.cc && tmfc.yysz <= 2) 4595 { 4596 if (tmfc.bc) 4597 tmfc.cc = -tmfc.cc; 4598 tm->tm_year = tmfc.year % 100; 4599 if (tm->tm_year) 4600 { 4601 if (tmfc.cc >= 0) 4602 tm->tm_year += (tmfc.cc - 1) * 100; 4603 else 4604 tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1; 4605 } 4606 else 4607 { 4608 /* find century year for dates ending in "00" */ 4609 tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1); 4610 } 4611 } 4612 else 4613 { 4614 /* If a 4-digit year is provided, we use that and ignore CC. */ 4615 tm->tm_year = tmfc.year; 4616 if (tmfc.bc) 4617 tm->tm_year = -tm->tm_year; 4618 /* correct for our representation of BC years */ 4619 if (tm->tm_year < 0) 4620 tm->tm_year++; 4621 } 4622 fmask |= DTK_M(YEAR); 4623 } 4624 else if (tmfc.cc) 4625 { 4626 /* use first year of century */ 4627 if (tmfc.bc) 4628 tmfc.cc = -tmfc.cc; 4629 if (tmfc.cc >= 0) 4630 /* +1 because 21st century started in 2001 */ 4631 tm->tm_year = (tmfc.cc - 1) * 100 + 1; 4632 else 4633 /* +1 because year == 599 is 600 BC */ 4634 tm->tm_year = tmfc.cc * 100 + 1; 4635 fmask |= DTK_M(YEAR); 4636 } 4637 4638 if (tmfc.j) 4639 { 4640 j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); 4641 fmask |= DTK_DATE_M; 4642 } 4643 4644 if (tmfc.ww) 4645 { 4646 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK) 4647 { 4648 /* 4649 * If tmfc.d is not set, then the date is left at the beginning of 4650 * the ISO week (Monday). 
4651 */ 4652 if (tmfc.d) 4653 isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); 4654 else 4655 isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); 4656 fmask |= DTK_DATE_M; 4657 } 4658 else 4659 tmfc.ddd = (tmfc.ww - 1) * 7 + 1; 4660 } 4661 4662 if (tmfc.w) 4663 tmfc.dd = (tmfc.w - 1) * 7 + 1; 4664 if (tmfc.dd) 4665 { 4666 tm->tm_mday = tmfc.dd; 4667 fmask |= DTK_M(DAY); 4668 } 4669 if (tmfc.mm) 4670 { 4671 tm->tm_mon = tmfc.mm; 4672 fmask |= DTK_M(MONTH); 4673 } 4674 4675 if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1)) 4676 { 4677 /* 4678 * The month and day field have not been set, so we use the 4679 * day-of-year field to populate them. Depending on the date mode, 4680 * this field may be interpreted as a Gregorian day-of-year, or an ISO 4681 * week date day-of-year. 4682 */ 4683 4684 if (!tm->tm_year && !tmfc.bc) 4685 { 4686 RETURN_ERROR(ereport(ERROR, 4687 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 4688 errmsg("cannot calculate day of year without year information")))); 4689 } 4690 4691 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK) 4692 { 4693 int j0; /* zeroth day of the ISO year, in Julian */ 4694 4695 j0 = isoweek2j(tm->tm_year, 1) - 1; 4696 4697 j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); 4698 fmask |= DTK_DATE_M; 4699 } 4700 else 4701 { 4702 const int *y; 4703 int i; 4704 4705 static const int ysum[2][13] = { 4706 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, 4707 {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}}; 4708 4709 y = ysum[isleap(tm->tm_year)]; 4710 4711 for (i = 1; i <= MONTHS_PER_YEAR; i++) 4712 { 4713 if (tmfc.ddd <= y[i]) 4714 break; 4715 } 4716 if (tm->tm_mon <= 1) 4717 tm->tm_mon = i; 4718 4719 if (tm->tm_mday <= 1) 4720 tm->tm_mday = tmfc.ddd - y[i - 1]; 4721 4722 fmask |= DTK_M(MONTH) | DTK_M(DAY); 4723 } 4724 } 4725 4726 if (tmfc.ms) 4727 *fsec += tmfc.ms * 1000; 4728 if (tmfc.us) 4729 *fsec += tmfc.us; 4730 if (fprec) 4731 *fprec = tmfc.ff; /* 
fractional precision, if specified */ 4732 4733 /* Range-check date fields according to bit mask computed above */ 4734 if (fmask != 0) 4735 { 4736 /* We already dealt with AD/BC, so pass isjulian = true */ 4737 int dterr = ValidateDate(fmask, true, false, false, tm); 4738 4739 if (dterr != 0) 4740 { 4741 /* 4742 * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate 4743 * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an 4744 * irrelevant hint about datestyle. 4745 */ 4746 RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp")); 4747 } 4748 } 4749 4750 /* Range-check time fields too */ 4751 if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY || 4752 tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR || 4753 tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE || 4754 *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC) 4755 { 4756 RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp")); 4757 } 4758 4759 /* Save parsed time-zone into tm->tm_zone if it was specified */ 4760 if (tmfc.tzsign) 4761 { 4762 char *tz; 4763 4764 if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR || 4765 tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR) 4766 { 4767 RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp")); 4768 } 4769 4770 tz = psprintf("%c%02d:%02d", 4771 tmfc.tzsign > 0 ? 
'+' : '-', tmfc.tzh, tmfc.tzm); 4772 4773 tm->tm_zone = tz; 4774 } 4775 4776 DEBUG_TM(tm); 4777 4778 on_error: 4779 4780 if (format && !incache) 4781 pfree(format); 4782 4783 pfree(date_str); 4784 } 4785 4786 4787 /********************************************************************** 4788 * the NUMBER version part 4789 *********************************************************************/ 4790 4791 4792 static char * 4793 fill_str(char *str, int c, int max) 4794 { 4795 memset(str, c, max); 4796 *(str + max) = '\0'; 4797 return str; 4798 } 4799 4800 #define zeroize_NUM(_n) \ 4801 do { \ 4802 (_n)->flag = 0; \ 4803 (_n)->lsign = 0; \ 4804 (_n)->pre = 0; \ 4805 (_n)->post = 0; \ 4806 (_n)->pre_lsign_num = 0; \ 4807 (_n)->need_locale = 0; \ 4808 (_n)->multi = 0; \ 4809 (_n)->zero_start = 0; \ 4810 (_n)->zero_end = 0; \ 4811 } while(0) 4812 4813 /* This works the same as DCH_prevent_counter_overflow */ 4814 static inline void 4815 NUM_prevent_counter_overflow(void) 4816 { 4817 if (NUMCounter >= (INT_MAX - 1)) 4818 { 4819 for (int i = 0; i < n_NUMCache; i++) 4820 NUMCache[i]->age >>= 1; 4821 NUMCounter >>= 1; 4822 } 4823 } 4824 4825 /* select a NUMCacheEntry to hold the given format picture */ 4826 static NUMCacheEntry * 4827 NUM_cache_getnew(const char *str) 4828 { 4829 NUMCacheEntry *ent; 4830 4831 /* Ensure we can advance NUMCounter below */ 4832 NUM_prevent_counter_overflow(); 4833 4834 /* 4835 * If cache is full, remove oldest entry (or recycle first not-valid one) 4836 */ 4837 if (n_NUMCache >= NUM_CACHE_ENTRIES) 4838 { 4839 NUMCacheEntry *old = NUMCache[0]; 4840 4841 #ifdef DEBUG_TO_FROM_CHAR 4842 elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache); 4843 #endif 4844 if (old->valid) 4845 { 4846 for (int i = 1; i < NUM_CACHE_ENTRIES; i++) 4847 { 4848 ent = NUMCache[i]; 4849 if (!ent->valid) 4850 { 4851 old = ent; 4852 break; 4853 } 4854 if (ent->age < old->age) 4855 old = ent; 4856 } 4857 } 4858 #ifdef DEBUG_TO_FROM_CHAR 4859 elog(DEBUG_elog_output, "OLD: 
\"%s\" AGE: %d", old->str, old->age); 4860 #endif 4861 old->valid = false; 4862 StrNCpy(old->str, str, NUM_CACHE_SIZE + 1); 4863 old->age = (++NUMCounter); 4864 /* caller is expected to fill format and Num, then set valid */ 4865 return old; 4866 } 4867 else 4868 { 4869 #ifdef DEBUG_TO_FROM_CHAR 4870 elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache); 4871 #endif 4872 Assert(NUMCache[n_NUMCache] == NULL); 4873 NUMCache[n_NUMCache] = ent = (NUMCacheEntry *) 4874 MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry)); 4875 ent->valid = false; 4876 StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1); 4877 ent->age = (++NUMCounter); 4878 /* caller is expected to fill format and Num, then set valid */ 4879 ++n_NUMCache; 4880 return ent; 4881 } 4882 } 4883 4884 /* look for an existing NUMCacheEntry matching the given format picture */ 4885 static NUMCacheEntry * 4886 NUM_cache_search(const char *str) 4887 { 4888 /* Ensure we can advance NUMCounter below */ 4889 NUM_prevent_counter_overflow(); 4890 4891 for (int i = 0; i < n_NUMCache; i++) 4892 { 4893 NUMCacheEntry *ent = NUMCache[i]; 4894 4895 if (ent->valid && strcmp(ent->str, str) == 0) 4896 { 4897 ent->age = (++NUMCounter); 4898 return ent; 4899 } 4900 } 4901 4902 return NULL; 4903 } 4904 4905 /* Find or create a NUMCacheEntry for the given format picture */ 4906 static NUMCacheEntry * 4907 NUM_cache_fetch(const char *str) 4908 { 4909 NUMCacheEntry *ent; 4910 4911 if ((ent = NUM_cache_search(str)) == NULL) 4912 { 4913 /* 4914 * Not in the cache, must run parser and save a new format-picture to 4915 * the cache. Do not mark the cache entry valid until parsing 4916 * succeeds. 
4917 */ 4918 ent = NUM_cache_getnew(str); 4919 4920 zeroize_NUM(&ent->Num); 4921 4922 parse_format(ent->format, str, NUM_keywords, 4923 NULL, NUM_index, NUM_FLAG, &ent->Num); 4924 4925 ent->valid = true; 4926 } 4927 return ent; 4928 } 4929 4930 /* ---------- 4931 * Cache routine for NUM to_char version 4932 * ---------- 4933 */ 4934 static FormatNode * 4935 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree) 4936 { 4937 FormatNode *format = NULL; 4938 char *str; 4939 4940 str = text_to_cstring(pars_str); 4941 4942 if (len > NUM_CACHE_SIZE) 4943 { 4944 /* 4945 * Allocate new memory if format picture is bigger than static cache 4946 * and do not use cache (call parser always) 4947 */ 4948 format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode)); 4949 4950 *shouldFree = true; 4951 4952 zeroize_NUM(Num); 4953 4954 parse_format(format, str, NUM_keywords, 4955 NULL, NUM_index, NUM_FLAG, Num); 4956 } 4957 else 4958 { 4959 /* 4960 * Use cache buffers 4961 */ 4962 NUMCacheEntry *ent = NUM_cache_fetch(str); 4963 4964 *shouldFree = false; 4965 4966 format = ent->format; 4967 4968 /* 4969 * Copy cache to used struct 4970 */ 4971 Num->flag = ent->Num.flag; 4972 Num->lsign = ent->Num.lsign; 4973 Num->pre = ent->Num.pre; 4974 Num->post = ent->Num.post; 4975 Num->pre_lsign_num = ent->Num.pre_lsign_num; 4976 Num->need_locale = ent->Num.need_locale; 4977 Num->multi = ent->Num.multi; 4978 Num->zero_start = ent->Num.zero_start; 4979 Num->zero_end = ent->Num.zero_end; 4980 } 4981 4982 #ifdef DEBUG_TO_FROM_CHAR 4983 /* dump_node(format, len); */ 4984 dump_index(NUM_keywords, NUM_index); 4985 #endif 4986 4987 pfree(str); 4988 return format; 4989 } 4990 4991 4992 static char * 4993 int_to_roman(int number) 4994 { 4995 int len = 0, 4996 num = 0; 4997 char *p = NULL, 4998 *result, 4999 numstr[12]; 5000 5001 result = (char *) palloc(16); 5002 *result = '\0'; 5003 5004 if (number > 3999 || number < 1) 5005 { 5006 fill_str(result, '#', 15); 5007 return result; 5008 } 5009 
len = snprintf(numstr, sizeof(numstr), "%d", number); 5010 5011 for (p = numstr; *p != '\0'; p++, --len) 5012 { 5013 num = *p - 49; /* 48 ascii + 1 */ 5014 if (num < 0) 5015 continue; 5016 5017 if (len > 3) 5018 { 5019 while (num-- != -1) 5020 strcat(result, "M"); 5021 } 5022 else 5023 { 5024 if (len == 3) 5025 strcat(result, rm100[num]); 5026 else if (len == 2) 5027 strcat(result, rm10[num]); 5028 else if (len == 1) 5029 strcat(result, rm1[num]); 5030 } 5031 } 5032 return result; 5033 } 5034 5035 5036 5037 /* ---------- 5038 * Locale 5039 * ---------- 5040 */ 5041 static void 5042 NUM_prepare_locale(NUMProc *Np) 5043 { 5044 if (Np->Num->need_locale) 5045 { 5046 struct lconv *lconv; 5047 5048 /* 5049 * Get locales 5050 */ 5051 lconv = PGLC_localeconv(); 5052 5053 /* 5054 * Positive / Negative number sign 5055 */ 5056 if (lconv->negative_sign && *lconv->negative_sign) 5057 Np->L_negative_sign = lconv->negative_sign; 5058 else 5059 Np->L_negative_sign = "-"; 5060 5061 if (lconv->positive_sign && *lconv->positive_sign) 5062 Np->L_positive_sign = lconv->positive_sign; 5063 else 5064 Np->L_positive_sign = "+"; 5065 5066 /* 5067 * Number decimal point 5068 */ 5069 if (lconv->decimal_point && *lconv->decimal_point) 5070 Np->decimal = lconv->decimal_point; 5071 5072 else 5073 Np->decimal = "."; 5074 5075 if (!IS_LDECIMAL(Np->Num)) 5076 Np->decimal = "."; 5077 5078 /* 5079 * Number thousands separator 5080 * 5081 * Some locales (e.g. broken glibc pt_BR), have a comma for decimal, 5082 * but "" for thousands_sep, so we set the thousands_sep too. 5083 * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php 5084 */ 5085 if (lconv->thousands_sep && *lconv->thousands_sep) 5086 Np->L_thousands_sep = lconv->thousands_sep; 5087 /* Make sure thousands separator doesn't match decimal point symbol. 
*/ 5088 else if (strcmp(Np->decimal, ",") != 0) 5089 Np->L_thousands_sep = ","; 5090 else 5091 Np->L_thousands_sep = "."; 5092 5093 /* 5094 * Currency symbol 5095 */ 5096 if (lconv->currency_symbol && *lconv->currency_symbol) 5097 Np->L_currency_symbol = lconv->currency_symbol; 5098 else 5099 Np->L_currency_symbol = " "; 5100 } 5101 else 5102 { 5103 /* 5104 * Default values 5105 */ 5106 Np->L_negative_sign = "-"; 5107 Np->L_positive_sign = "+"; 5108 Np->decimal = "."; 5109 5110 Np->L_thousands_sep = ","; 5111 Np->L_currency_symbol = " "; 5112 } 5113 } 5114 5115 /* ---------- 5116 * Return pointer of last relevant number after decimal point 5117 * 12.0500 --> last relevant is '5' 5118 * 12.0000 --> last relevant is '.' 5119 * If there is no decimal point, return NULL (which will result in same 5120 * behavior as if FM hadn't been specified). 5121 * ---------- 5122 */ 5123 static char * 5124 get_last_relevant_decnum(char *num) 5125 { 5126 char *result, 5127 *p = strchr(num, '.'); 5128 5129 #ifdef DEBUG_TO_FROM_CHAR 5130 elog(DEBUG_elog_output, "get_last_relevant_decnum()"); 5131 #endif 5132 5133 if (!p) 5134 return NULL; 5135 5136 result = p; 5137 5138 while (*(++p)) 5139 { 5140 if (*p != '0') 5141 result = p; 5142 } 5143 5144 return result; 5145 } 5146 5147 /* 5148 * These macros are used in NUM_processor() and its subsidiary routines. 5149 * OVERLOAD_TEST: true if we've reached end of input string 5150 * AMOUNT_TEST(s): true if at least s bytes remain in string 5151 */ 5152 #define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len) 5153 #define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s))) 5154 5155 /* ---------- 5156 * Number extraction for TO_NUMBER() 5157 * ---------- 5158 */ 5159 static void 5160 NUM_numpart_from_char(NUMProc *Np, int id, int input_len) 5161 { 5162 bool isread = false; 5163 5164 #ifdef DEBUG_TO_FROM_CHAR 5165 elog(DEBUG_elog_output, " --- scan start --- id=%s", 5166 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? 
"NUM_DEC" : "???"); 5167 #endif 5168 5169 if (OVERLOAD_TEST) 5170 return; 5171 5172 if (*Np->inout_p == ' ') 5173 Np->inout_p++; 5174 5175 if (OVERLOAD_TEST) 5176 return; 5177 5178 /* 5179 * read sign before number 5180 */ 5181 if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) && 5182 (Np->read_pre + Np->read_post) == 0) 5183 { 5184 #ifdef DEBUG_TO_FROM_CHAR 5185 elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s", 5186 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign); 5187 #endif 5188 5189 /* 5190 * locale sign 5191 */ 5192 if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE) 5193 { 5194 int x = 0; 5195 5196 #ifdef DEBUG_TO_FROM_CHAR 5197 elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p); 5198 #endif 5199 if ((x = strlen(Np->L_negative_sign)) && 5200 AMOUNT_TEST(x) && 5201 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0) 5202 { 5203 Np->inout_p += x; 5204 *Np->number = '-'; 5205 } 5206 else if ((x = strlen(Np->L_positive_sign)) && 5207 AMOUNT_TEST(x) && 5208 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0) 5209 { 5210 Np->inout_p += x; 5211 *Np->number = '+'; 5212 } 5213 } 5214 else 5215 { 5216 #ifdef DEBUG_TO_FROM_CHAR 5217 elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p); 5218 #endif 5219 5220 /* 5221 * simple + - < > 5222 */ 5223 if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) && 5224 *Np->inout_p == '<')) 5225 { 5226 *Np->number = '-'; /* set - */ 5227 Np->inout_p++; 5228 } 5229 else if (*Np->inout_p == '+') 5230 { 5231 *Np->number = '+'; /* set + */ 5232 Np->inout_p++; 5233 } 5234 } 5235 } 5236 5237 if (OVERLOAD_TEST) 5238 return; 5239 5240 #ifdef DEBUG_TO_FROM_CHAR 5241 elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number); 5242 #endif 5243 5244 /* 5245 * read digit or decimal point 5246 */ 5247 if (isdigit((unsigned char) *Np->inout_p)) 5248 { 5249 if (Np->read_dec && Np->read_post == Np->Num->post) 5250 return; 5251 5252 
*Np->number_p = *Np->inout_p; 5253 Np->number_p++; 5254 5255 if (Np->read_dec) 5256 Np->read_post++; 5257 else 5258 Np->read_pre++; 5259 5260 isread = true; 5261 5262 #ifdef DEBUG_TO_FROM_CHAR 5263 elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p); 5264 #endif 5265 } 5266 else if (IS_DECIMAL(Np->Num) && Np->read_dec == false) 5267 { 5268 /* 5269 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because 5270 * Np->decimal is always just "." if we don't have a D format token. 5271 * So we just unconditionally match to Np->decimal. 5272 */ 5273 int x = strlen(Np->decimal); 5274 5275 #ifdef DEBUG_TO_FROM_CHAR 5276 elog(DEBUG_elog_output, "Try read decimal point (%c)", 5277 *Np->inout_p); 5278 #endif 5279 if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0) 5280 { 5281 Np->inout_p += x - 1; 5282 *Np->number_p = '.'; 5283 Np->number_p++; 5284 Np->read_dec = true; 5285 isread = true; 5286 } 5287 } 5288 5289 if (OVERLOAD_TEST) 5290 return; 5291 5292 /* 5293 * Read sign behind "last" number 5294 * 5295 * We need sign detection because determine exact position of post-sign is 5296 * difficult: 5297 * 5298 * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI -> 5299 * 5.01- 5300 */ 5301 if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0) 5302 { 5303 /* 5304 * locale sign (NUM_S) is always anchored behind a last number, if: - 5305 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and 5306 * next char is not digit 5307 */ 5308 if (IS_LSIGN(Np->Num) && isread && 5309 (Np->inout_p + 1) < Np->inout + input_len && 5310 !isdigit((unsigned char) *(Np->inout_p + 1))) 5311 { 5312 int x; 5313 char *tmp = Np->inout_p++; 5314 5315 #ifdef DEBUG_TO_FROM_CHAR 5316 elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p); 5317 #endif 5318 if ((x = strlen(Np->L_negative_sign)) && 5319 AMOUNT_TEST(x) && 5320 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0) 5321 { 5322 Np->inout_p += x - 1; /* -1 .. 
NUM_processor() do inout_p++ */ 5323 *Np->number = '-'; 5324 } 5325 else if ((x = strlen(Np->L_positive_sign)) && 5326 AMOUNT_TEST(x) && 5327 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0) 5328 { 5329 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */ 5330 *Np->number = '+'; 5331 } 5332 if (*Np->number == ' ') 5333 /* no sign read */ 5334 Np->inout_p = tmp; 5335 } 5336 5337 /* 5338 * try read non-locale sign, it's happen only if format is not exact 5339 * and we cannot determine sign position of MI/PL/SG, an example: 5340 * 5341 * FM9.999999MI -> 5.01- 5342 * 5343 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats 5344 * like to_number('1 -', '9S') where sign is not anchored to last 5345 * number. 5346 */ 5347 else if (isread == false && IS_LSIGN(Np->Num) == false && 5348 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))) 5349 { 5350 #ifdef DEBUG_TO_FROM_CHAR 5351 elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p); 5352 #endif 5353 5354 /* 5355 * simple + - 5356 */ 5357 if (*Np->inout_p == '-' || *Np->inout_p == '+') 5358 /* NUM_processor() do inout_p++ */ 5359 *Np->number = *Np->inout_p; 5360 } 5361 } 5362 } 5363 5364 #define IS_PREDEC_SPACE(_n) \ 5365 (IS_ZERO((_n)->Num)==false && \ 5366 (_n)->number == (_n)->number_p && \ 5367 *(_n)->number == '0' && \ 5368 (_n)->Num->post != 0) 5369 5370 /* ---------- 5371 * Add digit or sign to number-string 5372 * ---------- 5373 */ 5374 static void 5375 NUM_numpart_to_char(NUMProc *Np, int id) 5376 { 5377 int end; 5378 5379 if (IS_ROMAN(Np->Num)) 5380 return; 5381 5382 /* Note: in this elog() output not set '\0' in 'inout' */ 5383 5384 #ifdef DEBUG_TO_FROM_CHAR 5385 5386 /* 5387 * Np->num_curr is number of current item in format-picture, it is not 5388 * current position in inout! 
5389 */ 5390 elog(DEBUG_elog_output, 5391 "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"", 5392 Np->sign_wrote, 5393 Np->num_curr, 5394 Np->number_p, 5395 Np->inout); 5396 #endif 5397 Np->num_in = false; 5398 5399 /* 5400 * Write sign if real number will write to output Note: IS_PREDEC_SPACE() 5401 * handle "9.9" --> " .1" 5402 */ 5403 if (Np->sign_wrote == false && 5404 (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) && 5405 (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.'))) 5406 { 5407 if (IS_LSIGN(Np->Num)) 5408 { 5409 if (Np->Num->lsign == NUM_LSIGN_PRE) 5410 { 5411 if (Np->sign == '-') 5412 strcpy(Np->inout_p, Np->L_negative_sign); 5413 else 5414 strcpy(Np->inout_p, Np->L_positive_sign); 5415 Np->inout_p += strlen(Np->inout_p); 5416 Np->sign_wrote = true; 5417 } 5418 } 5419 else if (IS_BRACKET(Np->Num)) 5420 { 5421 *Np->inout_p = Np->sign == '+' ? ' ' : '<'; 5422 ++Np->inout_p; 5423 Np->sign_wrote = true; 5424 } 5425 else if (Np->sign == '+') 5426 { 5427 if (!IS_FILLMODE(Np->Num)) 5428 { 5429 *Np->inout_p = ' '; /* Write + */ 5430 ++Np->inout_p; 5431 } 5432 Np->sign_wrote = true; 5433 } 5434 else if (Np->sign == '-') 5435 { /* Write - */ 5436 *Np->inout_p = '-'; 5437 ++Np->inout_p; 5438 Np->sign_wrote = true; 5439 } 5440 } 5441 5442 5443 /* 5444 * digits / FM / Zero / Dec. 
point 5445 */ 5446 if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC) 5447 { 5448 if (Np->num_curr < Np->out_pre_spaces && 5449 (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num))) 5450 { 5451 /* 5452 * Write blank space 5453 */ 5454 if (!IS_FILLMODE(Np->Num)) 5455 { 5456 *Np->inout_p = ' '; /* Write ' ' */ 5457 ++Np->inout_p; 5458 } 5459 } 5460 else if (IS_ZERO(Np->Num) && 5461 Np->num_curr < Np->out_pre_spaces && 5462 Np->Num->zero_start <= Np->num_curr) 5463 { 5464 /* 5465 * Write ZERO 5466 */ 5467 *Np->inout_p = '0'; /* Write '0' */ 5468 ++Np->inout_p; 5469 Np->num_in = true; 5470 } 5471 else 5472 { 5473 /* 5474 * Write Decimal point 5475 */ 5476 if (*Np->number_p == '.') 5477 { 5478 if (!Np->last_relevant || *Np->last_relevant != '.') 5479 { 5480 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ 5481 Np->inout_p += strlen(Np->inout_p); 5482 } 5483 5484 /* 5485 * Ora 'n' -- FM9.9 --> 'n.' 5486 */ 5487 else if (IS_FILLMODE(Np->Num) && 5488 Np->last_relevant && *Np->last_relevant == '.') 5489 { 5490 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ 5491 Np->inout_p += strlen(Np->inout_p); 5492 } 5493 } 5494 else 5495 { 5496 /* 5497 * Write Digits 5498 */ 5499 if (Np->last_relevant && Np->number_p > Np->last_relevant && 5500 id != NUM_0) 5501 ; 5502 5503 /* 5504 * '0.1' -- 9.9 --> ' .1' 5505 */ 5506 else if (IS_PREDEC_SPACE(Np)) 5507 { 5508 if (!IS_FILLMODE(Np->Num)) 5509 { 5510 *Np->inout_p = ' '; 5511 ++Np->inout_p; 5512 } 5513 5514 /* 5515 * '0' -- FM9.9 --> '0.' 5516 */ 5517 else if (Np->last_relevant && *Np->last_relevant == '.') 5518 { 5519 *Np->inout_p = '0'; 5520 ++Np->inout_p; 5521 } 5522 } 5523 else 5524 { 5525 *Np->inout_p = *Np->number_p; /* Write DIGIT */ 5526 ++Np->inout_p; 5527 Np->num_in = true; 5528 } 5529 } 5530 /* do no exceed string length */ 5531 if (*Np->number_p) 5532 ++Np->number_p; 5533 } 5534 5535 end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 
1 : 0); 5536 5537 if (Np->last_relevant && Np->last_relevant == Np->number_p) 5538 end = Np->num_curr; 5539 5540 if (Np->num_curr + 1 == end) 5541 { 5542 if (Np->sign_wrote == true && IS_BRACKET(Np->Num)) 5543 { 5544 *Np->inout_p = Np->sign == '+' ? ' ' : '>'; 5545 ++Np->inout_p; 5546 } 5547 else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST) 5548 { 5549 if (Np->sign == '-') 5550 strcpy(Np->inout_p, Np->L_negative_sign); 5551 else 5552 strcpy(Np->inout_p, Np->L_positive_sign); 5553 Np->inout_p += strlen(Np->inout_p); 5554 } 5555 } 5556 } 5557 5558 ++Np->num_curr; 5559 } 5560 5561 /* 5562 * Skip over "n" input characters, but only if they aren't numeric data 5563 */ 5564 static void 5565 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len) 5566 { 5567 while (n-- > 0) 5568 { 5569 if (OVERLOAD_TEST) 5570 break; /* end of input */ 5571 if (strchr("0123456789.,+-", *Np->inout_p) != NULL) 5572 break; /* it's a data character */ 5573 Np->inout_p += pg_mblen(Np->inout_p); 5574 } 5575 } 5576 5577 static char * 5578 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, 5579 char *number, int input_len, int to_char_out_pre_spaces, 5580 int sign, bool is_to_char, Oid collid) 5581 { 5582 FormatNode *n; 5583 NUMProc _Np, 5584 *Np = &_Np; 5585 const char *pattern; 5586 int pattern_len; 5587 5588 MemSet(Np, 0, sizeof(NUMProc)); 5589 5590 Np->Num = Num; 5591 Np->is_to_char = is_to_char; 5592 Np->number = number; 5593 Np->inout = inout; 5594 Np->last_relevant = NULL; 5595 Np->read_post = 0; 5596 Np->read_pre = 0; 5597 Np->read_dec = false; 5598 5599 if (Np->Num->zero_start) 5600 --Np->Num->zero_start; 5601 5602 if (IS_EEEE(Np->Num)) 5603 { 5604 if (!Np->is_to_char) 5605 ereport(ERROR, 5606 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 5607 errmsg("\"EEEE\" not supported for input"))); 5608 return strcpy(inout, number); 5609 } 5610 5611 /* 5612 * Roman correction 5613 */ 5614 if (IS_ROMAN(Np->Num)) 5615 { 5616 if (!Np->is_to_char) 5617 ereport(ERROR, 5618 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 5619 errmsg("\"RN\" not supported for input"))); 5620 5621 Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post = 5622 Np->Num->pre = Np->out_pre_spaces = Np->sign = 0; 5623 5624 if (IS_FILLMODE(Np->Num)) 5625 { 5626 Np->Num->flag = 0; 5627 Np->Num->flag |= NUM_F_FILLMODE; 5628 } 5629 else 5630 Np->Num->flag = 0; 5631 Np->Num->flag |= NUM_F_ROMAN; 5632 } 5633 5634 /* 5635 * Sign 5636 */ 5637 if (is_to_char) 5638 { 5639 Np->sign = sign; 5640 5641 /* MI/PL/SG - write sign itself and not in number */ 5642 if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)) 5643 { 5644 if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false) 5645 Np->sign_wrote = false; /* need sign */ 5646 else 5647 Np->sign_wrote = true; /* needn't sign */ 5648 } 5649 else 5650 { 5651 if (Np->sign != '-') 5652 { 5653 if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num)) 5654 Np->Num->flag &= ~NUM_F_BRACKET; 5655 if (IS_MINUS(Np->Num)) 5656 Np->Num->flag &= ~NUM_F_MINUS; 5657 } 5658 else if (Np->sign != '+' && IS_PLUS(Np->Num)) 5659 Np->Num->flag &= ~NUM_F_PLUS; 5660 5661 if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false) 5662 Np->sign_wrote = true; /* needn't sign */ 5663 else 5664 Np->sign_wrote = false; /* need sign */ 5665 5666 if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num) 5667 Np->Num->lsign = NUM_LSIGN_POST; 5668 } 5669 } 5670 else 5671 Np->sign = false; 5672 5673 /* 5674 * Count 5675 */ 5676 Np->num_count = Np->Num->post + Np->Num->pre - 1; 5677 5678 if (is_to_char) 5679 { 5680 Np->out_pre_spaces = to_char_out_pre_spaces; 5681 5682 if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num)) 5683 { 5684 Np->last_relevant = get_last_relevant_decnum(Np->number); 5685 5686 /* 5687 * If any '0' specifiers are present, make sure we don't strip 5688 * those digits. 
5689 */ 5690 if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces) 5691 { 5692 char *last_zero; 5693 5694 last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces); 5695 if (Np->last_relevant < last_zero) 5696 Np->last_relevant = last_zero; 5697 } 5698 } 5699 5700 if (Np->sign_wrote == false && Np->out_pre_spaces == 0) 5701 ++Np->num_count; 5702 } 5703 else 5704 { 5705 Np->out_pre_spaces = 0; 5706 *Np->number = ' '; /* sign space */ 5707 *(Np->number + 1) = '\0'; 5708 } 5709 5710 Np->num_in = 0; 5711 Np->num_curr = 0; 5712 5713 #ifdef DEBUG_TO_FROM_CHAR 5714 elog(DEBUG_elog_output, 5715 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s", 5716 Np->sign, 5717 Np->number, 5718 Np->Num->pre, 5719 Np->Num->post, 5720 Np->num_count, 5721 Np->out_pre_spaces, 5722 Np->sign_wrote ? "Yes" : "No", 5723 IS_ZERO(Np->Num) ? "Yes" : "No", 5724 Np->Num->zero_start, 5725 Np->Num->zero_end, 5726 Np->last_relevant ? Np->last_relevant : "<not set>", 5727 IS_BRACKET(Np->Num) ? "Yes" : "No", 5728 IS_PLUS(Np->Num) ? "Yes" : "No", 5729 IS_MINUS(Np->Num) ? "Yes" : "No", 5730 IS_FILLMODE(Np->Num) ? "Yes" : "No", 5731 IS_ROMAN(Np->Num) ? "Yes" : "No", 5732 IS_EEEE(Np->Num) ? "Yes" : "No" 5733 ); 5734 #endif 5735 5736 /* 5737 * Locale 5738 */ 5739 NUM_prepare_locale(Np); 5740 5741 /* 5742 * Processor direct cycle 5743 */ 5744 if (Np->is_to_char) 5745 Np->number_p = Np->number; 5746 else 5747 Np->number_p = Np->number + 1; /* first char is space for sign */ 5748 5749 for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++) 5750 { 5751 if (!Np->is_to_char) 5752 { 5753 /* 5754 * Check at least one byte remains to be scanned. (In actions 5755 * below, must use AMOUNT_TEST if we want to read more bytes than 5756 * that.) 
5757 */ 5758 if (OVERLOAD_TEST) 5759 break; 5760 } 5761 5762 /* 5763 * Format pictures actions 5764 */ 5765 if (n->type == NODE_TYPE_ACTION) 5766 { 5767 /* 5768 * Create/read digit/zero/blank/sign/special-case 5769 * 5770 * 'NUM_S' note: The locale sign is anchored to number and we 5771 * read/write it when we work with first or last number 5772 * (NUM_0/NUM_9). This is why NUM_S is missing in switch(). 5773 * 5774 * Notice the "Np->inout_p++" at the bottom of the loop. This is 5775 * why most of the actions advance inout_p one less than you might 5776 * expect. In cases where we don't want that increment to happen, 5777 * a switch case ends with "continue" not "break". 5778 */ 5779 switch (n->key->id) 5780 { 5781 case NUM_9: 5782 case NUM_0: 5783 case NUM_DEC: 5784 case NUM_D: 5785 if (Np->is_to_char) 5786 { 5787 NUM_numpart_to_char(Np, n->key->id); 5788 continue; /* for() */ 5789 } 5790 else 5791 { 5792 NUM_numpart_from_char(Np, n->key->id, input_len); 5793 break; /* switch() case: */ 5794 } 5795 5796 case NUM_COMMA: 5797 if (Np->is_to_char) 5798 { 5799 if (!Np->num_in) 5800 { 5801 if (IS_FILLMODE(Np->Num)) 5802 continue; 5803 else 5804 *Np->inout_p = ' '; 5805 } 5806 else 5807 *Np->inout_p = ','; 5808 } 5809 else 5810 { 5811 if (!Np->num_in) 5812 { 5813 if (IS_FILLMODE(Np->Num)) 5814 continue; 5815 } 5816 if (*Np->inout_p != ',') 5817 continue; 5818 } 5819 break; 5820 5821 case NUM_G: 5822 pattern = Np->L_thousands_sep; 5823 pattern_len = strlen(pattern); 5824 if (Np->is_to_char) 5825 { 5826 if (!Np->num_in) 5827 { 5828 if (IS_FILLMODE(Np->Num)) 5829 continue; 5830 else 5831 { 5832 /* just in case there are MB chars */ 5833 pattern_len = pg_mbstrlen(pattern); 5834 memset(Np->inout_p, ' ', pattern_len); 5835 Np->inout_p += pattern_len - 1; 5836 } 5837 } 5838 else 5839 { 5840 strcpy(Np->inout_p, pattern); 5841 Np->inout_p += pattern_len - 1; 5842 } 5843 } 5844 else 5845 { 5846 if (!Np->num_in) 5847 { 5848 if (IS_FILLMODE(Np->Num)) 5849 continue; 5850 } 5851 5852 
/* 5853 * Because L_thousands_sep typically contains data 5854 * characters (either '.' or ','), we can't use 5855 * NUM_eat_non_data_chars here. Instead skip only if 5856 * the input matches L_thousands_sep. 5857 */ 5858 if (AMOUNT_TEST(pattern_len) && 5859 strncmp(Np->inout_p, pattern, pattern_len) == 0) 5860 Np->inout_p += pattern_len - 1; 5861 else 5862 continue; 5863 } 5864 break; 5865 5866 case NUM_L: 5867 pattern = Np->L_currency_symbol; 5868 if (Np->is_to_char) 5869 { 5870 strcpy(Np->inout_p, pattern); 5871 Np->inout_p += strlen(pattern) - 1; 5872 } 5873 else 5874 { 5875 NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len); 5876 continue; 5877 } 5878 break; 5879 5880 case NUM_RN: 5881 if (IS_FILLMODE(Np->Num)) 5882 { 5883 strcpy(Np->inout_p, Np->number_p); 5884 Np->inout_p += strlen(Np->inout_p) - 1; 5885 } 5886 else 5887 { 5888 sprintf(Np->inout_p, "%15s", Np->number_p); 5889 Np->inout_p += strlen(Np->inout_p) - 1; 5890 } 5891 break; 5892 5893 case NUM_rn: 5894 if (IS_FILLMODE(Np->Num)) 5895 { 5896 strcpy(Np->inout_p, asc_tolower_z(Np->number_p)); 5897 Np->inout_p += strlen(Np->inout_p) - 1; 5898 } 5899 else 5900 { 5901 sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p)); 5902 Np->inout_p += strlen(Np->inout_p) - 1; 5903 } 5904 break; 5905 5906 case NUM_th: 5907 if (IS_ROMAN(Np->Num) || *Np->number == '#' || 5908 Np->sign == '-' || IS_DECIMAL(Np->Num)) 5909 continue; 5910 5911 if (Np->is_to_char) 5912 { 5913 strcpy(Np->inout_p, get_th(Np->number, TH_LOWER)); 5914 Np->inout_p += 1; 5915 } 5916 else 5917 { 5918 /* All variants of 'th' occupy 2 characters */ 5919 NUM_eat_non_data_chars(Np, 2, input_len); 5920 continue; 5921 } 5922 break; 5923 5924 case NUM_TH: 5925 if (IS_ROMAN(Np->Num) || *Np->number == '#' || 5926 Np->sign == '-' || IS_DECIMAL(Np->Num)) 5927 continue; 5928 5929 if (Np->is_to_char) 5930 { 5931 strcpy(Np->inout_p, get_th(Np->number, TH_UPPER)); 5932 Np->inout_p += 1; 5933 } 5934 else 5935 { 5936 /* All variants of 'TH' occupy 2 
characters */ 5937 NUM_eat_non_data_chars(Np, 2, input_len); 5938 continue; 5939 } 5940 break; 5941 5942 case NUM_MI: 5943 if (Np->is_to_char) 5944 { 5945 if (Np->sign == '-') 5946 *Np->inout_p = '-'; 5947 else if (IS_FILLMODE(Np->Num)) 5948 continue; 5949 else 5950 *Np->inout_p = ' '; 5951 } 5952 else 5953 { 5954 if (*Np->inout_p == '-') 5955 *Np->number = '-'; 5956 else 5957 { 5958 NUM_eat_non_data_chars(Np, 1, input_len); 5959 continue; 5960 } 5961 } 5962 break; 5963 5964 case NUM_PL: 5965 if (Np->is_to_char) 5966 { 5967 if (Np->sign == '+') 5968 *Np->inout_p = '+'; 5969 else if (IS_FILLMODE(Np->Num)) 5970 continue; 5971 else 5972 *Np->inout_p = ' '; 5973 } 5974 else 5975 { 5976 if (*Np->inout_p == '+') 5977 *Np->number = '+'; 5978 else 5979 { 5980 NUM_eat_non_data_chars(Np, 1, input_len); 5981 continue; 5982 } 5983 } 5984 break; 5985 5986 case NUM_SG: 5987 if (Np->is_to_char) 5988 *Np->inout_p = Np->sign; 5989 else 5990 { 5991 if (*Np->inout_p == '-') 5992 *Np->number = '-'; 5993 else if (*Np->inout_p == '+') 5994 *Np->number = '+'; 5995 else 5996 { 5997 NUM_eat_non_data_chars(Np, 1, input_len); 5998 continue; 5999 } 6000 } 6001 break; 6002 6003 default: 6004 continue; 6005 break; 6006 } 6007 } 6008 else 6009 { 6010 /* 6011 * In TO_CHAR, non-pattern characters in the format are copied to 6012 * the output. In TO_NUMBER, we skip one input character for each 6013 * non-pattern format character, whether or not it matches the 6014 * format character. 6015 */ 6016 if (Np->is_to_char) 6017 { 6018 strcpy(Np->inout_p, n->character); 6019 Np->inout_p += strlen(Np->inout_p); 6020 } 6021 else 6022 { 6023 Np->inout_p += pg_mblen(Np->inout_p); 6024 } 6025 continue; 6026 } 6027 Np->inout_p++; 6028 } 6029 6030 if (Np->is_to_char) 6031 { 6032 *Np->inout_p = '\0'; 6033 return Np->inout; 6034 } 6035 else 6036 { 6037 if (*(Np->number_p - 1) == '.') 6038 *(Np->number_p - 1) = '\0'; 6039 else 6040 *Np->number_p = '\0'; 6041 6042 /* 6043 * Correction - precision of dec. 
number
		 */
		Np->Num->post = Np->read_post;

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
#endif
		/* not the to_char direction: hand back the collected number string */
		return Np->number;
	}
}

/* ----------
 * MACRO: Start part of NUM - for all NUM's to_char variants
 *	(sorry, but I hate copy same code - macro is better..)
 *
 * The expanding function must have these locals in scope: fmt (the format
 * picture), result, format, Num and shouldFree.
 *
 * If the format picture is empty, or so long that the buffer size computed
 * below could not be represented in an int, the *expanding function*
 * returns an empty text right here.  Otherwise a zero-filled result buffer
 * of (len * NUM_MAX_ITEM_SIZ) + 1 data bytes plus the varlena header is
 * allocated, and the format nodes are obtained through NUM_cache().
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int len = VARSIZE_ANY_EXHDR(fmt); \
	if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ) \
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	result = (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \
	format = NUM_cache(len, &Num, fmt, &shouldFree); \
} while (0)

/* ----------
 * MACRO: Finish part of NUM
 *
 * The expanding function must have these locals in scope: format, Num,
 * result, numstr, out_pre_spaces, sign and shouldFree.
 *
 * Runs NUM_processor() over numstr, writing the formatted output into the
 * data area of result, frees the format nodes when shouldFree is set, and
 * finally sets the varlena length of result from the length of the
 * null-terminated output.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int		len; \
									\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
									\
	if (shouldFree)					\
		pfree(format);				\
									\
	/*								\
	 * Convert null-terminated representation of result to standard text. \
	 * The result is usually much bigger than it needs to be, but there \
	 * seems little point in realloc'ing it smaller.	\
\ 6085 */ \ 6086 len = strlen(VARDATA(result)); \ 6087 SET_VARSIZE(result, len + VARHDRSZ); \ 6088 } while (0) 6089 6090 /* ------------------- 6091 * NUMERIC to_number() (convert string to numeric) 6092 * ------------------- 6093 */ 6094 Datum 6095 numeric_to_number(PG_FUNCTION_ARGS) 6096 { 6097 text *value = PG_GETARG_TEXT_PP(0); 6098 text *fmt = PG_GETARG_TEXT_PP(1); 6099 NUMDesc Num; 6100 Datum result; 6101 FormatNode *format; 6102 char *numstr; 6103 bool shouldFree; 6104 int len = 0; 6105 int scale, 6106 precision; 6107 6108 len = VARSIZE_ANY_EXHDR(fmt); 6109 6110 if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ) 6111 PG_RETURN_NULL(); 6112 6113 format = NUM_cache(len, &Num, fmt, &shouldFree); 6114 6115 numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1); 6116 6117 NUM_processor(format, &Num, VARDATA_ANY(value), numstr, 6118 VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION()); 6119 6120 scale = Num.post; 6121 precision = Num.pre + Num.multi + scale; 6122 6123 if (shouldFree) 6124 pfree(format); 6125 6126 result = DirectFunctionCall3(numeric_in, 6127 CStringGetDatum(numstr), 6128 ObjectIdGetDatum(InvalidOid), 6129 Int32GetDatum(((precision << 16) | scale) + VARHDRSZ)); 6130 6131 if (IS_MULTI(&Num)) 6132 { 6133 Numeric x; 6134 Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric, 6135 Int32GetDatum(10))); 6136 Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric, 6137 Int32GetDatum(-Num.multi))); 6138 6139 x = DatumGetNumeric(DirectFunctionCall2(numeric_power, 6140 NumericGetDatum(a), 6141 NumericGetDatum(b))); 6142 result = DirectFunctionCall2(numeric_mul, 6143 result, 6144 NumericGetDatum(x)); 6145 } 6146 6147 pfree(numstr); 6148 return result; 6149 } 6150 6151 /* ------------------ 6152 * NUMERIC to_char() 6153 * ------------------ 6154 */ 6155 Datum 6156 numeric_to_char(PG_FUNCTION_ARGS) 6157 { 6158 Numeric value = PG_GETARG_NUMERIC(0); 6159 text *fmt = PG_GETARG_TEXT_PP(1); 6160 NUMDesc Num; 6161 FormatNode *format; 6162 text 
*result;
	bool		shouldFree;
	int			out_pre_spaces = 0,
				sign = 0;
	char	   *numstr,
			   *orgnum,
			   *p;
	Numeric		x;

	/*
	 * Parse the format picture and allocate the result buffer; this returns
	 * an empty text from the function if the picture is empty or too long.
	 */
	NUM_TOCHAR_prepare;

	/*
	 * Data-type dependent part (numeric): build numstr, the digit string
	 * that NUM_TOCHAR_finish hands to NUM_processor().
	 */
	if (IS_ROMAN(&Num))
	{
		/*
		 * Round to an integer and convert to int4 before producing roman
		 * numerals.  NOTE(review): numeric_int4 presumably raises an error
		 * for values that do not fit in int4 -- confirm whether that is the
		 * intended behavior for this format.
		 */
		x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
												NumericGetDatum(value),
												Int32GetDatum(0)));
		numstr = orgnum =
			int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
														   NumericGetDatum(x))));
	}
	else if (IS_EEEE(&Num))
	{
		orgnum = numeric_out_sci(value, Num.post);

		/*
		 * numeric_out_sci() does not emit a sign for positive numbers.  We
		 * need to add a space in this case so that positive and negative
		 * numbers are aligned.  We also have to do the right thing for NaN.
		 */
		if (strcmp(orgnum, "NaN") == 0)
		{
			/*
			 * Allow 6 characters for the leading sign, the decimal point,
			 * "e", the exponent's sign and two exponent digits.  The string
			 * is filled with '#', then given a leading space and a decimal
			 * point after the Num.pre integer positions.
			 */
			numstr = (char *) palloc(Num.pre + Num.post + 7);
			fill_str(numstr, '#', Num.pre + Num.post + 6);
			*numstr = ' ';
			*(numstr + Num.pre + 1) = '.';
		}
		else if (*orgnum != '-')
		{
			/* one extra byte for the leading space, one for the terminator */
			numstr = (char *) palloc(strlen(orgnum) + 2);
			*numstr = ' ';
			strcpy(numstr + 1, orgnum);
		}
		else
		{
			numstr = orgnum;
		}
	}
	else
	{
		int			numstr_pre_len;
		Numeric		val = value;

		if (IS_MULTI(&Num))
		{
			/* multiply the value by 10 ^ multi and widen the integer part */
			Numeric		a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
																Int32GetDatum(10)));
			Numeric		b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
																Int32GetDatum(Num.multi)));

			x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
													NumericGetDatum(a),
													NumericGetDatum(b)));
			val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
													  NumericGetDatum(value),
													  NumericGetDatum(x)));
			Num.pre += Num.multi;
		}

		/* round to the number of fractional digits the format asks for */
		x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
												NumericGetDatum(val),
												Int32GetDatum(Num.post)));
		orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
													 NumericGetDatum(x)));

		/* split off the sign; numstr points at the unsigned digits */
		if (*orgnum == '-')
		{
			sign = '-';
			numstr = orgnum + 1;
		}
		else
		{
			sign = '+';
			numstr = orgnum;
		}

		/* count the digits in front of the decimal point */
		if ((p = strchr(numstr, '.')))
			numstr_pre_len = p - numstr;
		else
			numstr_pre_len = strlen(numstr);

		/* needs padding? */
		if (numstr_pre_len < Num.pre)
			out_pre_spaces = Num.pre - numstr_pre_len;
		/* overflowed prefix digit format? */
		else if (numstr_pre_len > Num.pre)
		{
			/* replace the number by '#' marks with a '.' after Num.pre */
			numstr = (char *) palloc(Num.pre + Num.post + 2);
			fill_str(numstr, '#', Num.pre + Num.post + 1);
			*(numstr + Num.pre) = '.';
		}
	}

	NUM_TOCHAR_finish;
	PG_RETURN_TEXT_P(result);
}

/* ---------------
 * INT4 to_char()
 *
 * Argument 0 is the int4 value, argument 1 the format picture; the result
 * is the formatted text.  The function builds a digit string (numstr)
 * according to the format flags and leaves the actual formatting to
 * NUM_processor(), invoked through NUM_TOCHAR_finish.
 * ---------------
 */
Datum
int4_to_char(PG_FUNCTION_ARGS)
{
	int32		value = PG_GETARG_INT32(0);
	text	   *fmt = PG_GETARG_TEXT_PP(1);
	NUMDesc		Num;
	FormatNode *format;
	text	   *result;
	bool		shouldFree;
	int			out_pre_spaces = 0,
				sign = 0;
	char	   *numstr,
			   *orgnum;

	/*
	 * Parse the format picture and allocate the result buffer; this returns
	 * an empty text from the function if the picture is empty or too long.
	 */
	NUM_TOCHAR_prepare;

	/*
	 * Data-type dependent part (int32)
	 */
	if (IS_ROMAN(&Num))
		numstr = orgnum = int_to_roman(value);
	else if (IS_EEEE(&Num))
	{
		/* we can do it easily because float8 won't lose any precision */
		float8		val = (float8) value;

		/* "%+" always prints a sign, so the sign is the first character */
		orgnum = (char *) psprintf("%+.*e", Num.post, val);

		/*
		 * Swap a leading positive sign for a space.
6309 */ 6310 if (*orgnum == '+') 6311 *orgnum = ' '; 6312 6313 numstr = orgnum; 6314 } 6315 else 6316 { 6317 int numstr_pre_len; 6318 6319 if (IS_MULTI(&Num)) 6320 { 6321 orgnum = DatumGetCString(DirectFunctionCall1(int4out, 6322 Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi))))); 6323 Num.pre += Num.multi; 6324 } 6325 else 6326 { 6327 orgnum = DatumGetCString(DirectFunctionCall1(int4out, 6328 Int32GetDatum(value))); 6329 } 6330 6331 if (*orgnum == '-') 6332 { 6333 sign = '-'; 6334 orgnum++; 6335 } 6336 else 6337 sign = '+'; 6338 6339 numstr_pre_len = strlen(orgnum); 6340 6341 /* post-decimal digits? Pad out with zeros. */ 6342 if (Num.post) 6343 { 6344 numstr = (char *) palloc(numstr_pre_len + Num.post + 2); 6345 strcpy(numstr, orgnum); 6346 *(numstr + numstr_pre_len) = '.'; 6347 memset(numstr + numstr_pre_len + 1, '0', Num.post); 6348 *(numstr + numstr_pre_len + Num.post + 1) = '\0'; 6349 } 6350 else 6351 numstr = orgnum; 6352 6353 /* needs padding? */ 6354 if (numstr_pre_len < Num.pre) 6355 out_pre_spaces = Num.pre - numstr_pre_len; 6356 /* overflowed prefix digit format? */ 6357 else if (numstr_pre_len > Num.pre) 6358 { 6359 numstr = (char *) palloc(Num.pre + Num.post + 2); 6360 fill_str(numstr, '#', Num.pre + Num.post + 1); 6361 *(numstr + Num.pre) = '.'; 6362 } 6363 } 6364 6365 NUM_TOCHAR_finish; 6366 PG_RETURN_TEXT_P(result); 6367 } 6368 6369 /* --------------- 6370 * INT8 to_char() 6371 * --------------- 6372 */ 6373 Datum 6374 int8_to_char(PG_FUNCTION_ARGS) 6375 { 6376 int64 value = PG_GETARG_INT64(0); 6377 text *fmt = PG_GETARG_TEXT_PP(1); 6378 NUMDesc Num; 6379 FormatNode *format; 6380 text *result; 6381 bool shouldFree; 6382 int out_pre_spaces = 0, 6383 sign = 0; 6384 char *numstr, 6385 *orgnum; 6386 6387 NUM_TOCHAR_prepare; 6388 6389 /* 6390 * On DateType depend part (int32) 6391 */ 6392 if (IS_ROMAN(&Num)) 6393 { 6394 /* Currently don't support int8 conversion to roman... 
*/ 6395 numstr = orgnum = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value)))); 6396 } 6397 else if (IS_EEEE(&Num)) 6398 { 6399 /* to avoid loss of precision, must go via numeric not float8 */ 6400 Numeric val; 6401 6402 val = DatumGetNumeric(DirectFunctionCall1(int8_numeric, 6403 Int64GetDatum(value))); 6404 orgnum = numeric_out_sci(val, Num.post); 6405 6406 /* 6407 * numeric_out_sci() does not emit a sign for positive numbers. We 6408 * need to add a space in this case so that positive and negative 6409 * numbers are aligned. We don't have to worry about NaN here. 6410 */ 6411 if (*orgnum != '-') 6412 { 6413 numstr = (char *) palloc(strlen(orgnum) + 2); 6414 *numstr = ' '; 6415 strcpy(numstr + 1, orgnum); 6416 } 6417 else 6418 { 6419 numstr = orgnum; 6420 } 6421 } 6422 else 6423 { 6424 int numstr_pre_len; 6425 6426 if (IS_MULTI(&Num)) 6427 { 6428 double multi = pow((double) 10, (double) Num.multi); 6429 6430 value = DatumGetInt64(DirectFunctionCall2(int8mul, 6431 Int64GetDatum(value), 6432 DirectFunctionCall1(dtoi8, 6433 Float8GetDatum(multi)))); 6434 Num.pre += Num.multi; 6435 } 6436 6437 orgnum = DatumGetCString(DirectFunctionCall1(int8out, 6438 Int64GetDatum(value))); 6439 6440 if (*orgnum == '-') 6441 { 6442 sign = '-'; 6443 orgnum++; 6444 } 6445 else 6446 sign = '+'; 6447 6448 numstr_pre_len = strlen(orgnum); 6449 6450 /* post-decimal digits? Pad out with zeros. */ 6451 if (Num.post) 6452 { 6453 numstr = (char *) palloc(numstr_pre_len + Num.post + 2); 6454 strcpy(numstr, orgnum); 6455 *(numstr + numstr_pre_len) = '.'; 6456 memset(numstr + numstr_pre_len + 1, '0', Num.post); 6457 *(numstr + numstr_pre_len + Num.post + 1) = '\0'; 6458 } 6459 else 6460 numstr = orgnum; 6461 6462 /* needs padding? */ 6463 if (numstr_pre_len < Num.pre) 6464 out_pre_spaces = Num.pre - numstr_pre_len; 6465 /* overflowed prefix digit format? 
*/ 6466 else if (numstr_pre_len > Num.pre) 6467 { 6468 numstr = (char *) palloc(Num.pre + Num.post + 2); 6469 fill_str(numstr, '#', Num.pre + Num.post + 1); 6470 *(numstr + Num.pre) = '.'; 6471 } 6472 } 6473 6474 NUM_TOCHAR_finish; 6475 PG_RETURN_TEXT_P(result); 6476 } 6477 6478 /* ----------------- 6479 * FLOAT4 to_char() 6480 * ----------------- 6481 */ 6482 Datum 6483 float4_to_char(PG_FUNCTION_ARGS) 6484 { 6485 float4 value = PG_GETARG_FLOAT4(0); 6486 text *fmt = PG_GETARG_TEXT_PP(1); 6487 NUMDesc Num; 6488 FormatNode *format; 6489 text *result; 6490 bool shouldFree; 6491 int out_pre_spaces = 0, 6492 sign = 0; 6493 char *numstr, 6494 *orgnum, 6495 *p; 6496 6497 NUM_TOCHAR_prepare; 6498 6499 if (IS_ROMAN(&Num)) 6500 numstr = orgnum = int_to_roman((int) rint(value)); 6501 else if (IS_EEEE(&Num)) 6502 { 6503 if (isnan(value) || isinf(value)) 6504 { 6505 /* 6506 * Allow 6 characters for the leading sign, the decimal point, 6507 * "e", the exponent's sign and two exponent digits. 6508 */ 6509 numstr = (char *) palloc(Num.pre + Num.post + 7); 6510 fill_str(numstr, '#', Num.pre + Num.post + 6); 6511 *numstr = ' '; 6512 *(numstr + Num.pre + 1) = '.'; 6513 } 6514 else 6515 { 6516 numstr = orgnum = psprintf("%+.*e", Num.post, value); 6517 6518 /* 6519 * Swap a leading positive sign for a space. 
6520 */ 6521 if (*orgnum == '+') 6522 *orgnum = ' '; 6523 6524 numstr = orgnum; 6525 } 6526 } 6527 else 6528 { 6529 float4 val = value; 6530 int numstr_pre_len; 6531 6532 if (IS_MULTI(&Num)) 6533 { 6534 float multi = pow((double) 10, (double) Num.multi); 6535 6536 val = value * multi; 6537 Num.pre += Num.multi; 6538 } 6539 6540 orgnum = (char *) psprintf("%.0f", fabs(val)); 6541 numstr_pre_len = strlen(orgnum); 6542 6543 /* adjust post digits to fit max float digits */ 6544 if (numstr_pre_len >= FLT_DIG) 6545 Num.post = 0; 6546 else if (numstr_pre_len + Num.post > FLT_DIG) 6547 Num.post = FLT_DIG - numstr_pre_len; 6548 orgnum = psprintf("%.*f", Num.post, val); 6549 6550 if (*orgnum == '-') 6551 { /* < 0 */ 6552 sign = '-'; 6553 numstr = orgnum + 1; 6554 } 6555 else 6556 { 6557 sign = '+'; 6558 numstr = orgnum; 6559 } 6560 6561 if ((p = strchr(numstr, '.'))) 6562 numstr_pre_len = p - numstr; 6563 else 6564 numstr_pre_len = strlen(numstr); 6565 6566 /* needs padding? */ 6567 if (numstr_pre_len < Num.pre) 6568 out_pre_spaces = Num.pre - numstr_pre_len; 6569 /* overflowed prefix digit format? 
*/ 6570 else if (numstr_pre_len > Num.pre) 6571 { 6572 numstr = (char *) palloc(Num.pre + Num.post + 2); 6573 fill_str(numstr, '#', Num.pre + Num.post + 1); 6574 *(numstr + Num.pre) = '.'; 6575 } 6576 } 6577 6578 NUM_TOCHAR_finish; 6579 PG_RETURN_TEXT_P(result); 6580 } 6581 6582 /* ----------------- 6583 * FLOAT8 to_char() 6584 * ----------------- 6585 */ 6586 Datum 6587 float8_to_char(PG_FUNCTION_ARGS) 6588 { 6589 float8 value = PG_GETARG_FLOAT8(0); 6590 text *fmt = PG_GETARG_TEXT_PP(1); 6591 NUMDesc Num; 6592 FormatNode *format; 6593 text *result; 6594 bool shouldFree; 6595 int out_pre_spaces = 0, 6596 sign = 0; 6597 char *numstr, 6598 *orgnum, 6599 *p; 6600 6601 NUM_TOCHAR_prepare; 6602 6603 if (IS_ROMAN(&Num)) 6604 numstr = orgnum = int_to_roman((int) rint(value)); 6605 else if (IS_EEEE(&Num)) 6606 { 6607 if (isnan(value) || isinf(value)) 6608 { 6609 /* 6610 * Allow 6 characters for the leading sign, the decimal point, 6611 * "e", the exponent's sign and two exponent digits. 6612 */ 6613 numstr = (char *) palloc(Num.pre + Num.post + 7); 6614 fill_str(numstr, '#', Num.pre + Num.post + 6); 6615 *numstr = ' '; 6616 *(numstr + Num.pre + 1) = '.'; 6617 } 6618 else 6619 { 6620 numstr = orgnum = (char *) psprintf("%+.*e", Num.post, value); 6621 6622 /* 6623 * Swap a leading positive sign for a space. 
6624 */ 6625 if (*orgnum == '+') 6626 *orgnum = ' '; 6627 6628 numstr = orgnum; 6629 } 6630 } 6631 else 6632 { 6633 float8 val = value; 6634 int numstr_pre_len; 6635 6636 if (IS_MULTI(&Num)) 6637 { 6638 double multi = pow((double) 10, (double) Num.multi); 6639 6640 val = value * multi; 6641 Num.pre += Num.multi; 6642 } 6643 orgnum = psprintf("%.0f", fabs(val)); 6644 numstr_pre_len = strlen(orgnum); 6645 6646 /* adjust post digits to fit max double digits */ 6647 if (numstr_pre_len >= DBL_DIG) 6648 Num.post = 0; 6649 else if (numstr_pre_len + Num.post > DBL_DIG) 6650 Num.post = DBL_DIG - numstr_pre_len; 6651 orgnum = psprintf("%.*f", Num.post, val); 6652 6653 if (*orgnum == '-') 6654 { /* < 0 */ 6655 sign = '-'; 6656 numstr = orgnum + 1; 6657 } 6658 else 6659 { 6660 sign = '+'; 6661 numstr = orgnum; 6662 } 6663 6664 if ((p = strchr(numstr, '.'))) 6665 numstr_pre_len = p - numstr; 6666 else 6667 numstr_pre_len = strlen(numstr); 6668 6669 /* needs padding? */ 6670 if (numstr_pre_len < Num.pre) 6671 out_pre_spaces = Num.pre - numstr_pre_len; 6672 /* overflowed prefix digit format? */ 6673 else if (numstr_pre_len > Num.pre) 6674 { 6675 numstr = (char *) palloc(Num.pre + Num.post + 2); 6676 fill_str(numstr, '#', Num.pre + Num.post + 1); 6677 *(numstr + Num.pre) = '.'; 6678 } 6679 } 6680 6681 NUM_TOCHAR_finish; 6682 PG_RETURN_TEXT_P(result); 6683 } 6684