/* -----------------------------------------------------------------------
 * formatting.c
 *
 * src/backend/utils/adt/formatting.c
 *
 *
 *	 Portions Copyright (c) 1999-2019, PostgreSQL Global Development Group
 *
 *
 *	 TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
 *
 *	 The PostgreSQL routines for timestamp/int/float/numeric formatting,
 *	 inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
 *
 *
 *	 Cache & Memory:
 *	The routines use their own internal cache for format pictures.
 *
 *	The cache uses a static buffer and is persistent across transactions.  If
 *	the format picture is bigger than the cache buffer, the parser is always
 *	called.
 *
 *	 NOTE for Number version:
 *	Everything in this version is implemented as keywords (suffixes are
 *	not used), because a format picture describes only *one* item (a
 *	number).  This differs from the timestamp version, where each keyword
 *	can have a suffix.
 *
 *	 NOTE for Timestamp routines:
 *	This module does *not* use the POSIX 'struct tm' type but rather the
 *	PgSQL type, which has tm_mon based on one (*not* zero) and a year that
 *	is *not* based on 1900 but holds the full year number.
 *	Module supports AD / BC / AM / PM.
34 * 35 * Supported types for to_char(): 36 * 37 * Timestamp, Numeric, int4, int8, float4, float8 38 * 39 * Supported types for reverse conversion: 40 * 41 * Timestamp - to_timestamp() 42 * Date - to_date() 43 * Numeric - to_number() 44 * 45 * 46 * Karel Zak 47 * 48 * TODO 49 * - better number building (formatting) / parsing, now it isn't 50 * ideal code 51 * - use Assert() 52 * - add support for roman number to standard number conversion 53 * - add support for number spelling 54 * - add support for string to string formatting (we must be better 55 * than Oracle :-), 56 * to_char('Hello', 'X X X X X') -> 'H e l l o' 57 * 58 * ----------------------------------------------------------------------- 59 */ 60 61 #ifdef DEBUG_TO_FROM_CHAR 62 #define DEBUG_elog_output DEBUG3 63 #endif 64 65 #include "postgres.h" 66 67 #include <ctype.h> 68 #include <unistd.h> 69 #include <math.h> 70 #include <float.h> 71 #include <limits.h> 72 73 /* 74 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems 75 * declare them in <wchar.h>. 76 */ 77 #ifdef HAVE_WCHAR_H 78 #include <wchar.h> 79 #endif 80 #ifdef HAVE_WCTYPE_H 81 #include <wctype.h> 82 #endif 83 84 #ifdef USE_ICU 85 #include <unicode/ustring.h> 86 #endif 87 88 #include "catalog/pg_collation.h" 89 #include "mb/pg_wchar.h" 90 #include "parser/scansup.h" 91 #include "utils/builtins.h" 92 #include "utils/date.h" 93 #include "utils/datetime.h" 94 #include "utils/float.h" 95 #include "utils/formatting.h" 96 #include "utils/int8.h" 97 #include "utils/memutils.h" 98 #include "utils/numeric.h" 99 #include "utils/pg_locale.h" 100 101 /* ---------- 102 * Routines type 103 * ---------- 104 */ 105 #define DCH_TYPE 1 /* DATE-TIME version */ 106 #define NUM_TYPE 2 /* NUMBER version */ 107 108 /* ---------- 109 * KeyWord Index (ascii from position 32 (' ') to 126 (~)) 110 * ---------- 111 */ 112 #define KeyWord_INDEX_SIZE ('~' - ' ') 113 #define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 
0 : 1) 114 115 /* ---------- 116 * Maximal length of one node 117 * ---------- 118 */ 119 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */ 120 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */ 121 122 123 /* ---------- 124 * Format parser structs 125 * ---------- 126 */ 127 typedef struct 128 { 129 const char *name; /* suffix string */ 130 int len, /* suffix length */ 131 id, /* used in node->suffix */ 132 type; /* prefix / postfix */ 133 } KeySuffix; 134 135 /* ---------- 136 * FromCharDateMode 137 * ---------- 138 * 139 * This value is used to nominate one of several distinct (and mutually 140 * exclusive) date conventions that a keyword can belong to. 141 */ 142 typedef enum 143 { 144 FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */ 145 FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */ 146 FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */ 147 } FromCharDateMode; 148 149 typedef struct 150 { 151 const char *name; 152 int len; 153 int id; 154 bool is_digit; 155 FromCharDateMode date_mode; 156 } KeyWord; 157 158 typedef struct 159 { 160 uint8 type; /* NODE_TYPE_XXX, see below */ 161 char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */ 162 uint8 suffix; /* keyword prefix/suffix code, if any */ 163 const KeyWord *key; /* if type is ACTION */ 164 } FormatNode; 165 166 #define NODE_TYPE_END 1 167 #define NODE_TYPE_ACTION 2 168 #define NODE_TYPE_CHAR 3 169 #define NODE_TYPE_SEPARATOR 4 170 #define NODE_TYPE_SPACE 5 171 172 #define SUFFTYPE_PREFIX 1 173 #define SUFFTYPE_POSTFIX 2 174 175 #define CLOCK_24_HOUR 0 176 #define CLOCK_12_HOUR 1 177 178 179 /* ---------- 180 * Full months 181 * ---------- 182 */ 183 static const char *const months_full[] = { 184 "January", "February", "March", "April", "May", "June", "July", 185 "August", "September", "October", "November", "December", NULL 186 }; 187 188 static const char *const days_short[] = { 189 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL 190 
}; 191 192 /* ---------- 193 * AD / BC 194 * ---------- 195 * There is no 0 AD. Years go from 1 BC to 1 AD, so we make it 196 * positive and map year == -1 to year zero, and shift all negative 197 * years up one. For interval years, we just return the year. 198 */ 199 #define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year))) 200 201 #define A_D_STR "A.D." 202 #define a_d_STR "a.d." 203 #define AD_STR "AD" 204 #define ad_STR "ad" 205 206 #define B_C_STR "B.C." 207 #define b_c_STR "b.c." 208 #define BC_STR "BC" 209 #define bc_STR "bc" 210 211 /* 212 * AD / BC strings for seq_search. 213 * 214 * These are given in two variants, a long form with periods and a standard 215 * form without. 216 * 217 * The array is laid out such that matches for AD have an even index, and 218 * matches for BC have an odd index. So the boolean value for BC is given by 219 * taking the array index of the match, modulo 2. 220 */ 221 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL}; 222 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL}; 223 224 /* ---------- 225 * AM / PM 226 * ---------- 227 */ 228 #define A_M_STR "A.M." 229 #define a_m_STR "a.m." 230 #define AM_STR "AM" 231 #define am_STR "am" 232 233 #define P_M_STR "P.M." 234 #define p_m_STR "p.m." 235 #define PM_STR "PM" 236 #define pm_STR "pm" 237 238 /* 239 * AM / PM strings for seq_search. 240 * 241 * These are given in two variants, a long form with periods and a standard 242 * form without. 243 * 244 * The array is laid out such that matches for AM have an even index, and 245 * matches for PM have an odd index. So the boolean value for PM is given by 246 * taking the array index of the match, modulo 2. 
247 */ 248 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL}; 249 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL}; 250 251 /* ---------- 252 * Months in roman-numeral 253 * (Must be in reverse order for seq_search (in FROM_CHAR), because 254 * 'VIII' must have higher precedence than 'V') 255 * ---------- 256 */ 257 static const char *const rm_months_upper[] = 258 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL}; 259 260 static const char *const rm_months_lower[] = 261 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL}; 262 263 /* ---------- 264 * Roman numbers 265 * ---------- 266 */ 267 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL}; 268 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL}; 269 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL}; 270 271 /* ---------- 272 * Ordinal postfixes 273 * ---------- 274 */ 275 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL}; 276 static const char *const numth[] = {"st", "nd", "rd", "th", NULL}; 277 278 /* ---------- 279 * Flags & Options: 280 * ---------- 281 */ 282 #define TH_UPPER 1 283 #define TH_LOWER 2 284 285 /* ---------- 286 * Number description struct 287 * ---------- 288 */ 289 typedef struct 290 { 291 int pre, /* (count) numbers before decimal */ 292 post, /* (count) numbers after decimal */ 293 lsign, /* want locales sign */ 294 flag, /* number parameters */ 295 pre_lsign_num, /* tmp value for lsign */ 296 multi, /* multiplier for 'V' */ 297 zero_start, /* position of first zero */ 298 zero_end, /* position of last zero */ 299 need_locale; /* needs it locale */ 300 } NUMDesc; 301 302 /* ---------- 303 * Flags for NUMBER version 304 * ---------- 305 */ 306 #define NUM_F_DECIMAL (1 << 1) 307 #define NUM_F_LDECIMAL (1 << 2) 
308 #define NUM_F_ZERO (1 << 3) 309 #define NUM_F_BLANK (1 << 4) 310 #define NUM_F_FILLMODE (1 << 5) 311 #define NUM_F_LSIGN (1 << 6) 312 #define NUM_F_BRACKET (1 << 7) 313 #define NUM_F_MINUS (1 << 8) 314 #define NUM_F_PLUS (1 << 9) 315 #define NUM_F_ROMAN (1 << 10) 316 #define NUM_F_MULTI (1 << 11) 317 #define NUM_F_PLUS_POST (1 << 12) 318 #define NUM_F_MINUS_POST (1 << 13) 319 #define NUM_F_EEEE (1 << 14) 320 321 #define NUM_LSIGN_PRE (-1) 322 #define NUM_LSIGN_POST 1 323 #define NUM_LSIGN_NONE 0 324 325 /* ---------- 326 * Tests 327 * ---------- 328 */ 329 #define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL) 330 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL) 331 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO) 332 #define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK) 333 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE) 334 #define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET) 335 #define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS) 336 #define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN) 337 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS) 338 #define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN) 339 #define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI) 340 #define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE) 341 342 /* ---------- 343 * Format picture cache 344 * 345 * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long; 346 * likewise number format pictures up to NUM_CACHE_SIZE bytes long. 347 * 348 * For simplicity, the cache entries are fixed-size, so they allow for the 349 * worst case of a FormatNode for each byte in the picture string. 350 * 351 * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and 352 * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that 353 * we don't waste too much space by palloc'ing them individually. Be sure 354 * to adjust those macros if you add fields to those structs. 355 * 356 * The max number of entries in each cache is DCH_CACHE_ENTRIES 357 * resp. NUM_CACHE_ENTRIES. 
358 * ---------- 359 */ 360 #define DCH_CACHE_OVERHEAD \ 361 MAXALIGN(sizeof(bool) + sizeof(int)) 362 #define NUM_CACHE_OVERHEAD \ 363 MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc)) 364 365 #define DCH_CACHE_SIZE \ 366 ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1) 367 #define NUM_CACHE_SIZE \ 368 ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1) 369 370 #define DCH_CACHE_ENTRIES 20 371 #define NUM_CACHE_ENTRIES 20 372 373 typedef struct 374 { 375 FormatNode format[DCH_CACHE_SIZE + 1]; 376 char str[DCH_CACHE_SIZE + 1]; 377 bool valid; 378 int age; 379 } DCHCacheEntry; 380 381 typedef struct 382 { 383 FormatNode format[NUM_CACHE_SIZE + 1]; 384 char str[NUM_CACHE_SIZE + 1]; 385 bool valid; 386 int age; 387 NUMDesc Num; 388 } NUMCacheEntry; 389 390 /* global cache for date/time format pictures */ 391 static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES]; 392 static int n_DCHCache = 0; /* current number of entries */ 393 static int DCHCounter = 0; /* aging-event counter */ 394 395 /* global cache for number format pictures */ 396 static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES]; 397 static int n_NUMCache = 0; /* current number of entries */ 398 static int NUMCounter = 0; /* aging-event counter */ 399 400 /* ---------- 401 * For char->date/time conversion 402 * ---------- 403 */ 404 typedef struct 405 { 406 FromCharDateMode mode; 407 int hh, 408 pm, 409 mi, 410 ss, 411 ssss, 412 d, /* stored as 1-7, Sunday = 1, 0 means missing */ 413 dd, 414 ddd, 415 mm, 416 ms, 417 year, 418 bc, 419 ww, 420 w, 421 cc, 422 j, 423 us, 424 yysz, /* is it YY or YYYY ? */ 425 clock, /* 12 or 24 hour clock? 
*/ 426 tzsign, /* +1, -1 or 0 if timezone info is absent */ 427 tzh, 428 tzm; 429 } TmFromChar; 430 431 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar)) 432 433 /* ---------- 434 * Debug 435 * ---------- 436 */ 437 #ifdef DEBUG_TO_FROM_CHAR 438 #define DEBUG_TMFC(_X) \ 439 elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \ 440 (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \ 441 (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \ 442 (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \ 443 (_X)->yysz, (_X)->clock) 444 #define DEBUG_TM(_X) \ 445 elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\ 446 (_X)->tm_sec, (_X)->tm_year,\ 447 (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\ 448 (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon) 449 #else 450 #define DEBUG_TMFC(_X) 451 #define DEBUG_TM(_X) 452 #endif 453 454 /* ---------- 455 * Datetime to char conversion 456 * ---------- 457 */ 458 typedef struct TmToChar 459 { 460 struct pg_tm tm; /* classic 'tm' struct */ 461 fsec_t fsec; /* fractional seconds */ 462 const char *tzn; /* timezone */ 463 } TmToChar; 464 465 #define tmtcTm(_X) (&(_X)->tm) 466 #define tmtcTzn(_X) ((_X)->tzn) 467 #define tmtcFsec(_X) ((_X)->fsec) 468 469 #define ZERO_tm(_X) \ 470 do { \ 471 (_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \ 472 (_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \ 473 (_X)->tm_mday = (_X)->tm_mon = 1; \ 474 (_X)->tm_zone = NULL; \ 475 } while(0) 476 477 #define ZERO_tmtc(_X) \ 478 do { \ 479 ZERO_tm( tmtcTm(_X) ); \ 480 tmtcFsec(_X) = 0; \ 481 tmtcTzn(_X) = NULL; \ 482 } while(0) 483 484 /* 485 * to_char(time) appears to to_char() as an interval, so this check 486 * is really for interval and time data types. 
487 */ 488 #define INVALID_FOR_INTERVAL \ 489 do { \ 490 if (is_interval) \ 491 ereport(ERROR, \ 492 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \ 493 errmsg("invalid format specification for an interval value"), \ 494 errhint("Intervals are not tied to specific calendar dates."))); \ 495 } while(0) 496 497 /***************************************************************************** 498 * KeyWord definitions 499 *****************************************************************************/ 500 501 /* ---------- 502 * Suffixes (FormatNode.suffix is an OR of these codes) 503 * ---------- 504 */ 505 #define DCH_S_FM 0x01 506 #define DCH_S_TH 0x02 507 #define DCH_S_th 0x04 508 #define DCH_S_SP 0x08 509 #define DCH_S_TM 0x10 510 511 /* ---------- 512 * Suffix tests 513 * ---------- 514 */ 515 #define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0) 516 #define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0) 517 #define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0) 518 #define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER) 519 520 /* Oracle toggles FM behavior, we don't; see docs. */ 521 #define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0) 522 #define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0) 523 #define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0) 524 525 /* ---------- 526 * Suffixes definition for DATE-TIME TO/FROM CHAR 527 * ---------- 528 */ 529 #define TM_SUFFIX_LEN 2 530 531 static const KeySuffix DCH_suff[] = { 532 {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX}, 533 {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX}, 534 {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX}, 535 {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX}, 536 {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX}, 537 {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX}, 538 {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX}, 539 /* last */ 540 {NULL, 0, 0, 0} 541 }; 542 543 544 /* ---------- 545 * Format-pictures (KeyWord). 
546 * 547 * The KeyWord field; alphabetic sorted, *BUT* strings alike is sorted 548 * complicated -to-> easy: 549 * 550 * (example: "DDD","DD","Day","D" ) 551 * 552 * (this specific sort needs the algorithm for sequential search for strings, 553 * which not has exact end; -> How keyword is in "HH12blabla" ? - "HH" 554 * or "HH12"? You must first try "HH12", because "HH" is in string, but 555 * it is not good. 556 * 557 * (!) 558 * - Position for the keyword is similar as position in the enum DCH/NUM_poz. 559 * (!) 560 * 561 * For fast search is used the 'int index[]', index is ascii table from position 562 * 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII 563 * position or -1 if char is not used in the KeyWord. Search example for 564 * string "MM": 565 * 1) see in index to index['M' - 32], 566 * 2) take keywords position (enum DCH_MI) from index 567 * 3) run sequential search in keywords[] from this position 568 * 569 * ---------- 570 */ 571 572 typedef enum 573 { 574 DCH_A_D, 575 DCH_A_M, 576 DCH_AD, 577 DCH_AM, 578 DCH_B_C, 579 DCH_BC, 580 DCH_CC, 581 DCH_DAY, 582 DCH_DDD, 583 DCH_DD, 584 DCH_DY, 585 DCH_Day, 586 DCH_Dy, 587 DCH_D, 588 DCH_FX, /* global suffix */ 589 DCH_HH24, 590 DCH_HH12, 591 DCH_HH, 592 DCH_IDDD, 593 DCH_ID, 594 DCH_IW, 595 DCH_IYYY, 596 DCH_IYY, 597 DCH_IY, 598 DCH_I, 599 DCH_J, 600 DCH_MI, 601 DCH_MM, 602 DCH_MONTH, 603 DCH_MON, 604 DCH_MS, 605 DCH_Month, 606 DCH_Mon, 607 DCH_OF, 608 DCH_P_M, 609 DCH_PM, 610 DCH_Q, 611 DCH_RM, 612 DCH_SSSS, 613 DCH_SS, 614 DCH_TZH, 615 DCH_TZM, 616 DCH_TZ, 617 DCH_US, 618 DCH_WW, 619 DCH_W, 620 DCH_Y_YYY, 621 DCH_YYYY, 622 DCH_YYY, 623 DCH_YY, 624 DCH_Y, 625 DCH_a_d, 626 DCH_a_m, 627 DCH_ad, 628 DCH_am, 629 DCH_b_c, 630 DCH_bc, 631 DCH_cc, 632 DCH_day, 633 DCH_ddd, 634 DCH_dd, 635 DCH_dy, 636 DCH_d, 637 DCH_fx, 638 DCH_hh24, 639 DCH_hh12, 640 DCH_hh, 641 DCH_iddd, 642 DCH_id, 643 DCH_iw, 644 DCH_iyyy, 645 DCH_iyy, 646 DCH_iy, 647 DCH_i, 648 DCH_j, 649 DCH_mi, 650 DCH_mm, 651 DCH_month, 
652 DCH_mon, 653 DCH_ms, 654 DCH_p_m, 655 DCH_pm, 656 DCH_q, 657 DCH_rm, 658 DCH_ssss, 659 DCH_ss, 660 DCH_tz, 661 DCH_us, 662 DCH_ww, 663 DCH_w, 664 DCH_y_yyy, 665 DCH_yyyy, 666 DCH_yyy, 667 DCH_yy, 668 DCH_y, 669 670 /* last */ 671 _DCH_last_ 672 } DCH_poz; 673 674 typedef enum 675 { 676 NUM_COMMA, 677 NUM_DEC, 678 NUM_0, 679 NUM_9, 680 NUM_B, 681 NUM_C, 682 NUM_D, 683 NUM_E, 684 NUM_FM, 685 NUM_G, 686 NUM_L, 687 NUM_MI, 688 NUM_PL, 689 NUM_PR, 690 NUM_RN, 691 NUM_SG, 692 NUM_SP, 693 NUM_S, 694 NUM_TH, 695 NUM_V, 696 NUM_b, 697 NUM_c, 698 NUM_d, 699 NUM_e, 700 NUM_fm, 701 NUM_g, 702 NUM_l, 703 NUM_mi, 704 NUM_pl, 705 NUM_pr, 706 NUM_rn, 707 NUM_sg, 708 NUM_sp, 709 NUM_s, 710 NUM_th, 711 NUM_v, 712 713 /* last */ 714 _NUM_last_ 715 } NUM_poz; 716 717 /* ---------- 718 * KeyWords for DATE-TIME version 719 * ---------- 720 */ 721 static const KeyWord DCH_keywords[] = { 722 /* name, len, id, is_digit, date_mode */ 723 {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */ 724 {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE}, 725 {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE}, 726 {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE}, 727 {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */ 728 {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE}, 729 {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */ 730 {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */ 731 {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN}, 732 {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN}, 733 {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE}, 734 {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE}, 735 {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE}, 736 {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN}, 737 {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* F */ 738 {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */ 739 {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE}, 740 {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE}, 741 {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */ 742 {"ID", 2, 
DCH_ID, true, FROM_CHAR_DATE_ISOWEEK}, 743 {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK}, 744 {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK}, 745 {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK}, 746 {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK}, 747 {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK}, 748 {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */ 749 {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */ 750 {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN}, 751 {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN}, 752 {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN}, 753 {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE}, 754 {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN}, 755 {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN}, 756 {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */ 757 {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */ 758 {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE}, 759 {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */ 760 {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */ 761 {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */ 762 {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE}, 763 {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */ 764 {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE}, 765 {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE}, 766 {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */ 767 {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */ 768 {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN}, 769 {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */ 770 {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN}, 771 {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN}, 772 {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN}, 773 {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN}, 774 {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */ 775 {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE}, 776 {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE}, 777 {"am", 2, DCH_am, false, 
FROM_CHAR_DATE_NONE}, 778 {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */ 779 {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE}, 780 {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */ 781 {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */ 782 {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN}, 783 {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN}, 784 {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE}, 785 {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN}, 786 {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* f */ 787 {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */ 788 {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE}, 789 {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE}, 790 {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */ 791 {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK}, 792 {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK}, 793 {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK}, 794 {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK}, 795 {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK}, 796 {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK}, 797 {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */ 798 {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */ 799 {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN}, 800 {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN}, 801 {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN}, 802 {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE}, 803 {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */ 804 {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE}, 805 {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */ 806 {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */ 807 {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */ 808 {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE}, 809 {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */ 810 {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */ 811 {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */ 812 {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN}, 
813 {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */ 814 {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN}, 815 {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN}, 816 {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN}, 817 {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN}, 818 819 /* last */ 820 {NULL, 0, 0, 0, 0} 821 }; 822 823 /* ---------- 824 * KeyWords for NUMBER version 825 * 826 * The is_digit and date_mode fields are not relevant here. 827 * ---------- 828 */ 829 static const KeyWord NUM_keywords[] = { 830 /* name, len, id is in Index */ 831 {",", 1, NUM_COMMA}, /* , */ 832 {".", 1, NUM_DEC}, /* . */ 833 {"0", 1, NUM_0}, /* 0 */ 834 {"9", 1, NUM_9}, /* 9 */ 835 {"B", 1, NUM_B}, /* B */ 836 {"C", 1, NUM_C}, /* C */ 837 {"D", 1, NUM_D}, /* D */ 838 {"EEEE", 4, NUM_E}, /* E */ 839 {"FM", 2, NUM_FM}, /* F */ 840 {"G", 1, NUM_G}, /* G */ 841 {"L", 1, NUM_L}, /* L */ 842 {"MI", 2, NUM_MI}, /* M */ 843 {"PL", 2, NUM_PL}, /* P */ 844 {"PR", 2, NUM_PR}, 845 {"RN", 2, NUM_RN}, /* R */ 846 {"SG", 2, NUM_SG}, /* S */ 847 {"SP", 2, NUM_SP}, 848 {"S", 1, NUM_S}, 849 {"TH", 2, NUM_TH}, /* T */ 850 {"V", 1, NUM_V}, /* V */ 851 {"b", 1, NUM_B}, /* b */ 852 {"c", 1, NUM_C}, /* c */ 853 {"d", 1, NUM_D}, /* d */ 854 {"eeee", 4, NUM_E}, /* e */ 855 {"fm", 2, NUM_FM}, /* f */ 856 {"g", 1, NUM_G}, /* g */ 857 {"l", 1, NUM_L}, /* l */ 858 {"mi", 2, NUM_MI}, /* m */ 859 {"pl", 2, NUM_PL}, /* p */ 860 {"pr", 2, NUM_PR}, 861 {"rn", 2, NUM_rn}, /* r */ 862 {"sg", 2, NUM_SG}, /* s */ 863 {"sp", 2, NUM_SP}, 864 {"s", 1, NUM_S}, 865 {"th", 2, NUM_th}, /* t */ 866 {"v", 1, NUM_V}, /* v */ 867 868 /* last */ 869 {NULL, 0, 0} 870 }; 871 872 873 /* ---------- 874 * KeyWords index for DATE-TIME version 875 * ---------- 876 */ 877 static const int DCH_index[KeyWord_INDEX_SIZE] = { 878 /* 879 0 1 2 3 4 5 6 7 8 9 880 */ 881 /*---- first 0..31 chars are skipped ----*/ 882 883 -1, -1, -1, -1, -1, -1, -1, -1, 884 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 885 -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, 886 -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1, 887 DCH_FX, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF, 888 DCH_P_M, DCH_Q, DCH_RM, DCH_SSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY, 889 -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc, 890 DCH_day, -1, DCH_fx, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi, 891 -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_ssss, DCH_tz, DCH_us, -1, DCH_ww, 892 -1, DCH_y_yyy, -1, -1, -1, -1 893 894 /*---- chars over 126 are skipped ----*/ 895 }; 896 897 /* ---------- 898 * KeyWords index for NUMBER version 899 * ---------- 900 */ 901 static const int NUM_index[KeyWord_INDEX_SIZE] = { 902 /* 903 0 1 2 3 4 5 6 7 8 9 904 */ 905 /*---- first 0..31 chars are skipped ----*/ 906 907 -1, -1, -1, -1, -1, -1, -1, -1, 908 -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1, 909 -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1, 910 -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E, 911 NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1, 912 NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1, 913 -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c, 914 NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi, 915 -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1, 916 -1, -1, -1, -1, -1, -1 917 918 /*---- chars over 126 are skipped ----*/ 919 }; 920 921 /* ---------- 922 * Number processor struct 923 * ---------- 924 */ 925 typedef struct NUMProc 926 { 927 bool is_to_char; 928 NUMDesc *Num; /* number description */ 929 930 int sign, /* '-' or '+' */ 931 sign_wrote, /* was sign write */ 932 num_count, /* number of write digits */ 933 num_in, /* is inside number */ 934 num_curr, /* current position in number */ 935 out_pre_spaces, /* spaces before first digit */ 936 937 read_dec, /* to_number - was read dec. point */ 938 read_post, /* to_number - number of dec. digit */ 939 read_pre; /* to_number - number non-dec. 
digit */ 940 941 char *number, /* string with number */ 942 *number_p, /* pointer to current number position */ 943 *inout, /* in / out buffer */ 944 *inout_p, /* pointer to current inout position */ 945 *last_relevant, /* last relevant number after decimal point */ 946 947 *L_negative_sign, /* Locale */ 948 *L_positive_sign, 949 *decimal, 950 *L_thousands_sep, 951 *L_currency_symbol; 952 } NUMProc; 953 954 955 /* ---------- 956 * Functions 957 * ---------- 958 */ 959 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw, 960 const int *index); 961 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type); 962 static bool is_separator_char(const char *str); 963 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); 964 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, 965 const KeySuffix *suf, const int *index, int ver, NUMDesc *Num); 966 967 static void DCH_to_char(FormatNode *node, bool is_interval, 968 TmToChar *in, char *out, Oid collid); 969 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out); 970 971 #ifdef DEBUG_TO_FROM_CHAR 972 static void dump_index(const KeyWord *k, const int *index); 973 static void dump_node(FormatNode *node, int max); 974 #endif 975 976 static const char *get_th(char *num, int type); 977 static char *str_numth(char *dest, char *num, int type); 978 static int adjust_partial_year_to_2020(int year); 979 static int strspace_len(const char *str); 980 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode); 981 static void from_char_set_int(int *dest, const int value, const FormatNode *node); 982 static int from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node); 983 static int from_char_parse_int(int *dest, const char **src, FormatNode *node); 984 static int seq_search(const char *name, const char *const *array, int *len); 985 static int from_char_seq_search(int *dest, const char **src, 
986 const char *const *array, 987 FormatNode *node); 988 static void do_to_timestamp(text *date_txt, text *fmt, 989 struct pg_tm *tm, fsec_t *fsec); 990 static char *fill_str(char *str, int c, int max); 991 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); 992 static char *int_to_roman(int number); 993 static void NUM_prepare_locale(NUMProc *Np); 994 static char *get_last_relevant_decnum(char *num); 995 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len); 996 static void NUM_numpart_to_char(NUMProc *Np, int id); 997 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, 998 char *number, int input_len, int to_char_out_pre_spaces, 999 int sign, bool is_to_char, Oid collid); 1000 static DCHCacheEntry *DCH_cache_getnew(const char *str); 1001 static DCHCacheEntry *DCH_cache_search(const char *str); 1002 static DCHCacheEntry *DCH_cache_fetch(const char *str); 1003 static NUMCacheEntry *NUM_cache_getnew(const char *str); 1004 static NUMCacheEntry *NUM_cache_search(const char *str); 1005 static NUMCacheEntry *NUM_cache_fetch(const char *str); 1006 1007 1008 /* ---------- 1009 * Fast sequential search, use index for data selection which 1010 * go to seq. 
cycle (it is very fast for unwanted strings) 1011 * (can't be used binary search in format parsing) 1012 * ---------- 1013 */ 1014 static const KeyWord * 1015 index_seq_search(const char *str, const KeyWord *kw, const int *index) 1016 { 1017 int poz; 1018 1019 if (!KeyWord_INDEX_FILTER(*str)) 1020 return NULL; 1021 1022 if ((poz = *(index + (*str - ' '))) > -1) 1023 { 1024 const KeyWord *k = kw + poz; 1025 1026 do 1027 { 1028 if (strncmp(str, k->name, k->len) == 0) 1029 return k; 1030 k++; 1031 if (!k->name) 1032 return NULL; 1033 } while (*str == *k->name); 1034 } 1035 return NULL; 1036 } 1037 1038 static const KeySuffix * 1039 suff_search(const char *str, const KeySuffix *suf, int type) 1040 { 1041 const KeySuffix *s; 1042 1043 for (s = suf; s->name != NULL; s++) 1044 { 1045 if (s->type != type) 1046 continue; 1047 1048 if (strncmp(str, s->name, s->len) == 0) 1049 return s; 1050 } 1051 return NULL; 1052 } 1053 1054 static bool 1055 is_separator_char(const char *str) 1056 { 1057 /* ASCII printable character, but not letter or digit */ 1058 return (*str > 0x20 && *str < 0x7F && 1059 !(*str >= 'A' && *str <= 'Z') && 1060 !(*str >= 'a' && *str <= 'z') && 1061 !(*str >= '0' && *str <= '9')); 1062 } 1063 1064 /* ---------- 1065 * Prepare NUMDesc (number description struct) via FormatNode struct 1066 * ---------- 1067 */ 1068 static void 1069 NUMDesc_prepare(NUMDesc *num, FormatNode *n) 1070 { 1071 if (n->type != NODE_TYPE_ACTION) 1072 return; 1073 1074 if (IS_EEEE(num) && n->key->id != NUM_E) 1075 ereport(ERROR, 1076 (errcode(ERRCODE_SYNTAX_ERROR), 1077 errmsg("\"EEEE\" must be the last pattern used"))); 1078 1079 switch (n->key->id) 1080 { 1081 case NUM_9: 1082 if (IS_BRACKET(num)) 1083 ereport(ERROR, 1084 (errcode(ERRCODE_SYNTAX_ERROR), 1085 errmsg("\"9\" must be ahead of \"PR\""))); 1086 if (IS_MULTI(num)) 1087 { 1088 ++num->multi; 1089 break; 1090 } 1091 if (IS_DECIMAL(num)) 1092 ++num->post; 1093 else 1094 ++num->pre; 1095 break; 1096 1097 case NUM_0: 1098 if 
(IS_BRACKET(num)) 1099 ereport(ERROR, 1100 (errcode(ERRCODE_SYNTAX_ERROR), 1101 errmsg("\"0\" must be ahead of \"PR\""))); 1102 if (!IS_ZERO(num) && !IS_DECIMAL(num)) 1103 { 1104 num->flag |= NUM_F_ZERO; 1105 num->zero_start = num->pre + 1; 1106 } 1107 if (!IS_DECIMAL(num)) 1108 ++num->pre; 1109 else 1110 ++num->post; 1111 1112 num->zero_end = num->pre + num->post; 1113 break; 1114 1115 case NUM_B: 1116 if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num))) 1117 num->flag |= NUM_F_BLANK; 1118 break; 1119 1120 case NUM_D: 1121 num->flag |= NUM_F_LDECIMAL; 1122 num->need_locale = true; 1123 /* FALLTHROUGH */ 1124 case NUM_DEC: 1125 if (IS_DECIMAL(num)) 1126 ereport(ERROR, 1127 (errcode(ERRCODE_SYNTAX_ERROR), 1128 errmsg("multiple decimal points"))); 1129 if (IS_MULTI(num)) 1130 ereport(ERROR, 1131 (errcode(ERRCODE_SYNTAX_ERROR), 1132 errmsg("cannot use \"V\" and decimal point together"))); 1133 num->flag |= NUM_F_DECIMAL; 1134 break; 1135 1136 case NUM_FM: 1137 num->flag |= NUM_F_FILLMODE; 1138 break; 1139 1140 case NUM_S: 1141 if (IS_LSIGN(num)) 1142 ereport(ERROR, 1143 (errcode(ERRCODE_SYNTAX_ERROR), 1144 errmsg("cannot use \"S\" twice"))); 1145 if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num)) 1146 ereport(ERROR, 1147 (errcode(ERRCODE_SYNTAX_ERROR), 1148 errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together"))); 1149 if (!IS_DECIMAL(num)) 1150 { 1151 num->lsign = NUM_LSIGN_PRE; 1152 num->pre_lsign_num = num->pre; 1153 num->need_locale = true; 1154 num->flag |= NUM_F_LSIGN; 1155 } 1156 else if (num->lsign == NUM_LSIGN_NONE) 1157 { 1158 num->lsign = NUM_LSIGN_POST; 1159 num->need_locale = true; 1160 num->flag |= NUM_F_LSIGN; 1161 } 1162 break; 1163 1164 case NUM_MI: 1165 if (IS_LSIGN(num)) 1166 ereport(ERROR, 1167 (errcode(ERRCODE_SYNTAX_ERROR), 1168 errmsg("cannot use \"S\" and \"MI\" together"))); 1169 num->flag |= NUM_F_MINUS; 1170 if (IS_DECIMAL(num)) 1171 num->flag |= NUM_F_MINUS_POST; 1172 break; 1173 1174 case NUM_PL: 1175 if (IS_LSIGN(num)) 
1176 ereport(ERROR, 1177 (errcode(ERRCODE_SYNTAX_ERROR), 1178 errmsg("cannot use \"S\" and \"PL\" together"))); 1179 num->flag |= NUM_F_PLUS; 1180 if (IS_DECIMAL(num)) 1181 num->flag |= NUM_F_PLUS_POST; 1182 break; 1183 1184 case NUM_SG: 1185 if (IS_LSIGN(num)) 1186 ereport(ERROR, 1187 (errcode(ERRCODE_SYNTAX_ERROR), 1188 errmsg("cannot use \"S\" and \"SG\" together"))); 1189 num->flag |= NUM_F_MINUS; 1190 num->flag |= NUM_F_PLUS; 1191 break; 1192 1193 case NUM_PR: 1194 if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num)) 1195 ereport(ERROR, 1196 (errcode(ERRCODE_SYNTAX_ERROR), 1197 errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together"))); 1198 num->flag |= NUM_F_BRACKET; 1199 break; 1200 1201 case NUM_rn: 1202 case NUM_RN: 1203 num->flag |= NUM_F_ROMAN; 1204 break; 1205 1206 case NUM_L: 1207 case NUM_G: 1208 num->need_locale = true; 1209 break; 1210 1211 case NUM_V: 1212 if (IS_DECIMAL(num)) 1213 ereport(ERROR, 1214 (errcode(ERRCODE_SYNTAX_ERROR), 1215 errmsg("cannot use \"V\" and decimal point together"))); 1216 num->flag |= NUM_F_MULTI; 1217 break; 1218 1219 case NUM_E: 1220 if (IS_EEEE(num)) 1221 ereport(ERROR, 1222 (errcode(ERRCODE_SYNTAX_ERROR), 1223 errmsg("cannot use \"EEEE\" twice"))); 1224 if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) || 1225 IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) || 1226 IS_ROMAN(num) || IS_MULTI(num)) 1227 ereport(ERROR, 1228 (errcode(ERRCODE_SYNTAX_ERROR), 1229 errmsg("\"EEEE\" is incompatible with other formats"), 1230 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns."))); 1231 num->flag |= NUM_F_EEEE; 1232 break; 1233 } 1234 } 1235 1236 /* ---------- 1237 * Format parser, search small keywords and keyword's suffixes, and make 1238 * format-node tree. 
1239 * 1240 * for DATE-TIME & NUMBER version 1241 * ---------- 1242 */ 1243 static void 1244 parse_format(FormatNode *node, const char *str, const KeyWord *kw, 1245 const KeySuffix *suf, const int *index, int ver, NUMDesc *Num) 1246 { 1247 FormatNode *n; 1248 1249 #ifdef DEBUG_TO_FROM_CHAR 1250 elog(DEBUG_elog_output, "to_char/number(): run parser"); 1251 #endif 1252 1253 n = node; 1254 1255 while (*str) 1256 { 1257 int suffix = 0; 1258 const KeySuffix *s; 1259 1260 /* 1261 * Prefix 1262 */ 1263 if (ver == DCH_TYPE && 1264 (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL) 1265 { 1266 suffix |= s->id; 1267 if (s->len) 1268 str += s->len; 1269 } 1270 1271 /* 1272 * Keyword 1273 */ 1274 if (*str && (n->key = index_seq_search(str, kw, index)) != NULL) 1275 { 1276 n->type = NODE_TYPE_ACTION; 1277 n->suffix = suffix; 1278 if (n->key->len) 1279 str += n->key->len; 1280 1281 /* 1282 * NUM version: Prepare global NUMDesc struct 1283 */ 1284 if (ver == NUM_TYPE) 1285 NUMDesc_prepare(Num, n); 1286 1287 /* 1288 * Postfix 1289 */ 1290 if (ver == DCH_TYPE && *str && 1291 (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL) 1292 { 1293 n->suffix |= s->id; 1294 if (s->len) 1295 str += s->len; 1296 } 1297 1298 n++; 1299 } 1300 else if (*str) 1301 { 1302 int chlen; 1303 1304 /* 1305 * Process double-quoted literal string, if any 1306 */ 1307 if (*str == '"') 1308 { 1309 str++; 1310 while (*str) 1311 { 1312 if (*str == '"') 1313 { 1314 str++; 1315 break; 1316 } 1317 /* backslash quotes the next character, if any */ 1318 if (*str == '\\' && *(str + 1)) 1319 str++; 1320 chlen = pg_mblen(str); 1321 n->type = NODE_TYPE_CHAR; 1322 memcpy(n->character, str, chlen); 1323 n->character[chlen] = '\0'; 1324 n->key = NULL; 1325 n->suffix = 0; 1326 n++; 1327 str += chlen; 1328 } 1329 } 1330 else 1331 { 1332 /* 1333 * Outside double-quoted strings, backslash is only special if 1334 * it immediately precedes a double quote. 
1335 */ 1336 if (*str == '\\' && *(str + 1) == '"') 1337 str++; 1338 chlen = pg_mblen(str); 1339 1340 if (ver == DCH_TYPE && is_separator_char(str)) 1341 n->type = NODE_TYPE_SEPARATOR; 1342 else if (isspace((unsigned char) *str)) 1343 n->type = NODE_TYPE_SPACE; 1344 else 1345 n->type = NODE_TYPE_CHAR; 1346 1347 memcpy(n->character, str, chlen); 1348 n->character[chlen] = '\0'; 1349 n->key = NULL; 1350 n->suffix = 0; 1351 n++; 1352 str += chlen; 1353 } 1354 } 1355 } 1356 1357 n->type = NODE_TYPE_END; 1358 n->suffix = 0; 1359 } 1360 1361 /* ---------- 1362 * DEBUG: Dump the FormatNode Tree (debug) 1363 * ---------- 1364 */ 1365 #ifdef DEBUG_TO_FROM_CHAR 1366 1367 #define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " ")) 1368 #define DUMP_FM(_suf) (S_FM(_suf) ? "FM" : " ") 1369 1370 static void 1371 dump_node(FormatNode *node, int max) 1372 { 1373 FormatNode *n; 1374 int a; 1375 1376 elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT"); 1377 1378 for (a = 0, n = node; a <= max; n++, a++) 1379 { 1380 if (n->type == NODE_TYPE_ACTION) 1381 elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)", 1382 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix)); 1383 else if (n->type == NODE_TYPE_CHAR) 1384 elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'", 1385 a, n->character); 1386 else if (n->type == NODE_TYPE_END) 1387 { 1388 elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a); 1389 return; 1390 } 1391 else 1392 elog(DEBUG_elog_output, "%d:\t unknown NODE!", a); 1393 } 1394 } 1395 #endif /* DEBUG */ 1396 1397 /***************************************************************************** 1398 * Private utils 1399 *****************************************************************************/ 1400 1401 /* ---------- 1402 * Return ST/ND/RD/TH for simple (1..9) numbers 1403 * type --> 0 upper, 1 lower 1404 * ---------- 1405 */ 1406 static const char * 1407 get_th(char *num, int type) 1408 { 1409 int len = strlen(num), 1410 last, 1411 seclast; 1412 
1413 last = *(num + (len - 1)); 1414 if (!isdigit((unsigned char) last)) 1415 ereport(ERROR, 1416 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 1417 errmsg("\"%s\" is not a number", num))); 1418 1419 /* 1420 * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get 1421 * 'ST/st', 'ND/nd', 'RD/rd', respectively 1422 */ 1423 if ((len > 1) && ((seclast = num[len - 2]) == '1')) 1424 last = 0; 1425 1426 switch (last) 1427 { 1428 case '1': 1429 if (type == TH_UPPER) 1430 return numTH[0]; 1431 return numth[0]; 1432 case '2': 1433 if (type == TH_UPPER) 1434 return numTH[1]; 1435 return numth[1]; 1436 case '3': 1437 if (type == TH_UPPER) 1438 return numTH[2]; 1439 return numth[2]; 1440 default: 1441 if (type == TH_UPPER) 1442 return numTH[3]; 1443 return numth[3]; 1444 } 1445 } 1446 1447 /* ---------- 1448 * Convert string-number to ordinal string-number 1449 * type --> 0 upper, 1 lower 1450 * ---------- 1451 */ 1452 static char * 1453 str_numth(char *dest, char *num, int type) 1454 { 1455 if (dest != num) 1456 strcpy(dest, num); 1457 strcat(dest, get_th(num, type)); 1458 return dest; 1459 } 1460 1461 /***************************************************************************** 1462 * upper/lower/initcap functions 1463 *****************************************************************************/ 1464 1465 #ifdef USE_ICU 1466 1467 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity, 1468 const UChar *src, int32_t srcLength, 1469 const char *locale, 1470 UErrorCode *pErrorCode); 1471 1472 static int32_t 1473 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, 1474 UChar **buff_dest, UChar *buff_source, int32_t len_source) 1475 { 1476 UErrorCode status; 1477 int32_t len_dest; 1478 1479 len_dest = len_source; /* try first with same length */ 1480 *buff_dest = palloc(len_dest * sizeof(**buff_dest)); 1481 status = U_ZERO_ERROR; 1482 len_dest = func(*buff_dest, len_dest, buff_source, len_source, 1483 mylocale->info.icu.locale, &status); 
1484 if (status == U_BUFFER_OVERFLOW_ERROR) 1485 { 1486 /* try again with adjusted length */ 1487 pfree(*buff_dest); 1488 *buff_dest = palloc(len_dest * sizeof(**buff_dest)); 1489 status = U_ZERO_ERROR; 1490 len_dest = func(*buff_dest, len_dest, buff_source, len_source, 1491 mylocale->info.icu.locale, &status); 1492 } 1493 if (U_FAILURE(status)) 1494 ereport(ERROR, 1495 (errmsg("case conversion failed: %s", u_errorName(status)))); 1496 return len_dest; 1497 } 1498 1499 static int32_t 1500 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, 1501 const UChar *src, int32_t srcLength, 1502 const char *locale, 1503 UErrorCode *pErrorCode) 1504 { 1505 return u_strToTitle(dest, destCapacity, src, srcLength, 1506 NULL, locale, pErrorCode); 1507 } 1508 1509 #endif /* USE_ICU */ 1510 1511 /* 1512 * If the system provides the needed functions for wide-character manipulation 1513 * (which are all standardized by C99), then we implement upper/lower/initcap 1514 * using wide-character functions, if necessary. Otherwise we use the 1515 * traditional <ctype.h> functions, which of course will not work as desired 1516 * in multibyte character sets. Note that in either case we are effectively 1517 * assuming that the database character encoding matches the encoding implied 1518 * by LC_CTYPE. 1519 * 1520 * If the system provides locale_t and associated functions (which are 1521 * standardized by Open Group's XBD), we can support collations that are 1522 * neither default nor C. The code is written to handle both combinations 1523 * of have-wide-characters and have-locale_t, though it's rather unlikely 1524 * a platform would have the latter without the former. 1525 */ 1526 1527 /* 1528 * collation-aware, wide-character-aware lower function 1529 * 1530 * We pass the number of bytes so we can pass varlena and char* 1531 * to this function. The result is a palloc'd, null-terminated string. 
1532 */ 1533 char * 1534 str_tolower(const char *buff, size_t nbytes, Oid collid) 1535 { 1536 char *result; 1537 1538 if (!buff) 1539 return NULL; 1540 1541 /* C/POSIX collations use this path regardless of database encoding */ 1542 if (lc_ctype_is_c(collid)) 1543 { 1544 result = asc_tolower(buff, nbytes); 1545 } 1546 else 1547 { 1548 pg_locale_t mylocale = 0; 1549 1550 if (collid != DEFAULT_COLLATION_OID) 1551 { 1552 if (!OidIsValid(collid)) 1553 { 1554 /* 1555 * This typically means that the parser could not resolve a 1556 * conflict of implicit collations, so report it that way. 1557 */ 1558 ereport(ERROR, 1559 (errcode(ERRCODE_INDETERMINATE_COLLATION), 1560 errmsg("could not determine which collation to use for %s function", 1561 "lower()"), 1562 errhint("Use the COLLATE clause to set the collation explicitly."))); 1563 } 1564 mylocale = pg_newlocale_from_collation(collid); 1565 } 1566 1567 #ifdef USE_ICU 1568 if (mylocale && mylocale->provider == COLLPROVIDER_ICU) 1569 { 1570 int32_t len_uchar; 1571 int32_t len_conv; 1572 UChar *buff_uchar; 1573 UChar *buff_conv; 1574 1575 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); 1576 len_conv = icu_convert_case(u_strToLower, mylocale, 1577 &buff_conv, buff_uchar, len_uchar); 1578 icu_from_uchar(&result, buff_conv, len_conv); 1579 pfree(buff_uchar); 1580 pfree(buff_conv); 1581 } 1582 else 1583 #endif 1584 { 1585 if (pg_database_encoding_max_length() > 1) 1586 { 1587 wchar_t *workspace; 1588 size_t curr_char; 1589 size_t result_size; 1590 1591 /* Overflow paranoia */ 1592 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) 1593 ereport(ERROR, 1594 (errcode(ERRCODE_OUT_OF_MEMORY), 1595 errmsg("out of memory"))); 1596 1597 /* Output workspace cannot have more codes than input bytes */ 1598 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); 1599 1600 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); 1601 1602 for (curr_char = 0; workspace[curr_char] != 0; curr_char++) 1603 { 1604 #ifdef HAVE_LOCALE_T 
1605 if (mylocale) 1606 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); 1607 else 1608 #endif 1609 workspace[curr_char] = towlower(workspace[curr_char]); 1610 } 1611 1612 /* 1613 * Make result large enough; case change might change number 1614 * of bytes 1615 */ 1616 result_size = curr_char * pg_database_encoding_max_length() + 1; 1617 result = palloc(result_size); 1618 1619 wchar2char(result, workspace, result_size, mylocale); 1620 pfree(workspace); 1621 } 1622 else 1623 { 1624 char *p; 1625 1626 result = pnstrdup(buff, nbytes); 1627 1628 /* 1629 * Note: we assume that tolower_l() will not be so broken as 1630 * to need an isupper_l() guard test. When using the default 1631 * collation, we apply the traditional Postgres behavior that 1632 * forces ASCII-style treatment of I/i, but in non-default 1633 * collations you get exactly what the collation says. 1634 */ 1635 for (p = result; *p; p++) 1636 { 1637 #ifdef HAVE_LOCALE_T 1638 if (mylocale) 1639 *p = tolower_l((unsigned char) *p, mylocale->info.lt); 1640 else 1641 #endif 1642 *p = pg_tolower((unsigned char) *p); 1643 } 1644 } 1645 } 1646 } 1647 1648 return result; 1649 } 1650 1651 /* 1652 * collation-aware, wide-character-aware upper function 1653 * 1654 * We pass the number of bytes so we can pass varlena and char* 1655 * to this function. The result is a palloc'd, null-terminated string. 1656 */ 1657 char * 1658 str_toupper(const char *buff, size_t nbytes, Oid collid) 1659 { 1660 char *result; 1661 1662 if (!buff) 1663 return NULL; 1664 1665 /* C/POSIX collations use this path regardless of database encoding */ 1666 if (lc_ctype_is_c(collid)) 1667 { 1668 result = asc_toupper(buff, nbytes); 1669 } 1670 else 1671 { 1672 pg_locale_t mylocale = 0; 1673 1674 if (collid != DEFAULT_COLLATION_OID) 1675 { 1676 if (!OidIsValid(collid)) 1677 { 1678 /* 1679 * This typically means that the parser could not resolve a 1680 * conflict of implicit collations, so report it that way. 
1681 */ 1682 ereport(ERROR, 1683 (errcode(ERRCODE_INDETERMINATE_COLLATION), 1684 errmsg("could not determine which collation to use for %s function", 1685 "upper()"), 1686 errhint("Use the COLLATE clause to set the collation explicitly."))); 1687 } 1688 mylocale = pg_newlocale_from_collation(collid); 1689 } 1690 1691 #ifdef USE_ICU 1692 if (mylocale && mylocale->provider == COLLPROVIDER_ICU) 1693 { 1694 int32_t len_uchar, 1695 len_conv; 1696 UChar *buff_uchar; 1697 UChar *buff_conv; 1698 1699 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); 1700 len_conv = icu_convert_case(u_strToUpper, mylocale, 1701 &buff_conv, buff_uchar, len_uchar); 1702 icu_from_uchar(&result, buff_conv, len_conv); 1703 pfree(buff_uchar); 1704 pfree(buff_conv); 1705 } 1706 else 1707 #endif 1708 { 1709 if (pg_database_encoding_max_length() > 1) 1710 { 1711 wchar_t *workspace; 1712 size_t curr_char; 1713 size_t result_size; 1714 1715 /* Overflow paranoia */ 1716 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) 1717 ereport(ERROR, 1718 (errcode(ERRCODE_OUT_OF_MEMORY), 1719 errmsg("out of memory"))); 1720 1721 /* Output workspace cannot have more codes than input bytes */ 1722 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); 1723 1724 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); 1725 1726 for (curr_char = 0; workspace[curr_char] != 0; curr_char++) 1727 { 1728 #ifdef HAVE_LOCALE_T 1729 if (mylocale) 1730 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); 1731 else 1732 #endif 1733 workspace[curr_char] = towupper(workspace[curr_char]); 1734 } 1735 1736 /* 1737 * Make result large enough; case change might change number 1738 * of bytes 1739 */ 1740 result_size = curr_char * pg_database_encoding_max_length() + 1; 1741 result = palloc(result_size); 1742 1743 wchar2char(result, workspace, result_size, mylocale); 1744 pfree(workspace); 1745 } 1746 else 1747 { 1748 char *p; 1749 1750 result = pnstrdup(buff, nbytes); 1751 1752 /* 1753 * Note: we 
assume that toupper_l() will not be so broken as 1754 * to need an islower_l() guard test. When using the default 1755 * collation, we apply the traditional Postgres behavior that 1756 * forces ASCII-style treatment of I/i, but in non-default 1757 * collations you get exactly what the collation says. 1758 */ 1759 for (p = result; *p; p++) 1760 { 1761 #ifdef HAVE_LOCALE_T 1762 if (mylocale) 1763 *p = toupper_l((unsigned char) *p, mylocale->info.lt); 1764 else 1765 #endif 1766 *p = pg_toupper((unsigned char) *p); 1767 } 1768 } 1769 } 1770 } 1771 1772 return result; 1773 } 1774 1775 /* 1776 * collation-aware, wide-character-aware initcap function 1777 * 1778 * We pass the number of bytes so we can pass varlena and char* 1779 * to this function. The result is a palloc'd, null-terminated string. 1780 */ 1781 char * 1782 str_initcap(const char *buff, size_t nbytes, Oid collid) 1783 { 1784 char *result; 1785 int wasalnum = false; 1786 1787 if (!buff) 1788 return NULL; 1789 1790 /* C/POSIX collations use this path regardless of database encoding */ 1791 if (lc_ctype_is_c(collid)) 1792 { 1793 result = asc_initcap(buff, nbytes); 1794 } 1795 else 1796 { 1797 pg_locale_t mylocale = 0; 1798 1799 if (collid != DEFAULT_COLLATION_OID) 1800 { 1801 if (!OidIsValid(collid)) 1802 { 1803 /* 1804 * This typically means that the parser could not resolve a 1805 * conflict of implicit collations, so report it that way. 
1806 */ 1807 ereport(ERROR, 1808 (errcode(ERRCODE_INDETERMINATE_COLLATION), 1809 errmsg("could not determine which collation to use for %s function", 1810 "initcap()"), 1811 errhint("Use the COLLATE clause to set the collation explicitly."))); 1812 } 1813 mylocale = pg_newlocale_from_collation(collid); 1814 } 1815 1816 #ifdef USE_ICU 1817 if (mylocale && mylocale->provider == COLLPROVIDER_ICU) 1818 { 1819 int32_t len_uchar, 1820 len_conv; 1821 UChar *buff_uchar; 1822 UChar *buff_conv; 1823 1824 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); 1825 len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, 1826 &buff_conv, buff_uchar, len_uchar); 1827 icu_from_uchar(&result, buff_conv, len_conv); 1828 pfree(buff_uchar); 1829 pfree(buff_conv); 1830 } 1831 else 1832 #endif 1833 { 1834 if (pg_database_encoding_max_length() > 1) 1835 { 1836 wchar_t *workspace; 1837 size_t curr_char; 1838 size_t result_size; 1839 1840 /* Overflow paranoia */ 1841 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) 1842 ereport(ERROR, 1843 (errcode(ERRCODE_OUT_OF_MEMORY), 1844 errmsg("out of memory"))); 1845 1846 /* Output workspace cannot have more codes than input bytes */ 1847 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); 1848 1849 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); 1850 1851 for (curr_char = 0; workspace[curr_char] != 0; curr_char++) 1852 { 1853 #ifdef HAVE_LOCALE_T 1854 if (mylocale) 1855 { 1856 if (wasalnum) 1857 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); 1858 else 1859 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); 1860 wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); 1861 } 1862 else 1863 #endif 1864 { 1865 if (wasalnum) 1866 workspace[curr_char] = towlower(workspace[curr_char]); 1867 else 1868 workspace[curr_char] = towupper(workspace[curr_char]); 1869 wasalnum = iswalnum(workspace[curr_char]); 1870 } 1871 } 1872 1873 /* 1874 * Make result large enough; case 
change might change number 1875 * of bytes 1876 */ 1877 result_size = curr_char * pg_database_encoding_max_length() + 1; 1878 result = palloc(result_size); 1879 1880 wchar2char(result, workspace, result_size, mylocale); 1881 pfree(workspace); 1882 } 1883 else 1884 { 1885 char *p; 1886 1887 result = pnstrdup(buff, nbytes); 1888 1889 /* 1890 * Note: we assume that toupper_l()/tolower_l() will not be so 1891 * broken as to need guard tests. When using the default 1892 * collation, we apply the traditional Postgres behavior that 1893 * forces ASCII-style treatment of I/i, but in non-default 1894 * collations you get exactly what the collation says. 1895 */ 1896 for (p = result; *p; p++) 1897 { 1898 #ifdef HAVE_LOCALE_T 1899 if (mylocale) 1900 { 1901 if (wasalnum) 1902 *p = tolower_l((unsigned char) *p, mylocale->info.lt); 1903 else 1904 *p = toupper_l((unsigned char) *p, mylocale->info.lt); 1905 wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); 1906 } 1907 else 1908 #endif 1909 { 1910 if (wasalnum) 1911 *p = pg_tolower((unsigned char) *p); 1912 else 1913 *p = pg_toupper((unsigned char) *p); 1914 wasalnum = isalnum((unsigned char) *p); 1915 } 1916 } 1917 } 1918 } 1919 } 1920 1921 return result; 1922 } 1923 1924 /* 1925 * ASCII-only lower function 1926 * 1927 * We pass the number of bytes so we can pass varlena and char* 1928 * to this function. The result is a palloc'd, null-terminated string. 1929 */ 1930 char * 1931 asc_tolower(const char *buff, size_t nbytes) 1932 { 1933 char *result; 1934 char *p; 1935 1936 if (!buff) 1937 return NULL; 1938 1939 result = pnstrdup(buff, nbytes); 1940 1941 for (p = result; *p; p++) 1942 *p = pg_ascii_tolower((unsigned char) *p); 1943 1944 return result; 1945 } 1946 1947 /* 1948 * ASCII-only upper function 1949 * 1950 * We pass the number of bytes so we can pass varlena and char* 1951 * to this function. The result is a palloc'd, null-terminated string. 
1952 */ 1953 char * 1954 asc_toupper(const char *buff, size_t nbytes) 1955 { 1956 char *result; 1957 char *p; 1958 1959 if (!buff) 1960 return NULL; 1961 1962 result = pnstrdup(buff, nbytes); 1963 1964 for (p = result; *p; p++) 1965 *p = pg_ascii_toupper((unsigned char) *p); 1966 1967 return result; 1968 } 1969 1970 /* 1971 * ASCII-only initcap function 1972 * 1973 * We pass the number of bytes so we can pass varlena and char* 1974 * to this function. The result is a palloc'd, null-terminated string. 1975 */ 1976 char * 1977 asc_initcap(const char *buff, size_t nbytes) 1978 { 1979 char *result; 1980 char *p; 1981 int wasalnum = false; 1982 1983 if (!buff) 1984 return NULL; 1985 1986 result = pnstrdup(buff, nbytes); 1987 1988 for (p = result; *p; p++) 1989 { 1990 char c; 1991 1992 if (wasalnum) 1993 *p = c = pg_ascii_tolower((unsigned char) *p); 1994 else 1995 *p = c = pg_ascii_toupper((unsigned char) *p); 1996 /* we don't trust isalnum() here */ 1997 wasalnum = ((c >= 'A' && c <= 'Z') || 1998 (c >= 'a' && c <= 'z') || 1999 (c >= '0' && c <= '9')); 2000 } 2001 2002 return result; 2003 } 2004 2005 /* convenience routines for when the input is null-terminated */ 2006 2007 static char * 2008 str_tolower_z(const char *buff, Oid collid) 2009 { 2010 return str_tolower(buff, strlen(buff), collid); 2011 } 2012 2013 static char * 2014 str_toupper_z(const char *buff, Oid collid) 2015 { 2016 return str_toupper(buff, strlen(buff), collid); 2017 } 2018 2019 static char * 2020 str_initcap_z(const char *buff, Oid collid) 2021 { 2022 return str_initcap(buff, strlen(buff), collid); 2023 } 2024 2025 static char * 2026 asc_tolower_z(const char *buff) 2027 { 2028 return asc_tolower(buff, strlen(buff)); 2029 } 2030 2031 static char * 2032 asc_toupper_z(const char *buff) 2033 { 2034 return asc_toupper(buff, strlen(buff)); 2035 } 2036 2037 /* asc_initcap_z is not currently needed */ 2038 2039 2040 /* ---------- 2041 * Skip TM / th in FROM_CHAR 2042 * 2043 * If S_THth is on, skip two 
chars, assuming there are two available 2044 * ---------- 2045 */ 2046 #define SKIP_THth(ptr, _suf) \ 2047 do { \ 2048 if (S_THth(_suf)) \ 2049 { \ 2050 if (*(ptr)) (ptr) += pg_mblen(ptr); \ 2051 if (*(ptr)) (ptr) += pg_mblen(ptr); \ 2052 } \ 2053 } while (0) 2054 2055 2056 #ifdef DEBUG_TO_FROM_CHAR 2057 /* ----------- 2058 * DEBUG: Call for debug and for index checking; (Show ASCII char 2059 * and defined keyword for each used position 2060 * ---------- 2061 */ 2062 static void 2063 dump_index(const KeyWord *k, const int *index) 2064 { 2065 int i, 2066 count = 0, 2067 free_i = 0; 2068 2069 elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:"); 2070 2071 for (i = 0; i < KeyWord_INDEX_SIZE; i++) 2072 { 2073 if (index[i] != -1) 2074 { 2075 elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name); 2076 count++; 2077 } 2078 else 2079 { 2080 free_i++; 2081 elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]); 2082 } 2083 } 2084 elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d", 2085 count, free_i); 2086 } 2087 #endif /* DEBUG */ 2088 2089 /* ---------- 2090 * Return true if next format picture is not digit value 2091 * ---------- 2092 */ 2093 static bool 2094 is_next_separator(FormatNode *n) 2095 { 2096 if (n->type == NODE_TYPE_END) 2097 return false; 2098 2099 if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix)) 2100 return true; 2101 2102 /* 2103 * Next node 2104 */ 2105 n++; 2106 2107 /* end of format string is treated like a non-digit separator */ 2108 if (n->type == NODE_TYPE_END) 2109 return true; 2110 2111 if (n->type == NODE_TYPE_ACTION) 2112 { 2113 if (n->key->is_digit) 2114 return false; 2115 2116 return true; 2117 } 2118 else if (n->character[1] == '\0' && 2119 isdigit((unsigned char) n->character[0])) 2120 return false; 2121 2122 return true; /* some non-digit input (separator) */ 2123 } 2124 2125 2126 static int 2127 adjust_partial_year_to_2020(int year) 2128 { 2129 /* 2130 * Adjust all dates toward 2020; 
this is effectively what happens when we 2131 * assume '70' is 1970 and '69' is 2069. 2132 */ 2133 /* Force 0-69 into the 2000's */ 2134 if (year < 70) 2135 return year + 2000; 2136 /* Force 70-99 into the 1900's */ 2137 else if (year < 100) 2138 return year + 1900; 2139 /* Force 100-519 into the 2000's */ 2140 else if (year < 520) 2141 return year + 2000; 2142 /* Force 520-999 into the 1000's */ 2143 else if (year < 1000) 2144 return year + 1000; 2145 else 2146 return year; 2147 } 2148 2149 2150 static int 2151 strspace_len(const char *str) 2152 { 2153 int len = 0; 2154 2155 while (*str && isspace((unsigned char) *str)) 2156 { 2157 str++; 2158 len++; 2159 } 2160 return len; 2161 } 2162 2163 /* 2164 * Set the date mode of a from-char conversion. 2165 * 2166 * Puke if the date mode has already been set, and the caller attempts to set 2167 * it to a conflicting mode. 2168 */ 2169 static void 2170 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode) 2171 { 2172 if (mode != FROM_CHAR_DATE_NONE) 2173 { 2174 if (tmfc->mode == FROM_CHAR_DATE_NONE) 2175 tmfc->mode = mode; 2176 else if (tmfc->mode != mode) 2177 ereport(ERROR, 2178 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 2179 errmsg("invalid combination of date conventions"), 2180 errhint("Do not mix Gregorian and ISO week date " 2181 "conventions in a formatting template."))); 2182 } 2183 } 2184 2185 /* 2186 * Set the integer pointed to by 'dest' to the given value. 2187 * 2188 * Puke if the destination integer has previously been set to some other 2189 * non-zero value. 
 */
static void
from_char_set_int(int *dest, const int value, const FormatNode *node)
{
	/* zero means "not set yet"; re-setting the same value is allowed */
	if (*dest != 0 && *dest != value)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
				 errmsg("conflicting values for \"%s\" field in formatting string",
						node->key->name),
				 errdetail("This value contradicts a previous setting for "
						   "the same field type.")));
	*dest = value;
}

/*
 * Read a single integer from the source string, into the int pointed to by
 * 'dest'. If 'dest' is NULL, the result is discarded.
 *
 * In fixed-width mode (the node does not have the FM suffix), consume at most
 * 'len' characters.  However, any leading whitespace isn't counted in 'len'.
 * Fixed-width mode is only used when is_next_separator() reports that a
 * digit value follows this node; otherwise, or with FM, as many characters as
 * strtol() accepts are consumed.
 *
 * We use strtol() to recover the integer value from the source string, in
 * accordance with the given FormatNode.
 *
 * If the conversion completes successfully, src will have been advanced to
 * point at the character immediately following the last character used in the
 * conversion.
 *
 * Return the number of characters consumed, measured from the original *src
 * (so skipped leading whitespace is included in the count).
 *
 * Errors are raised if the source is shorter than a fixed-width field, if
 * only part of a fixed-width field parses as a number, if *src did not
 * advance at all, or if the value does not fit in an int.
 *
 * Note that from_char_parse_int() provides a more convenient wrapper where
 * the length of the field is the same as the length of the format keyword (as
 * with DD and MI).
 */
static int
from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node)
{
	long		result;
	char		copy[DCH_MAX_ITEM_SIZ + 1];
	const char *init = *src;	/* start of field, before whitespace skip */
	int			used;

	/*
	 * Skip any whitespace before parsing the integer.
	 */
	*src += strspace_len(*src);

	/*
	 * Keep a bounded copy of the field for the fixed-width parse and for
	 * error messages.  'used' starts out as strlcpy()'s return value and is
	 * compared against 'len' below to detect a too-short source.
	 */
	Assert(len <= DCH_MAX_ITEM_SIZ);
	used = (int) strlcpy(copy, *src, len + 1);

	if (S_FM(node->suffix) || is_next_separator(node))
	{
		/*
		 * This node is in Fill Mode, or the next node is known to be a
		 * non-digit value, so we just slurp as many characters as we can get.
		 */
		char	   *endptr;

		/* parsing restarts at 'init'; strtol() skips leading whitespace */
		errno = 0;
		result = strtol(init, &endptr, 10);
		*src = endptr;
	}
	else
	{
		/*
		 * We need to pull exactly the number of characters given in 'len' out
		 * of the string, and convert those.
		 */
		char	   *last;

		if (used < len)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
					 errmsg("source string too short for \"%s\" formatting field",
							node->key->name),
					 errdetail("Field requires %d characters, but only %d "
							   "remain.",
							   len, used),
					 errhint("If your source string is not fixed-width, try "
							 "using the \"FM\" modifier.")));

		/* from here on 'used' is the number of bytes strtol() consumed */
		errno = 0;
		result = strtol(copy, &last, 10);
		used = last - copy;

		if (used > 0 && used < len)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
					 errmsg("invalid value \"%s\" for \"%s\"",
							copy, node->key->name),
					 errdetail("Field requires %d characters, but only %d "
							   "could be parsed.", len, used),
					 errhint("If your source string is not fixed-width, try "
							 "using the \"FM\" modifier.")));

		*src += used;
	}

	/* no progress at all from the original position: not an integer */
	if (*src == init)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
				 errmsg("invalid value \"%s\" for \"%s\"",
						copy, node->key->name),
				 errdetail("Value must be an integer.")));

	/* errno was cleared just before whichever strtol() call ran above */
	if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
		ereport(ERROR,
				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
				 errmsg("value for \"%s\" in source string is out of range",
						node->key->name),
				 errdetail("Value must be in the range %d to %d.",
						   INT_MIN, INT_MAX)));

	if (dest != NULL)
		from_char_set_int(dest, (int) result, node);
	return *src - init;
}

/*
 * Call from_char_parse_int_len(), using the length of the format keyword as
 * the expected length of the field.
2311 * 2312 * Don't call this function if the field differs in length from the format 2313 * keyword (as with HH24; the keyword length is 4, but the field length is 2). 2314 * In such cases, call from_char_parse_int_len() instead to specify the 2315 * required length explicitly. 2316 */ 2317 static int 2318 from_char_parse_int(int *dest, const char **src, FormatNode *node) 2319 { 2320 return from_char_parse_int_len(dest, src, node->key->len, node); 2321 } 2322 2323 /* 2324 * Sequentially search null-terminated "array" for a case-insensitive match 2325 * to the initial character(s) of "name". 2326 * 2327 * Returns array index of match, or -1 for no match. 2328 * 2329 * *len is set to the length of the match, or 0 for no match. 2330 * 2331 * Case-insensitivity is defined per pg_tolower, so this is only 2332 * suitable for comparisons to ASCII strings. 2333 */ 2334 static int 2335 seq_search(const char *name, const char *const *array, int *len) 2336 { 2337 unsigned char firstc; 2338 const char *const *a; 2339 2340 *len = 0; 2341 2342 /* empty string can't match anything */ 2343 if (!*name) 2344 return -1; 2345 2346 /* we handle first char specially to gain some speed */ 2347 firstc = pg_tolower((unsigned char) *name); 2348 2349 for (a = array; *a != NULL; a++) 2350 { 2351 const char *p; 2352 const char *n; 2353 2354 /* compare first chars */ 2355 if (pg_tolower((unsigned char) **a) != firstc) 2356 continue; 2357 2358 /* compare rest of string */ 2359 for (p = *a + 1, n = name + 1;; p++, n++) 2360 { 2361 /* return success if we matched whole array entry */ 2362 if (*p == '\0') 2363 { 2364 *len = n - name; 2365 return a - array; 2366 } 2367 /* else, must have another character in "name" ... */ 2368 if (*n == '\0') 2369 break; 2370 /* ... 
and it must match */ 2371 if (pg_tolower((unsigned char) *p) != 2372 pg_tolower((unsigned char) *n)) 2373 break; 2374 } 2375 } 2376 2377 return -1; 2378 } 2379 2380 /* 2381 * Perform a sequential search in 'array' for an entry matching the first 2382 * character(s) of the 'src' string case-insensitively. 2383 * 2384 * If a match is found, copy the array index of the match into the integer 2385 * pointed to by 'dest', advance 'src' to the end of the part of the string 2386 * which matched, and return the number of characters consumed. 2387 * 2388 * If the string doesn't match, throw an error. 2389 * 2390 * 'node' is used only for error reports: node->key->name identifies the 2391 * field type we were searching for. 2392 */ 2393 static int 2394 from_char_seq_search(int *dest, const char **src, const char *const *array, 2395 FormatNode *node) 2396 { 2397 int len; 2398 2399 *dest = seq_search(*src, array, &len); 2400 2401 if (len <= 0) 2402 { 2403 /* 2404 * In the error report, truncate the string at the next whitespace (if 2405 * any) to avoid including irrelevant data. 2406 */ 2407 char *copy = pstrdup(*src); 2408 char *c; 2409 2410 for (c = copy; *c; c++) 2411 { 2412 if (scanner_isspace(*c)) 2413 { 2414 *c = '\0'; 2415 break; 2416 } 2417 } 2418 2419 ereport(ERROR, 2420 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 2421 errmsg("invalid value \"%s\" for \"%s\"", 2422 copy, node->key->name), 2423 errdetail("The given value did not match any of the allowed " 2424 "values for this field."))); 2425 } 2426 *src += len; 2427 return len; 2428 } 2429 2430 /* ---------- 2431 * Process a TmToChar struct as denoted by a list of FormatNodes. 2432 * The formatted data is written to the string pointed to by 'out'. 
2433 * ---------- 2434 */ 2435 static void 2436 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid) 2437 { 2438 FormatNode *n; 2439 char *s; 2440 struct pg_tm *tm = &in->tm; 2441 int i; 2442 2443 /* cache localized days and months */ 2444 cache_locale_time(); 2445 2446 s = out; 2447 for (n = node; n->type != NODE_TYPE_END; n++) 2448 { 2449 if (n->type != NODE_TYPE_ACTION) 2450 { 2451 strcpy(s, n->character); 2452 s += strlen(s); 2453 continue; 2454 } 2455 2456 switch (n->key->id) 2457 { 2458 case DCH_A_M: 2459 case DCH_P_M: 2460 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) 2461 ? P_M_STR : A_M_STR); 2462 s += strlen(s); 2463 break; 2464 case DCH_AM: 2465 case DCH_PM: 2466 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) 2467 ? PM_STR : AM_STR); 2468 s += strlen(s); 2469 break; 2470 case DCH_a_m: 2471 case DCH_p_m: 2472 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) 2473 ? p_m_STR : a_m_STR); 2474 s += strlen(s); 2475 break; 2476 case DCH_am: 2477 case DCH_pm: 2478 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2) 2479 ? pm_STR : am_STR); 2480 s += strlen(s); 2481 break; 2482 case DCH_HH: 2483 case DCH_HH12: 2484 2485 /* 2486 * display time as shown on a 12-hour clock, even for 2487 * intervals 2488 */ 2489 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3, 2490 tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 : 2491 tm->tm_hour % (HOURS_PER_DAY / 2)); 2492 if (S_THth(n->suffix)) 2493 str_numth(s, s, S_TH_TYPE(n->suffix)); 2494 s += strlen(s); 2495 break; 2496 case DCH_HH24: 2497 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3, 2498 tm->tm_hour); 2499 if (S_THth(n->suffix)) 2500 str_numth(s, s, S_TH_TYPE(n->suffix)); 2501 s += strlen(s); 2502 break; 2503 case DCH_MI: 2504 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 
2 : 3, 2505 tm->tm_min); 2506 if (S_THth(n->suffix)) 2507 str_numth(s, s, S_TH_TYPE(n->suffix)); 2508 s += strlen(s); 2509 break; 2510 case DCH_SS: 2511 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3, 2512 tm->tm_sec); 2513 if (S_THth(n->suffix)) 2514 str_numth(s, s, S_TH_TYPE(n->suffix)); 2515 s += strlen(s); 2516 break; 2517 case DCH_MS: /* millisecond */ 2518 sprintf(s, "%03d", (int) (in->fsec / INT64CONST(1000))); 2519 if (S_THth(n->suffix)) 2520 str_numth(s, s, S_TH_TYPE(n->suffix)); 2521 s += strlen(s); 2522 break; 2523 case DCH_US: /* microsecond */ 2524 sprintf(s, "%06d", (int) in->fsec); 2525 if (S_THth(n->suffix)) 2526 str_numth(s, s, S_TH_TYPE(n->suffix)); 2527 s += strlen(s); 2528 break; 2529 case DCH_SSSS: 2530 sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR + 2531 tm->tm_min * SECS_PER_MINUTE + 2532 tm->tm_sec); 2533 if (S_THth(n->suffix)) 2534 str_numth(s, s, S_TH_TYPE(n->suffix)); 2535 s += strlen(s); 2536 break; 2537 case DCH_tz: 2538 INVALID_FOR_INTERVAL; 2539 if (tmtcTzn(in)) 2540 { 2541 /* We assume here that timezone names aren't localized */ 2542 char *p = asc_tolower_z(tmtcTzn(in)); 2543 2544 strcpy(s, p); 2545 pfree(p); 2546 s += strlen(s); 2547 } 2548 break; 2549 case DCH_TZ: 2550 INVALID_FOR_INTERVAL; 2551 if (tmtcTzn(in)) 2552 { 2553 strcpy(s, tmtcTzn(in)); 2554 s += strlen(s); 2555 } 2556 break; 2557 case DCH_TZH: 2558 INVALID_FOR_INTERVAL; 2559 sprintf(s, "%c%02d", 2560 (tm->tm_gmtoff >= 0) ? '+' : '-', 2561 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR); 2562 s += strlen(s); 2563 break; 2564 case DCH_TZM: 2565 INVALID_FOR_INTERVAL; 2566 sprintf(s, "%02d", 2567 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE); 2568 s += strlen(s); 2569 break; 2570 case DCH_OF: 2571 INVALID_FOR_INTERVAL; 2572 sprintf(s, "%c%0*d", 2573 (tm->tm_gmtoff >= 0) ? '+' : '-', 2574 S_FM(n->suffix) ? 
0 : 2, 2575 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR); 2576 s += strlen(s); 2577 if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0) 2578 { 2579 sprintf(s, ":%02d", 2580 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE); 2581 s += strlen(s); 2582 } 2583 break; 2584 case DCH_A_D: 2585 case DCH_B_C: 2586 INVALID_FOR_INTERVAL; 2587 strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR)); 2588 s += strlen(s); 2589 break; 2590 case DCH_AD: 2591 case DCH_BC: 2592 INVALID_FOR_INTERVAL; 2593 strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR)); 2594 s += strlen(s); 2595 break; 2596 case DCH_a_d: 2597 case DCH_b_c: 2598 INVALID_FOR_INTERVAL; 2599 strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR)); 2600 s += strlen(s); 2601 break; 2602 case DCH_ad: 2603 case DCH_bc: 2604 INVALID_FOR_INTERVAL; 2605 strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR)); 2606 s += strlen(s); 2607 break; 2608 case DCH_MONTH: 2609 INVALID_FOR_INTERVAL; 2610 if (!tm->tm_mon) 2611 break; 2612 if (S_TM(n->suffix)) 2613 { 2614 char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid); 2615 2616 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2617 strcpy(s, str); 2618 else 2619 ereport(ERROR, 2620 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2621 errmsg("localized string format value too long"))); 2622 } 2623 else 2624 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, 2625 asc_toupper_z(months_full[tm->tm_mon - 1])); 2626 s += strlen(s); 2627 break; 2628 case DCH_Month: 2629 INVALID_FOR_INTERVAL; 2630 if (!tm->tm_mon) 2631 break; 2632 if (S_TM(n->suffix)) 2633 { 2634 char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid); 2635 2636 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2637 strcpy(s, str); 2638 else 2639 ereport(ERROR, 2640 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2641 errmsg("localized string format value too long"))); 2642 } 2643 else 2644 sprintf(s, "%*s", S_FM(n->suffix) ? 
0 : -9, 2645 months_full[tm->tm_mon - 1]); 2646 s += strlen(s); 2647 break; 2648 case DCH_month: 2649 INVALID_FOR_INTERVAL; 2650 if (!tm->tm_mon) 2651 break; 2652 if (S_TM(n->suffix)) 2653 { 2654 char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid); 2655 2656 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2657 strcpy(s, str); 2658 else 2659 ereport(ERROR, 2660 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2661 errmsg("localized string format value too long"))); 2662 } 2663 else 2664 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, 2665 asc_tolower_z(months_full[tm->tm_mon - 1])); 2666 s += strlen(s); 2667 break; 2668 case DCH_MON: 2669 INVALID_FOR_INTERVAL; 2670 if (!tm->tm_mon) 2671 break; 2672 if (S_TM(n->suffix)) 2673 { 2674 char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid); 2675 2676 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2677 strcpy(s, str); 2678 else 2679 ereport(ERROR, 2680 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2681 errmsg("localized string format value too long"))); 2682 } 2683 else 2684 strcpy(s, asc_toupper_z(months[tm->tm_mon - 1])); 2685 s += strlen(s); 2686 break; 2687 case DCH_Mon: 2688 INVALID_FOR_INTERVAL; 2689 if (!tm->tm_mon) 2690 break; 2691 if (S_TM(n->suffix)) 2692 { 2693 char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid); 2694 2695 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2696 strcpy(s, str); 2697 else 2698 ereport(ERROR, 2699 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2700 errmsg("localized string format value too long"))); 2701 } 2702 else 2703 strcpy(s, months[tm->tm_mon - 1]); 2704 s += strlen(s); 2705 break; 2706 case DCH_mon: 2707 INVALID_FOR_INTERVAL; 2708 if (!tm->tm_mon) 2709 break; 2710 if (S_TM(n->suffix)) 2711 { 2712 char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid); 2713 2714 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2715 
strcpy(s, str); 2716 else 2717 ereport(ERROR, 2718 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2719 errmsg("localized string format value too long"))); 2720 } 2721 else 2722 strcpy(s, asc_tolower_z(months[tm->tm_mon - 1])); 2723 s += strlen(s); 2724 break; 2725 case DCH_MM: 2726 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3, 2727 tm->tm_mon); 2728 if (S_THth(n->suffix)) 2729 str_numth(s, s, S_TH_TYPE(n->suffix)); 2730 s += strlen(s); 2731 break; 2732 case DCH_DAY: 2733 INVALID_FOR_INTERVAL; 2734 if (S_TM(n->suffix)) 2735 { 2736 char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid); 2737 2738 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2739 strcpy(s, str); 2740 else 2741 ereport(ERROR, 2742 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2743 errmsg("localized string format value too long"))); 2744 } 2745 else 2746 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, 2747 asc_toupper_z(days[tm->tm_wday])); 2748 s += strlen(s); 2749 break; 2750 case DCH_Day: 2751 INVALID_FOR_INTERVAL; 2752 if (S_TM(n->suffix)) 2753 { 2754 char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid); 2755 2756 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2757 strcpy(s, str); 2758 else 2759 ereport(ERROR, 2760 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2761 errmsg("localized string format value too long"))); 2762 } 2763 else 2764 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, 2765 days[tm->tm_wday]); 2766 s += strlen(s); 2767 break; 2768 case DCH_day: 2769 INVALID_FOR_INTERVAL; 2770 if (S_TM(n->suffix)) 2771 { 2772 char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid); 2773 2774 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2775 strcpy(s, str); 2776 else 2777 ereport(ERROR, 2778 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2779 errmsg("localized string format value too long"))); 2780 } 2781 else 2782 sprintf(s, "%*s", S_FM(n->suffix) ? 
0 : -9, 2783 asc_tolower_z(days[tm->tm_wday])); 2784 s += strlen(s); 2785 break; 2786 case DCH_DY: 2787 INVALID_FOR_INTERVAL; 2788 if (S_TM(n->suffix)) 2789 { 2790 char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid); 2791 2792 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2793 strcpy(s, str); 2794 else 2795 ereport(ERROR, 2796 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2797 errmsg("localized string format value too long"))); 2798 } 2799 else 2800 strcpy(s, asc_toupper_z(days_short[tm->tm_wday])); 2801 s += strlen(s); 2802 break; 2803 case DCH_Dy: 2804 INVALID_FOR_INTERVAL; 2805 if (S_TM(n->suffix)) 2806 { 2807 char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid); 2808 2809 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2810 strcpy(s, str); 2811 else 2812 ereport(ERROR, 2813 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2814 errmsg("localized string format value too long"))); 2815 } 2816 else 2817 strcpy(s, days_short[tm->tm_wday]); 2818 s += strlen(s); 2819 break; 2820 case DCH_dy: 2821 INVALID_FOR_INTERVAL; 2822 if (S_TM(n->suffix)) 2823 { 2824 char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid); 2825 2826 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ) 2827 strcpy(s, str); 2828 else 2829 ereport(ERROR, 2830 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 2831 errmsg("localized string format value too long"))); 2832 } 2833 else 2834 strcpy(s, asc_tolower_z(days_short[tm->tm_wday])); 2835 s += strlen(s); 2836 break; 2837 case DCH_DDD: 2838 case DCH_IDDD: 2839 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3, 2840 (n->key->id == DCH_DDD) ? 2841 tm->tm_yday : 2842 date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday)); 2843 if (S_THth(n->suffix)) 2844 str_numth(s, s, S_TH_TYPE(n->suffix)); 2845 s += strlen(s); 2846 break; 2847 case DCH_DD: 2848 sprintf(s, "%0*d", S_FM(n->suffix) ? 
0 : 2, tm->tm_mday); 2849 if (S_THth(n->suffix)) 2850 str_numth(s, s, S_TH_TYPE(n->suffix)); 2851 s += strlen(s); 2852 break; 2853 case DCH_D: 2854 INVALID_FOR_INTERVAL; 2855 sprintf(s, "%d", tm->tm_wday + 1); 2856 if (S_THth(n->suffix)) 2857 str_numth(s, s, S_TH_TYPE(n->suffix)); 2858 s += strlen(s); 2859 break; 2860 case DCH_ID: 2861 INVALID_FOR_INTERVAL; 2862 sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday); 2863 if (S_THth(n->suffix)) 2864 str_numth(s, s, S_TH_TYPE(n->suffix)); 2865 s += strlen(s); 2866 break; 2867 case DCH_WW: 2868 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, 2869 (tm->tm_yday - 1) / 7 + 1); 2870 if (S_THth(n->suffix)) 2871 str_numth(s, s, S_TH_TYPE(n->suffix)); 2872 s += strlen(s); 2873 break; 2874 case DCH_IW: 2875 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, 2876 date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday)); 2877 if (S_THth(n->suffix)) 2878 str_numth(s, s, S_TH_TYPE(n->suffix)); 2879 s += strlen(s); 2880 break; 2881 case DCH_Q: 2882 if (!tm->tm_mon) 2883 break; 2884 sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1); 2885 if (S_THth(n->suffix)) 2886 str_numth(s, s, S_TH_TYPE(n->suffix)); 2887 s += strlen(s); 2888 break; 2889 case DCH_CC: 2890 if (is_interval) /* straight calculation */ 2891 i = tm->tm_year / 100; 2892 else 2893 { 2894 if (tm->tm_year > 0) 2895 /* Century 20 == 1901 - 2000 */ 2896 i = (tm->tm_year - 1) / 100 + 1; 2897 else 2898 /* Century 6BC == 600BC - 501BC */ 2899 i = tm->tm_year / 100 - 1; 2900 } 2901 if (i <= 99 && i >= -99) 2902 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 
2 : 3, i); 2903 else 2904 sprintf(s, "%d", i); 2905 if (S_THth(n->suffix)) 2906 str_numth(s, s, S_TH_TYPE(n->suffix)); 2907 s += strlen(s); 2908 break; 2909 case DCH_Y_YYY: 2910 i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000; 2911 sprintf(s, "%d,%03d", i, 2912 ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000)); 2913 if (S_THth(n->suffix)) 2914 str_numth(s, s, S_TH_TYPE(n->suffix)); 2915 s += strlen(s); 2916 break; 2917 case DCH_YYYY: 2918 case DCH_IYYY: 2919 sprintf(s, "%0*d", 2920 S_FM(n->suffix) ? 0 : 2921 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5, 2922 (n->key->id == DCH_YYYY ? 2923 ADJUST_YEAR(tm->tm_year, is_interval) : 2924 ADJUST_YEAR(date2isoyear(tm->tm_year, 2925 tm->tm_mon, 2926 tm->tm_mday), 2927 is_interval))); 2928 if (S_THth(n->suffix)) 2929 str_numth(s, s, S_TH_TYPE(n->suffix)); 2930 s += strlen(s); 2931 break; 2932 case DCH_YYY: 2933 case DCH_IYY: 2934 sprintf(s, "%0*d", 2935 S_FM(n->suffix) ? 0 : 2936 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4, 2937 (n->key->id == DCH_YYY ? 2938 ADJUST_YEAR(tm->tm_year, is_interval) : 2939 ADJUST_YEAR(date2isoyear(tm->tm_year, 2940 tm->tm_mon, 2941 tm->tm_mday), 2942 is_interval)) % 1000); 2943 if (S_THth(n->suffix)) 2944 str_numth(s, s, S_TH_TYPE(n->suffix)); 2945 s += strlen(s); 2946 break; 2947 case DCH_YY: 2948 case DCH_IY: 2949 sprintf(s, "%0*d", 2950 S_FM(n->suffix) ? 0 : 2951 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3, 2952 (n->key->id == DCH_YY ? 2953 ADJUST_YEAR(tm->tm_year, is_interval) : 2954 ADJUST_YEAR(date2isoyear(tm->tm_year, 2955 tm->tm_mon, 2956 tm->tm_mday), 2957 is_interval)) % 100); 2958 if (S_THth(n->suffix)) 2959 str_numth(s, s, S_TH_TYPE(n->suffix)); 2960 s += strlen(s); 2961 break; 2962 case DCH_Y: 2963 case DCH_I: 2964 sprintf(s, "%1d", 2965 (n->key->id == DCH_Y ? 
2966 ADJUST_YEAR(tm->tm_year, is_interval) : 2967 ADJUST_YEAR(date2isoyear(tm->tm_year, 2968 tm->tm_mon, 2969 tm->tm_mday), 2970 is_interval)) % 10); 2971 if (S_THth(n->suffix)) 2972 str_numth(s, s, S_TH_TYPE(n->suffix)); 2973 s += strlen(s); 2974 break; 2975 case DCH_RM: 2976 /* FALLTHROUGH */ 2977 case DCH_rm: 2978 2979 /* 2980 * For intervals, values like '12 month' will be reduced to 0 2981 * month and some years. These should be processed. 2982 */ 2983 if (!tm->tm_mon && !tm->tm_year) 2984 break; 2985 else 2986 { 2987 int mon = 0; 2988 const char *const *months; 2989 2990 if (n->key->id == DCH_RM) 2991 months = rm_months_upper; 2992 else 2993 months = rm_months_lower; 2994 2995 /* 2996 * Compute the position in the roman-numeral array. Note 2997 * that the contents of the array are reversed, December 2998 * being first and January last. 2999 */ 3000 if (tm->tm_mon == 0) 3001 { 3002 /* 3003 * This case is special, and tracks the case of full 3004 * interval years. 3005 */ 3006 mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1; 3007 } 3008 else if (tm->tm_mon < 0) 3009 { 3010 /* 3011 * Negative case. In this case, the calculation is 3012 * reversed, where -1 means December, -2 November, 3013 * etc. 3014 */ 3015 mon = -1 * (tm->tm_mon + 1); 3016 } 3017 else 3018 { 3019 /* 3020 * Common case, with a strictly positive value. The 3021 * position in the array matches with the value of 3022 * tm_mon. 3023 */ 3024 mon = MONTHS_PER_YEAR - tm->tm_mon; 3025 } 3026 3027 sprintf(s, "%*s", S_FM(n->suffix) ? 
0 : -4, 3028 months[mon]); 3029 s += strlen(s); 3030 } 3031 break; 3032 case DCH_W: 3033 sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1); 3034 if (S_THth(n->suffix)) 3035 str_numth(s, s, S_TH_TYPE(n->suffix)); 3036 s += strlen(s); 3037 break; 3038 case DCH_J: 3039 sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)); 3040 if (S_THth(n->suffix)) 3041 str_numth(s, s, S_TH_TYPE(n->suffix)); 3042 s += strlen(s); 3043 break; 3044 } 3045 } 3046 3047 *s = '\0'; 3048 } 3049 3050 /* ---------- 3051 * Process a string as denoted by a list of FormatNodes. 3052 * The TmFromChar struct pointed to by 'out' is populated with the results. 3053 * 3054 * Note: we currently don't have any to_interval() function, so there 3055 * is no need here for INVALID_FOR_INTERVAL checks. 3056 * ---------- 3057 */ 3058 static void 3059 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out) 3060 { 3061 FormatNode *n; 3062 const char *s; 3063 int len, 3064 value; 3065 bool fx_mode = false; 3066 3067 /* number of extra skipped characters (more than given in format string) */ 3068 int extra_skip = 0; 3069 3070 for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) 3071 { 3072 /* 3073 * Ignore spaces at the beginning of the string and before fields when 3074 * not in FX (fixed width) mode. 3075 */ 3076 if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) && 3077 (n->type == NODE_TYPE_ACTION || n == node)) 3078 { 3079 while (*s != '\0' && isspace((unsigned char) *s)) 3080 { 3081 s++; 3082 extra_skip++; 3083 } 3084 } 3085 3086 if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) 3087 { 3088 if (!fx_mode) 3089 { 3090 /* 3091 * In non FX (fixed format) mode one format string space or 3092 * separator match to one space or separator in input string. 3093 * Or match nothing if there is no space or separator in the 3094 * current position of input string. 
3095 */ 3096 extra_skip--; 3097 if (isspace((unsigned char) *s) || is_separator_char(s)) 3098 { 3099 s++; 3100 extra_skip++; 3101 } 3102 } 3103 else 3104 { 3105 /* 3106 * In FX mode, on format string space or separator we consume 3107 * exactly one character from input string. Notice we don't 3108 * insist that the consumed character match the format's 3109 * character. 3110 */ 3111 s += pg_mblen(s); 3112 } 3113 continue; 3114 } 3115 else if (n->type != NODE_TYPE_ACTION) 3116 { 3117 /* 3118 * Text character, so consume one character from input string. 3119 * Notice we don't insist that the consumed character match the 3120 * format's character. 3121 */ 3122 if (!fx_mode) 3123 { 3124 /* 3125 * In non FX mode we might have skipped some extra characters 3126 * (more than specified in format string) before. In this 3127 * case we don't skip input string character, because it might 3128 * be part of field. 3129 */ 3130 if (extra_skip > 0) 3131 extra_skip--; 3132 else 3133 s += pg_mblen(s); 3134 } 3135 else 3136 { 3137 s += pg_mblen(s); 3138 } 3139 continue; 3140 } 3141 3142 from_char_set_mode(out, n->key->date_mode); 3143 3144 switch (n->key->id) 3145 { 3146 case DCH_FX: 3147 fx_mode = true; 3148 break; 3149 case DCH_A_M: 3150 case DCH_P_M: 3151 case DCH_a_m: 3152 case DCH_p_m: 3153 from_char_seq_search(&value, &s, ampm_strings_long, 3154 n); 3155 from_char_set_int(&out->pm, value % 2, n); 3156 out->clock = CLOCK_12_HOUR; 3157 break; 3158 case DCH_AM: 3159 case DCH_PM: 3160 case DCH_am: 3161 case DCH_pm: 3162 from_char_seq_search(&value, &s, ampm_strings, 3163 n); 3164 from_char_set_int(&out->pm, value % 2, n); 3165 out->clock = CLOCK_12_HOUR; 3166 break; 3167 case DCH_HH: 3168 case DCH_HH12: 3169 from_char_parse_int_len(&out->hh, &s, 2, n); 3170 out->clock = CLOCK_12_HOUR; 3171 SKIP_THth(s, n->suffix); 3172 break; 3173 case DCH_HH24: 3174 from_char_parse_int_len(&out->hh, &s, 2, n); 3175 SKIP_THth(s, n->suffix); 3176 break; 3177 case DCH_MI: 3178 
from_char_parse_int(&out->mi, &s, n); 3179 SKIP_THth(s, n->suffix); 3180 break; 3181 case DCH_SS: 3182 from_char_parse_int(&out->ss, &s, n); 3183 SKIP_THth(s, n->suffix); 3184 break; 3185 case DCH_MS: /* millisecond */ 3186 len = from_char_parse_int_len(&out->ms, &s, 3, n); 3187 3188 /* 3189 * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25 3190 */ 3191 out->ms *= len == 1 ? 100 : 3192 len == 2 ? 10 : 1; 3193 3194 SKIP_THth(s, n->suffix); 3195 break; 3196 case DCH_US: /* microsecond */ 3197 len = from_char_parse_int_len(&out->us, &s, 6, n); 3198 3199 out->us *= len == 1 ? 100000 : 3200 len == 2 ? 10000 : 3201 len == 3 ? 1000 : 3202 len == 4 ? 100 : 3203 len == 5 ? 10 : 1; 3204 3205 SKIP_THth(s, n->suffix); 3206 break; 3207 case DCH_SSSS: 3208 from_char_parse_int(&out->ssss, &s, n); 3209 SKIP_THth(s, n->suffix); 3210 break; 3211 case DCH_tz: 3212 case DCH_TZ: 3213 case DCH_OF: 3214 ereport(ERROR, 3215 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 3216 errmsg("formatting field \"%s\" is only supported in to_char", 3217 n->key->name))); 3218 break; 3219 case DCH_TZH: 3220 3221 /* 3222 * Value of TZH might be negative. And the issue is that we 3223 * might swallow minus sign as the separator. So, if we have 3224 * skipped more characters than specified in the format 3225 * string, then we consider prepending last skipped minus to 3226 * TZH. 3227 */ 3228 if (*s == '+' || *s == '-' || *s == ' ') 3229 { 3230 out->tzsign = *s == '-' ? 
-1 : +1; 3231 s++; 3232 } 3233 else 3234 { 3235 if (extra_skip > 0 && *(s - 1) == '-') 3236 out->tzsign = -1; 3237 else 3238 out->tzsign = +1; 3239 } 3240 3241 from_char_parse_int_len(&out->tzh, &s, 2, n); 3242 break; 3243 case DCH_TZM: 3244 /* assign positive timezone sign if TZH was not seen before */ 3245 if (!out->tzsign) 3246 out->tzsign = +1; 3247 from_char_parse_int_len(&out->tzm, &s, 2, n); 3248 break; 3249 case DCH_A_D: 3250 case DCH_B_C: 3251 case DCH_a_d: 3252 case DCH_b_c: 3253 from_char_seq_search(&value, &s, adbc_strings_long, 3254 n); 3255 from_char_set_int(&out->bc, value % 2, n); 3256 break; 3257 case DCH_AD: 3258 case DCH_BC: 3259 case DCH_ad: 3260 case DCH_bc: 3261 from_char_seq_search(&value, &s, adbc_strings, 3262 n); 3263 from_char_set_int(&out->bc, value % 2, n); 3264 break; 3265 case DCH_MONTH: 3266 case DCH_Month: 3267 case DCH_month: 3268 from_char_seq_search(&value, &s, months_full, 3269 n); 3270 from_char_set_int(&out->mm, value + 1, n); 3271 break; 3272 case DCH_MON: 3273 case DCH_Mon: 3274 case DCH_mon: 3275 from_char_seq_search(&value, &s, months, 3276 n); 3277 from_char_set_int(&out->mm, value + 1, n); 3278 break; 3279 case DCH_MM: 3280 from_char_parse_int(&out->mm, &s, n); 3281 SKIP_THth(s, n->suffix); 3282 break; 3283 case DCH_DAY: 3284 case DCH_Day: 3285 case DCH_day: 3286 from_char_seq_search(&value, &s, days, 3287 n); 3288 from_char_set_int(&out->d, value, n); 3289 out->d++; 3290 break; 3291 case DCH_DY: 3292 case DCH_Dy: 3293 case DCH_dy: 3294 from_char_seq_search(&value, &s, days_short, 3295 n); 3296 from_char_set_int(&out->d, value, n); 3297 out->d++; 3298 break; 3299 case DCH_DDD: 3300 from_char_parse_int(&out->ddd, &s, n); 3301 SKIP_THth(s, n->suffix); 3302 break; 3303 case DCH_IDDD: 3304 from_char_parse_int_len(&out->ddd, &s, 3, n); 3305 SKIP_THth(s, n->suffix); 3306 break; 3307 case DCH_DD: 3308 from_char_parse_int(&out->dd, &s, n); 3309 SKIP_THth(s, n->suffix); 3310 break; 3311 case DCH_D: 3312 
from_char_parse_int(&out->d, &s, n); 3313 SKIP_THth(s, n->suffix); 3314 break; 3315 case DCH_ID: 3316 from_char_parse_int_len(&out->d, &s, 1, n); 3317 /* Shift numbering to match Gregorian where Sunday = 1 */ 3318 if (++out->d > 7) 3319 out->d = 1; 3320 SKIP_THth(s, n->suffix); 3321 break; 3322 case DCH_WW: 3323 case DCH_IW: 3324 from_char_parse_int(&out->ww, &s, n); 3325 SKIP_THth(s, n->suffix); 3326 break; 3327 case DCH_Q: 3328 3329 /* 3330 * We ignore 'Q' when converting to date because it is unclear 3331 * which date in the quarter to use, and some people specify 3332 * both quarter and month, so if it was honored it might 3333 * conflict with the supplied month. That is also why we don't 3334 * throw an error. 3335 * 3336 * We still parse the source string for an integer, but it 3337 * isn't stored anywhere in 'out'. 3338 */ 3339 from_char_parse_int((int *) NULL, &s, n); 3340 SKIP_THth(s, n->suffix); 3341 break; 3342 case DCH_CC: 3343 from_char_parse_int(&out->cc, &s, n); 3344 SKIP_THth(s, n->suffix); 3345 break; 3346 case DCH_Y_YYY: 3347 { 3348 int matched, 3349 years, 3350 millennia, 3351 nch; 3352 3353 matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch); 3354 if (matched < 2) 3355 ereport(ERROR, 3356 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 3357 errmsg("invalid input string for \"Y,YYY\""))); 3358 years += (millennia * 1000); 3359 from_char_set_int(&out->year, years, n); 3360 out->yysz = 4; 3361 s += nch; 3362 SKIP_THth(s, n->suffix); 3363 } 3364 break; 3365 case DCH_YYYY: 3366 case DCH_IYYY: 3367 from_char_parse_int(&out->year, &s, n); 3368 out->yysz = 4; 3369 SKIP_THth(s, n->suffix); 3370 break; 3371 case DCH_YYY: 3372 case DCH_IYY: 3373 if (from_char_parse_int(&out->year, &s, n) < 4) 3374 out->year = adjust_partial_year_to_2020(out->year); 3375 out->yysz = 3; 3376 SKIP_THth(s, n->suffix); 3377 break; 3378 case DCH_YY: 3379 case DCH_IY: 3380 if (from_char_parse_int(&out->year, &s, n) < 4) 3381 out->year = adjust_partial_year_to_2020(out->year); 
3382 out->yysz = 2; 3383 SKIP_THth(s, n->suffix); 3384 break; 3385 case DCH_Y: 3386 case DCH_I: 3387 if (from_char_parse_int(&out->year, &s, n) < 4) 3388 out->year = adjust_partial_year_to_2020(out->year); 3389 out->yysz = 1; 3390 SKIP_THth(s, n->suffix); 3391 break; 3392 case DCH_RM: 3393 case DCH_rm: 3394 from_char_seq_search(&value, &s, rm_months_lower, 3395 n); 3396 from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n); 3397 break; 3398 case DCH_W: 3399 from_char_parse_int(&out->w, &s, n); 3400 SKIP_THth(s, n->suffix); 3401 break; 3402 case DCH_J: 3403 from_char_parse_int(&out->j, &s, n); 3404 SKIP_THth(s, n->suffix); 3405 break; 3406 } 3407 3408 /* Ignore all spaces after fields */ 3409 if (!fx_mode) 3410 { 3411 extra_skip = 0; 3412 while (*s != '\0' && isspace((unsigned char) *s)) 3413 { 3414 s++; 3415 extra_skip++; 3416 } 3417 } 3418 } 3419 } 3420 3421 /* 3422 * The invariant for DCH cache entry management is that DCHCounter is equal 3423 * to the maximum age value among the existing entries, and we increment it 3424 * whenever an access occurs. If we approach overflow, deal with that by 3425 * halving all the age values, so that we retain a fairly accurate idea of 3426 * which entries are oldest. 
3427 */ 3428 static inline void 3429 DCH_prevent_counter_overflow(void) 3430 { 3431 if (DCHCounter >= (INT_MAX - 1)) 3432 { 3433 for (int i = 0; i < n_DCHCache; i++) 3434 DCHCache[i]->age >>= 1; 3435 DCHCounter >>= 1; 3436 } 3437 } 3438 3439 /* select a DCHCacheEntry to hold the given format picture */ 3440 static DCHCacheEntry * 3441 DCH_cache_getnew(const char *str) 3442 { 3443 DCHCacheEntry *ent; 3444 3445 /* Ensure we can advance DCHCounter below */ 3446 DCH_prevent_counter_overflow(); 3447 3448 /* 3449 * If cache is full, remove oldest entry (or recycle first not-valid one) 3450 */ 3451 if (n_DCHCache >= DCH_CACHE_ENTRIES) 3452 { 3453 DCHCacheEntry *old = DCHCache[0]; 3454 3455 #ifdef DEBUG_TO_FROM_CHAR 3456 elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache); 3457 #endif 3458 if (old->valid) 3459 { 3460 for (int i = 1; i < DCH_CACHE_ENTRIES; i++) 3461 { 3462 ent = DCHCache[i]; 3463 if (!ent->valid) 3464 { 3465 old = ent; 3466 break; 3467 } 3468 if (ent->age < old->age) 3469 old = ent; 3470 } 3471 } 3472 #ifdef DEBUG_TO_FROM_CHAR 3473 elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age); 3474 #endif 3475 old->valid = false; 3476 StrNCpy(old->str, str, DCH_CACHE_SIZE + 1); 3477 old->age = (++DCHCounter); 3478 /* caller is expected to fill format, then set valid */ 3479 return old; 3480 } 3481 else 3482 { 3483 #ifdef DEBUG_TO_FROM_CHAR 3484 elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache); 3485 #endif 3486 Assert(DCHCache[n_DCHCache] == NULL); 3487 DCHCache[n_DCHCache] = ent = (DCHCacheEntry *) 3488 MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry)); 3489 ent->valid = false; 3490 StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1); 3491 ent->age = (++DCHCounter); 3492 /* caller is expected to fill format, then set valid */ 3493 ++n_DCHCache; 3494 return ent; 3495 } 3496 } 3497 3498 /* look for an existing DCHCacheEntry matching the given format picture */ 3499 static DCHCacheEntry * 3500 DCH_cache_search(const char *str) 3501 { 3502 /* 
Ensure we can advance DCHCounter below */ 3503 DCH_prevent_counter_overflow(); 3504 3505 for (int i = 0; i < n_DCHCache; i++) 3506 { 3507 DCHCacheEntry *ent = DCHCache[i]; 3508 3509 if (ent->valid && strcmp(ent->str, str) == 0) 3510 { 3511 ent->age = (++DCHCounter); 3512 return ent; 3513 } 3514 } 3515 3516 return NULL; 3517 } 3518 3519 /* Find or create a DCHCacheEntry for the given format picture */ 3520 static DCHCacheEntry * 3521 DCH_cache_fetch(const char *str) 3522 { 3523 DCHCacheEntry *ent; 3524 3525 if ((ent = DCH_cache_search(str)) == NULL) 3526 { 3527 /* 3528 * Not in the cache, must run parser and save a new format-picture to 3529 * the cache. Do not mark the cache entry valid until parsing 3530 * succeeds. 3531 */ 3532 ent = DCH_cache_getnew(str); 3533 3534 parse_format(ent->format, str, DCH_keywords, 3535 DCH_suff, DCH_index, DCH_TYPE, NULL); 3536 3537 ent->valid = true; 3538 } 3539 return ent; 3540 } 3541 3542 /* 3543 * Format a date/time or interval into a string according to fmt. 3544 * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char 3545 * for formatting. 
3546 */ 3547 static text * 3548 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid) 3549 { 3550 FormatNode *format; 3551 char *fmt_str, 3552 *result; 3553 bool incache; 3554 int fmt_len; 3555 text *res; 3556 3557 /* 3558 * Convert fmt to C string 3559 */ 3560 fmt_str = text_to_cstring(fmt); 3561 fmt_len = strlen(fmt_str); 3562 3563 /* 3564 * Allocate workspace for result as C string 3565 */ 3566 result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1); 3567 *result = '\0'; 3568 3569 if (fmt_len > DCH_CACHE_SIZE) 3570 { 3571 /* 3572 * Allocate new memory if format picture is bigger than static cache 3573 * and do not use cache (call parser always) 3574 */ 3575 incache = false; 3576 3577 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); 3578 3579 parse_format(format, fmt_str, DCH_keywords, 3580 DCH_suff, DCH_index, DCH_TYPE, NULL); 3581 } 3582 else 3583 { 3584 /* 3585 * Use cache buffers 3586 */ 3587 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str); 3588 3589 incache = true; 3590 format = ent->format; 3591 } 3592 3593 /* The real work is here */ 3594 DCH_to_char(format, is_interval, tmtc, result, collid); 3595 3596 if (!incache) 3597 pfree(format); 3598 3599 pfree(fmt_str); 3600 3601 /* convert C-string result to TEXT format */ 3602 res = cstring_to_text(result); 3603 3604 pfree(result); 3605 return res; 3606 } 3607 3608 /**************************************************************************** 3609 * Public routines 3610 ***************************************************************************/ 3611 3612 /* ------------------- 3613 * TIMESTAMP to_char() 3614 * ------------------- 3615 */ 3616 Datum 3617 timestamp_to_char(PG_FUNCTION_ARGS) 3618 { 3619 Timestamp dt = PG_GETARG_TIMESTAMP(0); 3620 text *fmt = PG_GETARG_TEXT_PP(1), 3621 *res; 3622 TmToChar tmtc; 3623 struct pg_tm *tm; 3624 int thisdate; 3625 3626 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt)) 3627 PG_RETURN_NULL(); 3628 3629 ZERO_tmtc(&tmtc); 3630 tm 
= tmtcTm(&tmtc); 3631 3632 if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0) 3633 ereport(ERROR, 3634 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 3635 errmsg("timestamp out of range"))); 3636 3637 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday); 3638 tm->tm_wday = (thisdate + 1) % 7; 3639 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1; 3640 3641 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) 3642 PG_RETURN_NULL(); 3643 3644 PG_RETURN_TEXT_P(res); 3645 } 3646 3647 Datum 3648 timestamptz_to_char(PG_FUNCTION_ARGS) 3649 { 3650 TimestampTz dt = PG_GETARG_TIMESTAMP(0); 3651 text *fmt = PG_GETARG_TEXT_PP(1), 3652 *res; 3653 TmToChar tmtc; 3654 int tz; 3655 struct pg_tm *tm; 3656 int thisdate; 3657 3658 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt)) 3659 PG_RETURN_NULL(); 3660 3661 ZERO_tmtc(&tmtc); 3662 tm = tmtcTm(&tmtc); 3663 3664 if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0) 3665 ereport(ERROR, 3666 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 3667 errmsg("timestamp out of range"))); 3668 3669 thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday); 3670 tm->tm_wday = (thisdate + 1) % 7; 3671 tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1; 3672 3673 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) 3674 PG_RETURN_NULL(); 3675 3676 PG_RETURN_TEXT_P(res); 3677 } 3678 3679 3680 /* ------------------- 3681 * INTERVAL to_char() 3682 * ------------------- 3683 */ 3684 Datum 3685 interval_to_char(PG_FUNCTION_ARGS) 3686 { 3687 Interval *it = PG_GETARG_INTERVAL_P(0); 3688 text *fmt = PG_GETARG_TEXT_PP(1), 3689 *res; 3690 TmToChar tmtc; 3691 struct pg_tm *tm; 3692 3693 if (VARSIZE_ANY_EXHDR(fmt) <= 0) 3694 PG_RETURN_NULL(); 3695 3696 ZERO_tmtc(&tmtc); 3697 tm = tmtcTm(&tmtc); 3698 3699 if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0) 3700 PG_RETURN_NULL(); 3701 3702 /* wday is meaningless, yday approximates the total span in days */ 3703 
tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday; 3704 3705 if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION()))) 3706 PG_RETURN_NULL(); 3707 3708 PG_RETURN_TEXT_P(res); 3709 } 3710 3711 /* --------------------- 3712 * TO_TIMESTAMP() 3713 * 3714 * Make Timestamp from date_str which is formatted at argument 'fmt' 3715 * ( to_timestamp is reverse to_char() ) 3716 * --------------------- 3717 */ 3718 Datum 3719 to_timestamp(PG_FUNCTION_ARGS) 3720 { 3721 text *date_txt = PG_GETARG_TEXT_PP(0); 3722 text *fmt = PG_GETARG_TEXT_PP(1); 3723 Timestamp result; 3724 int tz; 3725 struct pg_tm tm; 3726 fsec_t fsec; 3727 3728 do_to_timestamp(date_txt, fmt, &tm, &fsec); 3729 3730 /* Use the specified time zone, if any. */ 3731 if (tm.tm_zone) 3732 { 3733 int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz); 3734 3735 if (dterr) 3736 DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz"); 3737 } 3738 else 3739 tz = DetermineTimeZoneOffset(&tm, session_timezone); 3740 3741 if (tm2timestamp(&tm, fsec, &tz, &result) != 0) 3742 ereport(ERROR, 3743 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 3744 errmsg("timestamp out of range"))); 3745 3746 PG_RETURN_TIMESTAMP(result); 3747 } 3748 3749 /* ---------- 3750 * TO_DATE 3751 * Make Date from date_str which is formatted at argument 'fmt' 3752 * ---------- 3753 */ 3754 Datum 3755 to_date(PG_FUNCTION_ARGS) 3756 { 3757 text *date_txt = PG_GETARG_TEXT_PP(0); 3758 text *fmt = PG_GETARG_TEXT_PP(1); 3759 DateADT result; 3760 struct pg_tm tm; 3761 fsec_t fsec; 3762 3763 do_to_timestamp(date_txt, fmt, &tm, &fsec); 3764 3765 /* Prevent overflow in Julian-day routines */ 3766 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) 3767 ereport(ERROR, 3768 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 3769 errmsg("date out of range: \"%s\"", 3770 text_to_cstring(date_txt)))); 3771 3772 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE; 3773 
3774 /* Now check for just-out-of-range dates */ 3775 if (!IS_VALID_DATE(result)) 3776 ereport(ERROR, 3777 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), 3778 errmsg("date out of range: \"%s\"", 3779 text_to_cstring(date_txt)))); 3780 3781 PG_RETURN_DATEADT(result); 3782 } 3783 3784 /* 3785 * do_to_timestamp: shared code for to_timestamp and to_date 3786 * 3787 * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm 3788 * and fractional seconds. 3789 * 3790 * We parse 'fmt' into a list of FormatNodes, which is then passed to 3791 * DCH_from_char to populate a TmFromChar with the parsed contents of 3792 * 'date_txt'. 3793 * 3794 * The TmFromChar is then analysed and converted into the final results in 3795 * struct 'tm' and 'fsec'. 3796 */ 3797 static void 3798 do_to_timestamp(text *date_txt, text *fmt, 3799 struct pg_tm *tm, fsec_t *fsec) 3800 { 3801 FormatNode *format; 3802 TmFromChar tmfc; 3803 int fmt_len; 3804 char *date_str; 3805 int fmask; 3806 3807 date_str = text_to_cstring(date_txt); 3808 3809 ZERO_tmfc(&tmfc); 3810 ZERO_tm(tm); 3811 *fsec = 0; 3812 fmask = 0; /* bit mask for ValidateDate() */ 3813 3814 fmt_len = VARSIZE_ANY_EXHDR(fmt); 3815 3816 if (fmt_len) 3817 { 3818 char *fmt_str; 3819 bool incache; 3820 3821 fmt_str = text_to_cstring(fmt); 3822 3823 if (fmt_len > DCH_CACHE_SIZE) 3824 { 3825 /* 3826 * Allocate new memory if format picture is bigger than static 3827 * cache and do not use cache (call parser always) 3828 */ 3829 incache = false; 3830 3831 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); 3832 3833 parse_format(format, fmt_str, DCH_keywords, 3834 DCH_suff, DCH_index, DCH_TYPE, NULL); 3835 } 3836 else 3837 { 3838 /* 3839 * Use cache buffers 3840 */ 3841 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str); 3842 3843 incache = true; 3844 format = ent->format; 3845 } 3846 3847 #ifdef DEBUG_TO_FROM_CHAR 3848 /* dump_node(format, fmt_len); */ 3849 /* dump_index(DCH_keywords, DCH_index); */ 3850 #endif 3851 3852 
DCH_from_char(format, date_str, &tmfc); 3853 3854 pfree(fmt_str); 3855 if (!incache) 3856 pfree(format); 3857 } 3858 3859 DEBUG_TMFC(&tmfc); 3860 3861 /* 3862 * Convert to_date/to_timestamp input fields to standard 'tm' 3863 */ 3864 if (tmfc.ssss) 3865 { 3866 int x = tmfc.ssss; 3867 3868 tm->tm_hour = x / SECS_PER_HOUR; 3869 x %= SECS_PER_HOUR; 3870 tm->tm_min = x / SECS_PER_MINUTE; 3871 x %= SECS_PER_MINUTE; 3872 tm->tm_sec = x; 3873 } 3874 3875 if (tmfc.ss) 3876 tm->tm_sec = tmfc.ss; 3877 if (tmfc.mi) 3878 tm->tm_min = tmfc.mi; 3879 if (tmfc.hh) 3880 tm->tm_hour = tmfc.hh; 3881 3882 if (tmfc.clock == CLOCK_12_HOUR) 3883 { 3884 if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2) 3885 ereport(ERROR, 3886 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 3887 errmsg("hour \"%d\" is invalid for the 12-hour clock", 3888 tm->tm_hour), 3889 errhint("Use the 24-hour clock, or give an hour between 1 and 12."))); 3890 3891 if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2) 3892 tm->tm_hour += HOURS_PER_DAY / 2; 3893 else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2) 3894 tm->tm_hour = 0; 3895 } 3896 3897 if (tmfc.year) 3898 { 3899 /* 3900 * If CC and YY (or Y) are provided, use YY as 2 low-order digits for 3901 * the year in the given century. Keep in mind that the 21st century 3902 * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from 3903 * 600BC to 501BC. 3904 */ 3905 if (tmfc.cc && tmfc.yysz <= 2) 3906 { 3907 if (tmfc.bc) 3908 tmfc.cc = -tmfc.cc; 3909 tm->tm_year = tmfc.year % 100; 3910 if (tm->tm_year) 3911 { 3912 if (tmfc.cc >= 0) 3913 tm->tm_year += (tmfc.cc - 1) * 100; 3914 else 3915 tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1; 3916 } 3917 else 3918 { 3919 /* find century year for dates ending in "00" */ 3920 tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1); 3921 } 3922 } 3923 else 3924 { 3925 /* If a 4-digit year is provided, we use that and ignore CC. 
*/ 3926 tm->tm_year = tmfc.year; 3927 if (tmfc.bc) 3928 tm->tm_year = -tm->tm_year; 3929 /* correct for our representation of BC years */ 3930 if (tm->tm_year < 0) 3931 tm->tm_year++; 3932 } 3933 fmask |= DTK_M(YEAR); 3934 } 3935 else if (tmfc.cc) 3936 { 3937 /* use first year of century */ 3938 if (tmfc.bc) 3939 tmfc.cc = -tmfc.cc; 3940 if (tmfc.cc >= 0) 3941 /* +1 because 21st century started in 2001 */ 3942 tm->tm_year = (tmfc.cc - 1) * 100 + 1; 3943 else 3944 /* +1 because year == 599 is 600 BC */ 3945 tm->tm_year = tmfc.cc * 100 + 1; 3946 fmask |= DTK_M(YEAR); 3947 } 3948 3949 if (tmfc.j) 3950 { 3951 j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); 3952 fmask |= DTK_DATE_M; 3953 } 3954 3955 if (tmfc.ww) 3956 { 3957 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK) 3958 { 3959 /* 3960 * If tmfc.d is not set, then the date is left at the beginning of 3961 * the ISO week (Monday). 3962 */ 3963 if (tmfc.d) 3964 isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); 3965 else 3966 isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); 3967 fmask |= DTK_DATE_M; 3968 } 3969 else 3970 tmfc.ddd = (tmfc.ww - 1) * 7 + 1; 3971 } 3972 3973 if (tmfc.w) 3974 tmfc.dd = (tmfc.w - 1) * 7 + 1; 3975 if (tmfc.dd) 3976 { 3977 tm->tm_mday = tmfc.dd; 3978 fmask |= DTK_M(DAY); 3979 } 3980 if (tmfc.mm) 3981 { 3982 tm->tm_mon = tmfc.mm; 3983 fmask |= DTK_M(MONTH); 3984 } 3985 3986 if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1)) 3987 { 3988 /* 3989 * The month and day field have not been set, so we use the 3990 * day-of-year field to populate them. Depending on the date mode, 3991 * this field may be interpreted as a Gregorian day-of-year, or an ISO 3992 * week date day-of-year. 
3993 */ 3994 3995 if (!tm->tm_year && !tmfc.bc) 3996 ereport(ERROR, 3997 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), 3998 errmsg("cannot calculate day of year without year information"))); 3999 4000 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK) 4001 { 4002 int j0; /* zeroth day of the ISO year, in Julian */ 4003 4004 j0 = isoweek2j(tm->tm_year, 1) - 1; 4005 4006 j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); 4007 fmask |= DTK_DATE_M; 4008 } 4009 else 4010 { 4011 const int *y; 4012 int i; 4013 4014 static const int ysum[2][13] = { 4015 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, 4016 {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}}; 4017 4018 y = ysum[isleap(tm->tm_year)]; 4019 4020 for (i = 1; i <= MONTHS_PER_YEAR; i++) 4021 { 4022 if (tmfc.ddd <= y[i]) 4023 break; 4024 } 4025 if (tm->tm_mon <= 1) 4026 tm->tm_mon = i; 4027 4028 if (tm->tm_mday <= 1) 4029 tm->tm_mday = tmfc.ddd - y[i - 1]; 4030 4031 fmask |= DTK_M(MONTH) | DTK_M(DAY); 4032 } 4033 } 4034 4035 if (tmfc.ms) 4036 *fsec += tmfc.ms * 1000; 4037 if (tmfc.us) 4038 *fsec += tmfc.us; 4039 4040 /* Range-check date fields according to bit mask computed above */ 4041 if (fmask != 0) 4042 { 4043 /* We already dealt with AD/BC, so pass isjulian = true */ 4044 int dterr = ValidateDate(fmask, true, false, false, tm); 4045 4046 if (dterr != 0) 4047 { 4048 /* 4049 * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate 4050 * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an 4051 * irrelevant hint about datestyle. 
4052 */ 4053 DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"); 4054 } 4055 } 4056 4057 /* Range-check time fields too */ 4058 if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY || 4059 tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR || 4060 tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE || 4061 *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC) 4062 DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"); 4063 4064 /* Save parsed time-zone into tm->tm_zone if it was specified */ 4065 if (tmfc.tzsign) 4066 { 4067 char *tz; 4068 4069 if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR || 4070 tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR) 4071 DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"); 4072 4073 tz = psprintf("%c%02d:%02d", 4074 tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm); 4075 4076 tm->tm_zone = tz; 4077 } 4078 4079 DEBUG_TM(tm); 4080 4081 pfree(date_str); 4082 } 4083 4084 4085 /********************************************************************** 4086 * the NUMBER version part 4087 *********************************************************************/ 4088 4089 4090 static char * 4091 fill_str(char *str, int c, int max) 4092 { 4093 memset(str, c, max); 4094 *(str + max) = '\0'; 4095 return str; 4096 } 4097 4098 #define zeroize_NUM(_n) \ 4099 do { \ 4100 (_n)->flag = 0; \ 4101 (_n)->lsign = 0; \ 4102 (_n)->pre = 0; \ 4103 (_n)->post = 0; \ 4104 (_n)->pre_lsign_num = 0; \ 4105 (_n)->need_locale = 0; \ 4106 (_n)->multi = 0; \ 4107 (_n)->zero_start = 0; \ 4108 (_n)->zero_end = 0; \ 4109 } while(0) 4110 4111 /* This works the same as DCH_prevent_counter_overflow */ 4112 static inline void 4113 NUM_prevent_counter_overflow(void) 4114 { 4115 if (NUMCounter >= (INT_MAX - 1)) 4116 { 4117 for (int i = 0; i < n_NUMCache; i++) 4118 NUMCache[i]->age >>= 1; 4119 NUMCounter >>= 1; 4120 } 4121 } 4122 4123 /* select a NUMCacheEntry to hold the given format picture */ 4124 static NUMCacheEntry * 4125 NUM_cache_getnew(const char *str) 4126 { 
4127 NUMCacheEntry *ent; 4128 4129 /* Ensure we can advance NUMCounter below */ 4130 NUM_prevent_counter_overflow(); 4131 4132 /* 4133 * If cache is full, remove oldest entry (or recycle first not-valid one) 4134 */ 4135 if (n_NUMCache >= NUM_CACHE_ENTRIES) 4136 { 4137 NUMCacheEntry *old = NUMCache[0]; 4138 4139 #ifdef DEBUG_TO_FROM_CHAR 4140 elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache); 4141 #endif 4142 if (old->valid) 4143 { 4144 for (int i = 1; i < NUM_CACHE_ENTRIES; i++) 4145 { 4146 ent = NUMCache[i]; 4147 if (!ent->valid) 4148 { 4149 old = ent; 4150 break; 4151 } 4152 if (ent->age < old->age) 4153 old = ent; 4154 } 4155 } 4156 #ifdef DEBUG_TO_FROM_CHAR 4157 elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age); 4158 #endif 4159 old->valid = false; 4160 StrNCpy(old->str, str, NUM_CACHE_SIZE + 1); 4161 old->age = (++NUMCounter); 4162 /* caller is expected to fill format and Num, then set valid */ 4163 return old; 4164 } 4165 else 4166 { 4167 #ifdef DEBUG_TO_FROM_CHAR 4168 elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache); 4169 #endif 4170 Assert(NUMCache[n_NUMCache] == NULL); 4171 NUMCache[n_NUMCache] = ent = (NUMCacheEntry *) 4172 MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry)); 4173 ent->valid = false; 4174 StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1); 4175 ent->age = (++NUMCounter); 4176 /* caller is expected to fill format and Num, then set valid */ 4177 ++n_NUMCache; 4178 return ent; 4179 } 4180 } 4181 4182 /* look for an existing NUMCacheEntry matching the given format picture */ 4183 static NUMCacheEntry * 4184 NUM_cache_search(const char *str) 4185 { 4186 /* Ensure we can advance NUMCounter below */ 4187 NUM_prevent_counter_overflow(); 4188 4189 for (int i = 0; i < n_NUMCache; i++) 4190 { 4191 NUMCacheEntry *ent = NUMCache[i]; 4192 4193 if (ent->valid && strcmp(ent->str, str) == 0) 4194 { 4195 ent->age = (++NUMCounter); 4196 return ent; 4197 } 4198 } 4199 4200 return NULL; 4201 } 4202 4203 /* Find or create a 
NUMCacheEntry for the given format picture */ 4204 static NUMCacheEntry * 4205 NUM_cache_fetch(const char *str) 4206 { 4207 NUMCacheEntry *ent; 4208 4209 if ((ent = NUM_cache_search(str)) == NULL) 4210 { 4211 /* 4212 * Not in the cache, must run parser and save a new format-picture to 4213 * the cache. Do not mark the cache entry valid until parsing 4214 * succeeds. 4215 */ 4216 ent = NUM_cache_getnew(str); 4217 4218 zeroize_NUM(&ent->Num); 4219 4220 parse_format(ent->format, str, NUM_keywords, 4221 NULL, NUM_index, NUM_TYPE, &ent->Num); 4222 4223 ent->valid = true; 4224 } 4225 return ent; 4226 } 4227 4228 /* ---------- 4229 * Cache routine for NUM to_char version 4230 * ---------- 4231 */ 4232 static FormatNode * 4233 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree) 4234 { 4235 FormatNode *format = NULL; 4236 char *str; 4237 4238 str = text_to_cstring(pars_str); 4239 4240 if (len > NUM_CACHE_SIZE) 4241 { 4242 /* 4243 * Allocate new memory if format picture is bigger than static cache 4244 * and do not use cache (call parser always) 4245 */ 4246 format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode)); 4247 4248 *shouldFree = true; 4249 4250 zeroize_NUM(Num); 4251 4252 parse_format(format, str, NUM_keywords, 4253 NULL, NUM_index, NUM_TYPE, Num); 4254 } 4255 else 4256 { 4257 /* 4258 * Use cache buffers 4259 */ 4260 NUMCacheEntry *ent = NUM_cache_fetch(str); 4261 4262 *shouldFree = false; 4263 4264 format = ent->format; 4265 4266 /* 4267 * Copy cache to used struct 4268 */ 4269 Num->flag = ent->Num.flag; 4270 Num->lsign = ent->Num.lsign; 4271 Num->pre = ent->Num.pre; 4272 Num->post = ent->Num.post; 4273 Num->pre_lsign_num = ent->Num.pre_lsign_num; 4274 Num->need_locale = ent->Num.need_locale; 4275 Num->multi = ent->Num.multi; 4276 Num->zero_start = ent->Num.zero_start; 4277 Num->zero_end = ent->Num.zero_end; 4278 } 4279 4280 #ifdef DEBUG_TO_FROM_CHAR 4281 /* dump_node(format, len); */ 4282 dump_index(NUM_keywords, NUM_index); 4283 #endif 4284 
4285 pfree(str); 4286 return format; 4287 } 4288 4289 4290 static char * 4291 int_to_roman(int number) 4292 { 4293 int len = 0, 4294 num = 0; 4295 char *p = NULL, 4296 *result, 4297 numstr[12]; 4298 4299 result = (char *) palloc(16); 4300 *result = '\0'; 4301 4302 if (number > 3999 || number < 1) 4303 { 4304 fill_str(result, '#', 15); 4305 return result; 4306 } 4307 len = snprintf(numstr, sizeof(numstr), "%d", number); 4308 4309 for (p = numstr; *p != '\0'; p++, --len) 4310 { 4311 num = *p - 49; /* 48 ascii + 1 */ 4312 if (num < 0) 4313 continue; 4314 4315 if (len > 3) 4316 { 4317 while (num-- != -1) 4318 strcat(result, "M"); 4319 } 4320 else 4321 { 4322 if (len == 3) 4323 strcat(result, rm100[num]); 4324 else if (len == 2) 4325 strcat(result, rm10[num]); 4326 else if (len == 1) 4327 strcat(result, rm1[num]); 4328 } 4329 } 4330 return result; 4331 } 4332 4333 4334 4335 /* ---------- 4336 * Locale 4337 * ---------- 4338 */ 4339 static void 4340 NUM_prepare_locale(NUMProc *Np) 4341 { 4342 if (Np->Num->need_locale) 4343 { 4344 struct lconv *lconv; 4345 4346 /* 4347 * Get locales 4348 */ 4349 lconv = PGLC_localeconv(); 4350 4351 /* 4352 * Positive / Negative number sign 4353 */ 4354 if (lconv->negative_sign && *lconv->negative_sign) 4355 Np->L_negative_sign = lconv->negative_sign; 4356 else 4357 Np->L_negative_sign = "-"; 4358 4359 if (lconv->positive_sign && *lconv->positive_sign) 4360 Np->L_positive_sign = lconv->positive_sign; 4361 else 4362 Np->L_positive_sign = "+"; 4363 4364 /* 4365 * Number decimal point 4366 */ 4367 if (lconv->decimal_point && *lconv->decimal_point) 4368 Np->decimal = lconv->decimal_point; 4369 4370 else 4371 Np->decimal = "."; 4372 4373 if (!IS_LDECIMAL(Np->Num)) 4374 Np->decimal = "."; 4375 4376 /* 4377 * Number thousands separator 4378 * 4379 * Some locales (e.g. broken glibc pt_BR), have a comma for decimal, 4380 * but "" for thousands_sep, so we set the thousands_sep too. 
4381 * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php 4382 */ 4383 if (lconv->thousands_sep && *lconv->thousands_sep) 4384 Np->L_thousands_sep = lconv->thousands_sep; 4385 /* Make sure thousands separator doesn't match decimal point symbol. */ 4386 else if (strcmp(Np->decimal, ",") !=0) 4387 Np->L_thousands_sep = ","; 4388 else 4389 Np->L_thousands_sep = "."; 4390 4391 /* 4392 * Currency symbol 4393 */ 4394 if (lconv->currency_symbol && *lconv->currency_symbol) 4395 Np->L_currency_symbol = lconv->currency_symbol; 4396 else 4397 Np->L_currency_symbol = " "; 4398 } 4399 else 4400 { 4401 /* 4402 * Default values 4403 */ 4404 Np->L_negative_sign = "-"; 4405 Np->L_positive_sign = "+"; 4406 Np->decimal = "."; 4407 4408 Np->L_thousands_sep = ","; 4409 Np->L_currency_symbol = " "; 4410 } 4411 } 4412 4413 /* ---------- 4414 * Return pointer of last relevant number after decimal point 4415 * 12.0500 --> last relevant is '5' 4416 * 12.0000 --> last relevant is '.' 4417 * If there is no decimal point, return NULL (which will result in same 4418 * behavior as if FM hadn't been specified). 4419 * ---------- 4420 */ 4421 static char * 4422 get_last_relevant_decnum(char *num) 4423 { 4424 char *result, 4425 *p = strchr(num, '.'); 4426 4427 #ifdef DEBUG_TO_FROM_CHAR 4428 elog(DEBUG_elog_output, "get_last_relevant_decnum()"); 4429 #endif 4430 4431 if (!p) 4432 return NULL; 4433 4434 result = p; 4435 4436 while (*(++p)) 4437 { 4438 if (*p != '0') 4439 result = p; 4440 } 4441 4442 return result; 4443 } 4444 4445 /* 4446 * These macros are used in NUM_processor() and its subsidiary routines. 
4447 * OVERLOAD_TEST: true if we've reached end of input string 4448 * AMOUNT_TEST(s): true if at least s bytes remain in string 4449 */ 4450 #define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len) 4451 #define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s))) 4452 4453 /* ---------- 4454 * Number extraction for TO_NUMBER() 4455 * ---------- 4456 */ 4457 static void 4458 NUM_numpart_from_char(NUMProc *Np, int id, int input_len) 4459 { 4460 bool isread = false; 4461 4462 #ifdef DEBUG_TO_FROM_CHAR 4463 elog(DEBUG_elog_output, " --- scan start --- id=%s", 4464 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???"); 4465 #endif 4466 4467 if (OVERLOAD_TEST) 4468 return; 4469 4470 if (*Np->inout_p == ' ') 4471 Np->inout_p++; 4472 4473 if (OVERLOAD_TEST) 4474 return; 4475 4476 /* 4477 * read sign before number 4478 */ 4479 if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) && 4480 (Np->read_pre + Np->read_post) == 0) 4481 { 4482 #ifdef DEBUG_TO_FROM_CHAR 4483 elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s", 4484 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign); 4485 #endif 4486 4487 /* 4488 * locale sign 4489 */ 4490 if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE) 4491 { 4492 int x = 0; 4493 4494 #ifdef DEBUG_TO_FROM_CHAR 4495 elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p); 4496 #endif 4497 if ((x = strlen(Np->L_negative_sign)) && 4498 AMOUNT_TEST(x) && 4499 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0) 4500 { 4501 Np->inout_p += x; 4502 *Np->number = '-'; 4503 } 4504 else if ((x = strlen(Np->L_positive_sign)) && 4505 AMOUNT_TEST(x) && 4506 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0) 4507 { 4508 Np->inout_p += x; 4509 *Np->number = '+'; 4510 } 4511 } 4512 else 4513 { 4514 #ifdef DEBUG_TO_FROM_CHAR 4515 elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p); 4516 #endif 4517 4518 /* 4519 * simple + - < > 4520 */ 4521 if (*Np->inout_p 
== '-' || (IS_BRACKET(Np->Num) && 4522 *Np->inout_p == '<')) 4523 { 4524 *Np->number = '-'; /* set - */ 4525 Np->inout_p++; 4526 } 4527 else if (*Np->inout_p == '+') 4528 { 4529 *Np->number = '+'; /* set + */ 4530 Np->inout_p++; 4531 } 4532 } 4533 } 4534 4535 if (OVERLOAD_TEST) 4536 return; 4537 4538 #ifdef DEBUG_TO_FROM_CHAR 4539 elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number); 4540 #endif 4541 4542 /* 4543 * read digit or decimal point 4544 */ 4545 if (isdigit((unsigned char) *Np->inout_p)) 4546 { 4547 if (Np->read_dec && Np->read_post == Np->Num->post) 4548 return; 4549 4550 *Np->number_p = *Np->inout_p; 4551 Np->number_p++; 4552 4553 if (Np->read_dec) 4554 Np->read_post++; 4555 else 4556 Np->read_pre++; 4557 4558 isread = true; 4559 4560 #ifdef DEBUG_TO_FROM_CHAR 4561 elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p); 4562 #endif 4563 } 4564 else if (IS_DECIMAL(Np->Num) && Np->read_dec == false) 4565 { 4566 /* 4567 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because 4568 * Np->decimal is always just "." if we don't have a D format token. 4569 * So we just unconditionally match to Np->decimal. 
4570 */ 4571 int x = strlen(Np->decimal); 4572 4573 #ifdef DEBUG_TO_FROM_CHAR 4574 elog(DEBUG_elog_output, "Try read decimal point (%c)", 4575 *Np->inout_p); 4576 #endif 4577 if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0) 4578 { 4579 Np->inout_p += x - 1; 4580 *Np->number_p = '.'; 4581 Np->number_p++; 4582 Np->read_dec = true; 4583 isread = true; 4584 } 4585 } 4586 4587 if (OVERLOAD_TEST) 4588 return; 4589 4590 /* 4591 * Read sign behind "last" number 4592 * 4593 * We need sign detection because determine exact position of post-sign is 4594 * difficult: 4595 * 4596 * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI -> 4597 * 5.01- 4598 */ 4599 if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0) 4600 { 4601 /* 4602 * locale sign (NUM_S) is always anchored behind a last number, if: - 4603 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and 4604 * next char is not digit 4605 */ 4606 if (IS_LSIGN(Np->Num) && isread && 4607 (Np->inout_p + 1) < Np->inout + input_len && 4608 !isdigit((unsigned char) *(Np->inout_p + 1))) 4609 { 4610 int x; 4611 char *tmp = Np->inout_p++; 4612 4613 #ifdef DEBUG_TO_FROM_CHAR 4614 elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p); 4615 #endif 4616 if ((x = strlen(Np->L_negative_sign)) && 4617 AMOUNT_TEST(x) && 4618 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0) 4619 { 4620 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */ 4621 *Np->number = '-'; 4622 } 4623 else if ((x = strlen(Np->L_positive_sign)) && 4624 AMOUNT_TEST(x) && 4625 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0) 4626 { 4627 Np->inout_p += x - 1; /* -1 .. 
NUM_processor() do inout_p++ */ 4628 *Np->number = '+'; 4629 } 4630 if (*Np->number == ' ') 4631 /* no sign read */ 4632 Np->inout_p = tmp; 4633 } 4634 4635 /* 4636 * try read non-locale sign, it's happen only if format is not exact 4637 * and we cannot determine sign position of MI/PL/SG, an example: 4638 * 4639 * FM9.999999MI -> 5.01- 4640 * 4641 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats 4642 * like to_number('1 -', '9S') where sign is not anchored to last 4643 * number. 4644 */ 4645 else if (isread == false && IS_LSIGN(Np->Num) == false && 4646 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))) 4647 { 4648 #ifdef DEBUG_TO_FROM_CHAR 4649 elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p); 4650 #endif 4651 4652 /* 4653 * simple + - 4654 */ 4655 if (*Np->inout_p == '-' || *Np->inout_p == '+') 4656 /* NUM_processor() do inout_p++ */ 4657 *Np->number = *Np->inout_p; 4658 } 4659 } 4660 } 4661 4662 #define IS_PREDEC_SPACE(_n) \ 4663 (IS_ZERO((_n)->Num)==false && \ 4664 (_n)->number == (_n)->number_p && \ 4665 *(_n)->number == '0' && \ 4666 (_n)->Num->post != 0) 4667 4668 /* ---------- 4669 * Add digit or sign to number-string 4670 * ---------- 4671 */ 4672 static void 4673 NUM_numpart_to_char(NUMProc *Np, int id) 4674 { 4675 int end; 4676 4677 if (IS_ROMAN(Np->Num)) 4678 return; 4679 4680 /* Note: in this elog() output not set '\0' in 'inout' */ 4681 4682 #ifdef DEBUG_TO_FROM_CHAR 4683 4684 /* 4685 * Np->num_curr is number of current item in format-picture, it is not 4686 * current position in inout! 
4687 */ 4688 elog(DEBUG_elog_output, 4689 "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"", 4690 Np->sign_wrote, 4691 Np->num_curr, 4692 Np->number_p, 4693 Np->inout); 4694 #endif 4695 Np->num_in = false; 4696 4697 /* 4698 * Write sign if real number will write to output Note: IS_PREDEC_SPACE() 4699 * handle "9.9" --> " .1" 4700 */ 4701 if (Np->sign_wrote == false && 4702 (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) && 4703 (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.'))) 4704 { 4705 if (IS_LSIGN(Np->Num)) 4706 { 4707 if (Np->Num->lsign == NUM_LSIGN_PRE) 4708 { 4709 if (Np->sign == '-') 4710 strcpy(Np->inout_p, Np->L_negative_sign); 4711 else 4712 strcpy(Np->inout_p, Np->L_positive_sign); 4713 Np->inout_p += strlen(Np->inout_p); 4714 Np->sign_wrote = true; 4715 } 4716 } 4717 else if (IS_BRACKET(Np->Num)) 4718 { 4719 *Np->inout_p = Np->sign == '+' ? ' ' : '<'; 4720 ++Np->inout_p; 4721 Np->sign_wrote = true; 4722 } 4723 else if (Np->sign == '+') 4724 { 4725 if (!IS_FILLMODE(Np->Num)) 4726 { 4727 *Np->inout_p = ' '; /* Write + */ 4728 ++Np->inout_p; 4729 } 4730 Np->sign_wrote = true; 4731 } 4732 else if (Np->sign == '-') 4733 { /* Write - */ 4734 *Np->inout_p = '-'; 4735 ++Np->inout_p; 4736 Np->sign_wrote = true; 4737 } 4738 } 4739 4740 4741 /* 4742 * digits / FM / Zero / Dec. 
point 4743 */ 4744 if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC) 4745 { 4746 if (Np->num_curr < Np->out_pre_spaces && 4747 (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num))) 4748 { 4749 /* 4750 * Write blank space 4751 */ 4752 if (!IS_FILLMODE(Np->Num)) 4753 { 4754 *Np->inout_p = ' '; /* Write ' ' */ 4755 ++Np->inout_p; 4756 } 4757 } 4758 else if (IS_ZERO(Np->Num) && 4759 Np->num_curr < Np->out_pre_spaces && 4760 Np->Num->zero_start <= Np->num_curr) 4761 { 4762 /* 4763 * Write ZERO 4764 */ 4765 *Np->inout_p = '0'; /* Write '0' */ 4766 ++Np->inout_p; 4767 Np->num_in = true; 4768 } 4769 else 4770 { 4771 /* 4772 * Write Decimal point 4773 */ 4774 if (*Np->number_p == '.') 4775 { 4776 if (!Np->last_relevant || *Np->last_relevant != '.') 4777 { 4778 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ 4779 Np->inout_p += strlen(Np->inout_p); 4780 } 4781 4782 /* 4783 * Ora 'n' -- FM9.9 --> 'n.' 4784 */ 4785 else if (IS_FILLMODE(Np->Num) && 4786 Np->last_relevant && *Np->last_relevant == '.') 4787 { 4788 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ 4789 Np->inout_p += strlen(Np->inout_p); 4790 } 4791 } 4792 else 4793 { 4794 /* 4795 * Write Digits 4796 */ 4797 if (Np->last_relevant && Np->number_p > Np->last_relevant && 4798 id != NUM_0) 4799 ; 4800 4801 /* 4802 * '0.1' -- 9.9 --> ' .1' 4803 */ 4804 else if (IS_PREDEC_SPACE(Np)) 4805 { 4806 if (!IS_FILLMODE(Np->Num)) 4807 { 4808 *Np->inout_p = ' '; 4809 ++Np->inout_p; 4810 } 4811 4812 /* 4813 * '0' -- FM9.9 --> '0.' 4814 */ 4815 else if (Np->last_relevant && *Np->last_relevant == '.') 4816 { 4817 *Np->inout_p = '0'; 4818 ++Np->inout_p; 4819 } 4820 } 4821 else 4822 { 4823 *Np->inout_p = *Np->number_p; /* Write DIGIT */ 4824 ++Np->inout_p; 4825 Np->num_in = true; 4826 } 4827 } 4828 /* do no exceed string length */ 4829 if (*Np->number_p) 4830 ++Np->number_p; 4831 } 4832 4833 end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 
1 : 0); 4834 4835 if (Np->last_relevant && Np->last_relevant == Np->number_p) 4836 end = Np->num_curr; 4837 4838 if (Np->num_curr + 1 == end) 4839 { 4840 if (Np->sign_wrote == true && IS_BRACKET(Np->Num)) 4841 { 4842 *Np->inout_p = Np->sign == '+' ? ' ' : '>'; 4843 ++Np->inout_p; 4844 } 4845 else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST) 4846 { 4847 if (Np->sign == '-') 4848 strcpy(Np->inout_p, Np->L_negative_sign); 4849 else 4850 strcpy(Np->inout_p, Np->L_positive_sign); 4851 Np->inout_p += strlen(Np->inout_p); 4852 } 4853 } 4854 } 4855 4856 ++Np->num_curr; 4857 } 4858 4859 /* 4860 * Skip over "n" input characters, but only if they aren't numeric data 4861 */ 4862 static void 4863 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len) 4864 { 4865 while (n-- > 0) 4866 { 4867 if (OVERLOAD_TEST) 4868 break; /* end of input */ 4869 if (strchr("0123456789.,+-", *Np->inout_p) != NULL) 4870 break; /* it's a data character */ 4871 Np->inout_p += pg_mblen(Np->inout_p); 4872 } 4873 } 4874 4875 static char * 4876 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, 4877 char *number, int input_len, int to_char_out_pre_spaces, 4878 int sign, bool is_to_char, Oid collid) 4879 { 4880 FormatNode *n; 4881 NUMProc _Np, 4882 *Np = &_Np; 4883 const char *pattern; 4884 int pattern_len; 4885 4886 MemSet(Np, 0, sizeof(NUMProc)); 4887 4888 Np->Num = Num; 4889 Np->is_to_char = is_to_char; 4890 Np->number = number; 4891 Np->inout = inout; 4892 Np->last_relevant = NULL; 4893 Np->read_post = 0; 4894 Np->read_pre = 0; 4895 Np->read_dec = false; 4896 4897 if (Np->Num->zero_start) 4898 --Np->Num->zero_start; 4899 4900 if (IS_EEEE(Np->Num)) 4901 { 4902 if (!Np->is_to_char) 4903 ereport(ERROR, 4904 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 4905 errmsg("\"EEEE\" not supported for input"))); 4906 return strcpy(inout, number); 4907 } 4908 4909 /* 4910 * Roman correction 4911 */ 4912 if (IS_ROMAN(Np->Num)) 4913 { 4914 if (!Np->is_to_char) 4915 ereport(ERROR, 4916 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 4917 errmsg("\"RN\" not supported for input"))); 4918 4919 Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post = 4920 Np->Num->pre = Np->out_pre_spaces = Np->sign = 0; 4921 4922 if (IS_FILLMODE(Np->Num)) 4923 { 4924 Np->Num->flag = 0; 4925 Np->Num->flag |= NUM_F_FILLMODE; 4926 } 4927 else 4928 Np->Num->flag = 0; 4929 Np->Num->flag |= NUM_F_ROMAN; 4930 } 4931 4932 /* 4933 * Sign 4934 */ 4935 if (is_to_char) 4936 { 4937 Np->sign = sign; 4938 4939 /* MI/PL/SG - write sign itself and not in number */ 4940 if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)) 4941 { 4942 if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false) 4943 Np->sign_wrote = false; /* need sign */ 4944 else 4945 Np->sign_wrote = true; /* needn't sign */ 4946 } 4947 else 4948 { 4949 if (Np->sign != '-') 4950 { 4951 if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num)) 4952 Np->Num->flag &= ~NUM_F_BRACKET; 4953 if (IS_MINUS(Np->Num)) 4954 Np->Num->flag &= ~NUM_F_MINUS; 4955 } 4956 else if (Np->sign != '+' && IS_PLUS(Np->Num)) 4957 Np->Num->flag &= ~NUM_F_PLUS; 4958 4959 if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false) 4960 Np->sign_wrote = true; /* needn't sign */ 4961 else 4962 Np->sign_wrote = false; /* need sign */ 4963 4964 if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num) 4965 Np->Num->lsign = NUM_LSIGN_POST; 4966 } 4967 } 4968 else 4969 Np->sign = false; 4970 4971 /* 4972 * Count 4973 */ 4974 Np->num_count = Np->Num->post + Np->Num->pre - 1; 4975 4976 if (is_to_char) 4977 { 4978 Np->out_pre_spaces = to_char_out_pre_spaces; 4979 4980 if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num)) 4981 { 4982 Np->last_relevant = get_last_relevant_decnum(Np->number); 4983 4984 /* 4985 * If any '0' specifiers are present, make sure we don't strip 4986 * those digits. 
4987 */ 4988 if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces) 4989 { 4990 char *last_zero; 4991 4992 last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces); 4993 if (Np->last_relevant < last_zero) 4994 Np->last_relevant = last_zero; 4995 } 4996 } 4997 4998 if (Np->sign_wrote == false && Np->out_pre_spaces == 0) 4999 ++Np->num_count; 5000 } 5001 else 5002 { 5003 Np->out_pre_spaces = 0; 5004 *Np->number = ' '; /* sign space */ 5005 *(Np->number + 1) = '\0'; 5006 } 5007 5008 Np->num_in = 0; 5009 Np->num_curr = 0; 5010 5011 #ifdef DEBUG_TO_FROM_CHAR 5012 elog(DEBUG_elog_output, 5013 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s", 5014 Np->sign, 5015 Np->number, 5016 Np->Num->pre, 5017 Np->Num->post, 5018 Np->num_count, 5019 Np->out_pre_spaces, 5020 Np->sign_wrote ? "Yes" : "No", 5021 IS_ZERO(Np->Num) ? "Yes" : "No", 5022 Np->Num->zero_start, 5023 Np->Num->zero_end, 5024 Np->last_relevant ? Np->last_relevant : "<not set>", 5025 IS_BRACKET(Np->Num) ? "Yes" : "No", 5026 IS_PLUS(Np->Num) ? "Yes" : "No", 5027 IS_MINUS(Np->Num) ? "Yes" : "No", 5028 IS_FILLMODE(Np->Num) ? "Yes" : "No", 5029 IS_ROMAN(Np->Num) ? "Yes" : "No", 5030 IS_EEEE(Np->Num) ? "Yes" : "No" 5031 ); 5032 #endif 5033 5034 /* 5035 * Locale 5036 */ 5037 NUM_prepare_locale(Np); 5038 5039 /* 5040 * Processor direct cycle 5041 */ 5042 if (Np->is_to_char) 5043 Np->number_p = Np->number; 5044 else 5045 Np->number_p = Np->number + 1; /* first char is space for sign */ 5046 5047 for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++) 5048 { 5049 if (!Np->is_to_char) 5050 { 5051 /* 5052 * Check at least one byte remains to be scanned. (In actions 5053 * below, must use AMOUNT_TEST if we want to read more bytes than 5054 * that.) 
5055 */ 5056 if (OVERLOAD_TEST) 5057 break; 5058 } 5059 5060 /* 5061 * Format pictures actions 5062 */ 5063 if (n->type == NODE_TYPE_ACTION) 5064 { 5065 /* 5066 * Create/read digit/zero/blank/sign/special-case 5067 * 5068 * 'NUM_S' note: The locale sign is anchored to number and we 5069 * read/write it when we work with first or last number 5070 * (NUM_0/NUM_9). This is why NUM_S is missing in switch(). 5071 * 5072 * Notice the "Np->inout_p++" at the bottom of the loop. This is 5073 * why most of the actions advance inout_p one less than you might 5074 * expect. In cases where we don't want that increment to happen, 5075 * a switch case ends with "continue" not "break". 5076 */ 5077 switch (n->key->id) 5078 { 5079 case NUM_9: 5080 case NUM_0: 5081 case NUM_DEC: 5082 case NUM_D: 5083 if (Np->is_to_char) 5084 { 5085 NUM_numpart_to_char(Np, n->key->id); 5086 continue; /* for() */ 5087 } 5088 else 5089 { 5090 NUM_numpart_from_char(Np, n->key->id, input_len); 5091 break; /* switch() case: */ 5092 } 5093 5094 case NUM_COMMA: 5095 if (Np->is_to_char) 5096 { 5097 if (!Np->num_in) 5098 { 5099 if (IS_FILLMODE(Np->Num)) 5100 continue; 5101 else 5102 *Np->inout_p = ' '; 5103 } 5104 else 5105 *Np->inout_p = ','; 5106 } 5107 else 5108 { 5109 if (!Np->num_in) 5110 { 5111 if (IS_FILLMODE(Np->Num)) 5112 continue; 5113 } 5114 if (*Np->inout_p != ',') 5115 continue; 5116 } 5117 break; 5118 5119 case NUM_G: 5120 pattern = Np->L_thousands_sep; 5121 pattern_len = strlen(pattern); 5122 if (Np->is_to_char) 5123 { 5124 if (!Np->num_in) 5125 { 5126 if (IS_FILLMODE(Np->Num)) 5127 continue; 5128 else 5129 { 5130 /* just in case there are MB chars */ 5131 pattern_len = pg_mbstrlen(pattern); 5132 memset(Np->inout_p, ' ', pattern_len); 5133 Np->inout_p += pattern_len - 1; 5134 } 5135 } 5136 else 5137 { 5138 strcpy(Np->inout_p, pattern); 5139 Np->inout_p += pattern_len - 1; 5140 } 5141 } 5142 else 5143 { 5144 if (!Np->num_in) 5145 { 5146 if (IS_FILLMODE(Np->Num)) 5147 continue; 5148 } 5149 5150 
/* 5151 * Because L_thousands_sep typically contains data 5152 * characters (either '.' or ','), we can't use 5153 * NUM_eat_non_data_chars here. Instead skip only if 5154 * the input matches L_thousands_sep. 5155 */ 5156 if (AMOUNT_TEST(pattern_len) && 5157 strncmp(Np->inout_p, pattern, pattern_len) == 0) 5158 Np->inout_p += pattern_len - 1; 5159 else 5160 continue; 5161 } 5162 break; 5163 5164 case NUM_L: 5165 pattern = Np->L_currency_symbol; 5166 if (Np->is_to_char) 5167 { 5168 strcpy(Np->inout_p, pattern); 5169 Np->inout_p += strlen(pattern) - 1; 5170 } 5171 else 5172 { 5173 NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len); 5174 continue; 5175 } 5176 break; 5177 5178 case NUM_RN: 5179 if (IS_FILLMODE(Np->Num)) 5180 { 5181 strcpy(Np->inout_p, Np->number_p); 5182 Np->inout_p += strlen(Np->inout_p) - 1; 5183 } 5184 else 5185 { 5186 sprintf(Np->inout_p, "%15s", Np->number_p); 5187 Np->inout_p += strlen(Np->inout_p) - 1; 5188 } 5189 break; 5190 5191 case NUM_rn: 5192 if (IS_FILLMODE(Np->Num)) 5193 { 5194 strcpy(Np->inout_p, asc_tolower_z(Np->number_p)); 5195 Np->inout_p += strlen(Np->inout_p) - 1; 5196 } 5197 else 5198 { 5199 sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p)); 5200 Np->inout_p += strlen(Np->inout_p) - 1; 5201 } 5202 break; 5203 5204 case NUM_th: 5205 if (IS_ROMAN(Np->Num) || *Np->number == '#' || 5206 Np->sign == '-' || IS_DECIMAL(Np->Num)) 5207 continue; 5208 5209 if (Np->is_to_char) 5210 { 5211 strcpy(Np->inout_p, get_th(Np->number, TH_LOWER)); 5212 Np->inout_p += 1; 5213 } 5214 else 5215 { 5216 /* All variants of 'th' occupy 2 characters */ 5217 NUM_eat_non_data_chars(Np, 2, input_len); 5218 continue; 5219 } 5220 break; 5221 5222 case NUM_TH: 5223 if (IS_ROMAN(Np->Num) || *Np->number == '#' || 5224 Np->sign == '-' || IS_DECIMAL(Np->Num)) 5225 continue; 5226 5227 if (Np->is_to_char) 5228 { 5229 strcpy(Np->inout_p, get_th(Np->number, TH_UPPER)); 5230 Np->inout_p += 1; 5231 } 5232 else 5233 { 5234 /* All variants of 'TH' occupy 2 
characters */ 5235 NUM_eat_non_data_chars(Np, 2, input_len); 5236 continue; 5237 } 5238 break; 5239 5240 case NUM_MI: 5241 if (Np->is_to_char) 5242 { 5243 if (Np->sign == '-') 5244 *Np->inout_p = '-'; 5245 else if (IS_FILLMODE(Np->Num)) 5246 continue; 5247 else 5248 *Np->inout_p = ' '; 5249 } 5250 else 5251 { 5252 if (*Np->inout_p == '-') 5253 *Np->number = '-'; 5254 else 5255 { 5256 NUM_eat_non_data_chars(Np, 1, input_len); 5257 continue; 5258 } 5259 } 5260 break; 5261 5262 case NUM_PL: 5263 if (Np->is_to_char) 5264 { 5265 if (Np->sign == '+') 5266 *Np->inout_p = '+'; 5267 else if (IS_FILLMODE(Np->Num)) 5268 continue; 5269 else 5270 *Np->inout_p = ' '; 5271 } 5272 else 5273 { 5274 if (*Np->inout_p == '+') 5275 *Np->number = '+'; 5276 else 5277 { 5278 NUM_eat_non_data_chars(Np, 1, input_len); 5279 continue; 5280 } 5281 } 5282 break; 5283 5284 case NUM_SG: 5285 if (Np->is_to_char) 5286 *Np->inout_p = Np->sign; 5287 else 5288 { 5289 if (*Np->inout_p == '-') 5290 *Np->number = '-'; 5291 else if (*Np->inout_p == '+') 5292 *Np->number = '+'; 5293 else 5294 { 5295 NUM_eat_non_data_chars(Np, 1, input_len); 5296 continue; 5297 } 5298 } 5299 break; 5300 5301 default: 5302 continue; 5303 break; 5304 } 5305 } 5306 else 5307 { 5308 /* 5309 * In TO_CHAR, non-pattern characters in the format are copied to 5310 * the output. In TO_NUMBER, we skip one input character for each 5311 * non-pattern format character, whether or not it matches the 5312 * format character. 5313 */ 5314 if (Np->is_to_char) 5315 { 5316 strcpy(Np->inout_p, n->character); 5317 Np->inout_p += strlen(Np->inout_p); 5318 } 5319 else 5320 { 5321 Np->inout_p += pg_mblen(Np->inout_p); 5322 } 5323 continue; 5324 } 5325 Np->inout_p++; 5326 } 5327 5328 if (Np->is_to_char) 5329 { 5330 *Np->inout_p = '\0'; 5331 return Np->inout; 5332 } 5333 else 5334 { 5335 if (*(Np->number_p - 1) == '.') 5336 *(Np->number_p - 1) = '\0'; 5337 else 5338 *Np->number_p = '\0'; 5339 5340 /* 5341 * Correction - precision of dec. 
number 5342 */ 5343 Np->Num->post = Np->read_post; 5344 5345 #ifdef DEBUG_TO_FROM_CHAR 5346 elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number); 5347 #endif 5348 return Np->number; 5349 } 5350 } 5351 5352 /* ---------- 5353 * MACRO: Start part of NUM - for all NUM's to_char variants 5354 * (sorry, but I hate copy same code - macro is better..) 5355 * ---------- 5356 */ 5357 #define NUM_TOCHAR_prepare \ 5358 do { \ 5359 int len = VARSIZE_ANY_EXHDR(fmt); \ 5360 if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ) \ 5361 PG_RETURN_TEXT_P(cstring_to_text("")); \ 5362 result = (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \ 5363 format = NUM_cache(len, &Num, fmt, &shouldFree); \ 5364 } while (0) 5365 5366 /* ---------- 5367 * MACRO: Finish part of NUM 5368 * ---------- 5369 */ 5370 #define NUM_TOCHAR_finish \ 5371 do { \ 5372 int len; \ 5373 \ 5374 NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \ 5375 \ 5376 if (shouldFree) \ 5377 pfree(format); \ 5378 \ 5379 /* \ 5380 * Convert null-terminated representation of result to standard text. \ 5381 * The result is usually much bigger than it needs to be, but there \ 5382 * seems little point in realloc'ing it smaller. 
\ 5383 */ \ 5384 len = strlen(VARDATA(result)); \ 5385 SET_VARSIZE(result, len + VARHDRSZ); \ 5386 } while (0) 5387 5388 /* ------------------- 5389 * NUMERIC to_number() (convert string to numeric) 5390 * ------------------- 5391 */ 5392 Datum 5393 numeric_to_number(PG_FUNCTION_ARGS) 5394 { 5395 text *value = PG_GETARG_TEXT_PP(0); 5396 text *fmt = PG_GETARG_TEXT_PP(1); 5397 NUMDesc Num; 5398 Datum result; 5399 FormatNode *format; 5400 char *numstr; 5401 bool shouldFree; 5402 int len = 0; 5403 int scale, 5404 precision; 5405 5406 len = VARSIZE_ANY_EXHDR(fmt); 5407 5408 if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ) 5409 PG_RETURN_NULL(); 5410 5411 format = NUM_cache(len, &Num, fmt, &shouldFree); 5412 5413 numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1); 5414 5415 NUM_processor(format, &Num, VARDATA_ANY(value), numstr, 5416 VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION()); 5417 5418 scale = Num.post; 5419 precision = Num.pre + Num.multi + scale; 5420 5421 if (shouldFree) 5422 pfree(format); 5423 5424 result = DirectFunctionCall3(numeric_in, 5425 CStringGetDatum(numstr), 5426 ObjectIdGetDatum(InvalidOid), 5427 Int32GetDatum(((precision << 16) | scale) + VARHDRSZ)); 5428 5429 if (IS_MULTI(&Num)) 5430 { 5431 Numeric x; 5432 Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric, 5433 Int32GetDatum(10))); 5434 Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric, 5435 Int32GetDatum(-Num.multi))); 5436 5437 x = DatumGetNumeric(DirectFunctionCall2(numeric_power, 5438 NumericGetDatum(a), 5439 NumericGetDatum(b))); 5440 result = DirectFunctionCall2(numeric_mul, 5441 result, 5442 NumericGetDatum(x)); 5443 } 5444 5445 pfree(numstr); 5446 return result; 5447 } 5448 5449 /* ------------------ 5450 * NUMERIC to_char() 5451 * ------------------ 5452 */ 5453 Datum 5454 numeric_to_char(PG_FUNCTION_ARGS) 5455 { 5456 Numeric value = PG_GETARG_NUMERIC(0); 5457 text *fmt = PG_GETARG_TEXT_PP(1); 5458 NUMDesc Num; 5459 FormatNode *format; 5460 text 
*result; 5461 bool shouldFree; 5462 int out_pre_spaces = 0, 5463 sign = 0; 5464 char *numstr, 5465 *orgnum, 5466 *p; 5467 Numeric x; 5468 5469 NUM_TOCHAR_prepare; 5470 5471 /* 5472 * On DateType depend part (numeric) 5473 */ 5474 if (IS_ROMAN(&Num)) 5475 { 5476 x = DatumGetNumeric(DirectFunctionCall2(numeric_round, 5477 NumericGetDatum(value), 5478 Int32GetDatum(0))); 5479 numstr = orgnum = 5480 int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4, 5481 NumericGetDatum(x)))); 5482 } 5483 else if (IS_EEEE(&Num)) 5484 { 5485 orgnum = numeric_out_sci(value, Num.post); 5486 5487 /* 5488 * numeric_out_sci() does not emit a sign for positive numbers. We 5489 * need to add a space in this case so that positive and negative 5490 * numbers are aligned. We also have to do the right thing for NaN. 5491 */ 5492 if (strcmp(orgnum, "NaN") == 0) 5493 { 5494 /* 5495 * Allow 6 characters for the leading sign, the decimal point, 5496 * "e", the exponent's sign and two exponent digits. 5497 */ 5498 numstr = (char *) palloc(Num.pre + Num.post + 7); 5499 fill_str(numstr, '#', Num.pre + Num.post + 6); 5500 *numstr = ' '; 5501 *(numstr + Num.pre + 1) = '.'; 5502 } 5503 else if (*orgnum != '-') 5504 { 5505 numstr = (char *) palloc(strlen(orgnum) + 2); 5506 *numstr = ' '; 5507 strcpy(numstr + 1, orgnum); 5508 } 5509 else 5510 { 5511 numstr = orgnum; 5512 } 5513 } 5514 else 5515 { 5516 int numstr_pre_len; 5517 Numeric val = value; 5518 5519 if (IS_MULTI(&Num)) 5520 { 5521 Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric, 5522 Int32GetDatum(10))); 5523 Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric, 5524 Int32GetDatum(Num.multi))); 5525 5526 x = DatumGetNumeric(DirectFunctionCall2(numeric_power, 5527 NumericGetDatum(a), 5528 NumericGetDatum(b))); 5529 val = DatumGetNumeric(DirectFunctionCall2(numeric_mul, 5530 NumericGetDatum(value), 5531 NumericGetDatum(x))); 5532 Num.pre += Num.multi; 5533 } 5534 5535 x = DatumGetNumeric(DirectFunctionCall2(numeric_round, 
5536 NumericGetDatum(val), 5537 Int32GetDatum(Num.post))); 5538 orgnum = DatumGetCString(DirectFunctionCall1(numeric_out, 5539 NumericGetDatum(x))); 5540 5541 if (*orgnum == '-') 5542 { 5543 sign = '-'; 5544 numstr = orgnum + 1; 5545 } 5546 else 5547 { 5548 sign = '+'; 5549 numstr = orgnum; 5550 } 5551 5552 if ((p = strchr(numstr, '.'))) 5553 numstr_pre_len = p - numstr; 5554 else 5555 numstr_pre_len = strlen(numstr); 5556 5557 /* needs padding? */ 5558 if (numstr_pre_len < Num.pre) 5559 out_pre_spaces = Num.pre - numstr_pre_len; 5560 /* overflowed prefix digit format? */ 5561 else if (numstr_pre_len > Num.pre) 5562 { 5563 numstr = (char *) palloc(Num.pre + Num.post + 2); 5564 fill_str(numstr, '#', Num.pre + Num.post + 1); 5565 *(numstr + Num.pre) = '.'; 5566 } 5567 } 5568 5569 NUM_TOCHAR_finish; 5570 PG_RETURN_TEXT_P(result); 5571 } 5572 5573 /* --------------- 5574 * INT4 to_char() 5575 * --------------- 5576 */ 5577 Datum 5578 int4_to_char(PG_FUNCTION_ARGS) 5579 { 5580 int32 value = PG_GETARG_INT32(0); 5581 text *fmt = PG_GETARG_TEXT_PP(1); 5582 NUMDesc Num; 5583 FormatNode *format; 5584 text *result; 5585 bool shouldFree; 5586 int out_pre_spaces = 0, 5587 sign = 0; 5588 char *numstr, 5589 *orgnum; 5590 5591 NUM_TOCHAR_prepare; 5592 5593 /* 5594 * On DateType depend part (int32) 5595 */ 5596 if (IS_ROMAN(&Num)) 5597 numstr = orgnum = int_to_roman(value); 5598 else if (IS_EEEE(&Num)) 5599 { 5600 /* we can do it easily because float8 won't lose any precision */ 5601 float8 val = (float8) value; 5602 5603 orgnum = (char *) psprintf("%+.*e", Num.post, val); 5604 5605 /* 5606 * Swap a leading positive sign for a space. 
5607 */ 5608 if (*orgnum == '+') 5609 *orgnum = ' '; 5610 5611 numstr = orgnum; 5612 } 5613 else 5614 { 5615 int numstr_pre_len; 5616 5617 if (IS_MULTI(&Num)) 5618 { 5619 orgnum = DatumGetCString(DirectFunctionCall1(int4out, 5620 Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi))))); 5621 Num.pre += Num.multi; 5622 } 5623 else 5624 { 5625 orgnum = DatumGetCString(DirectFunctionCall1(int4out, 5626 Int32GetDatum(value))); 5627 } 5628 5629 if (*orgnum == '-') 5630 { 5631 sign = '-'; 5632 orgnum++; 5633 } 5634 else 5635 sign = '+'; 5636 5637 numstr_pre_len = strlen(orgnum); 5638 5639 /* post-decimal digits? Pad out with zeros. */ 5640 if (Num.post) 5641 { 5642 numstr = (char *) palloc(numstr_pre_len + Num.post + 2); 5643 strcpy(numstr, orgnum); 5644 *(numstr + numstr_pre_len) = '.'; 5645 memset(numstr + numstr_pre_len + 1, '0', Num.post); 5646 *(numstr + numstr_pre_len + Num.post + 1) = '\0'; 5647 } 5648 else 5649 numstr = orgnum; 5650 5651 /* needs padding? */ 5652 if (numstr_pre_len < Num.pre) 5653 out_pre_spaces = Num.pre - numstr_pre_len; 5654 /* overflowed prefix digit format? */ 5655 else if (numstr_pre_len > Num.pre) 5656 { 5657 numstr = (char *) palloc(Num.pre + Num.post + 2); 5658 fill_str(numstr, '#', Num.pre + Num.post + 1); 5659 *(numstr + Num.pre) = '.'; 5660 } 5661 } 5662 5663 NUM_TOCHAR_finish; 5664 PG_RETURN_TEXT_P(result); 5665 } 5666 5667 /* --------------- 5668 * INT8 to_char() 5669 * --------------- 5670 */ 5671 Datum 5672 int8_to_char(PG_FUNCTION_ARGS) 5673 { 5674 int64 value = PG_GETARG_INT64(0); 5675 text *fmt = PG_GETARG_TEXT_PP(1); 5676 NUMDesc Num; 5677 FormatNode *format; 5678 text *result; 5679 bool shouldFree; 5680 int out_pre_spaces = 0, 5681 sign = 0; 5682 char *numstr, 5683 *orgnum; 5684 5685 NUM_TOCHAR_prepare; 5686 5687 /* 5688 * On DateType depend part (int32) 5689 */ 5690 if (IS_ROMAN(&Num)) 5691 { 5692 /* Currently don't support int8 conversion to roman... 
*/ 5693 numstr = orgnum = int_to_roman(DatumGetInt32( 5694 DirectFunctionCall1(int84, Int64GetDatum(value)))); 5695 } 5696 else if (IS_EEEE(&Num)) 5697 { 5698 /* to avoid loss of precision, must go via numeric not float8 */ 5699 Numeric val; 5700 5701 val = DatumGetNumeric(DirectFunctionCall1(int8_numeric, 5702 Int64GetDatum(value))); 5703 orgnum = numeric_out_sci(val, Num.post); 5704 5705 /* 5706 * numeric_out_sci() does not emit a sign for positive numbers. We 5707 * need to add a space in this case so that positive and negative 5708 * numbers are aligned. We don't have to worry about NaN here. 5709 */ 5710 if (*orgnum != '-') 5711 { 5712 numstr = (char *) palloc(strlen(orgnum) + 2); 5713 *numstr = ' '; 5714 strcpy(numstr + 1, orgnum); 5715 } 5716 else 5717 { 5718 numstr = orgnum; 5719 } 5720 } 5721 else 5722 { 5723 int numstr_pre_len; 5724 5725 if (IS_MULTI(&Num)) 5726 { 5727 double multi = pow((double) 10, (double) Num.multi); 5728 5729 value = DatumGetInt64(DirectFunctionCall2(int8mul, 5730 Int64GetDatum(value), 5731 DirectFunctionCall1(dtoi8, 5732 Float8GetDatum(multi)))); 5733 Num.pre += Num.multi; 5734 } 5735 5736 orgnum = DatumGetCString(DirectFunctionCall1(int8out, 5737 Int64GetDatum(value))); 5738 5739 if (*orgnum == '-') 5740 { 5741 sign = '-'; 5742 orgnum++; 5743 } 5744 else 5745 sign = '+'; 5746 5747 numstr_pre_len = strlen(orgnum); 5748 5749 /* post-decimal digits? Pad out with zeros. */ 5750 if (Num.post) 5751 { 5752 numstr = (char *) palloc(numstr_pre_len + Num.post + 2); 5753 strcpy(numstr, orgnum); 5754 *(numstr + numstr_pre_len) = '.'; 5755 memset(numstr + numstr_pre_len + 1, '0', Num.post); 5756 *(numstr + numstr_pre_len + Num.post + 1) = '\0'; 5757 } 5758 else 5759 numstr = orgnum; 5760 5761 /* needs padding? */ 5762 if (numstr_pre_len < Num.pre) 5763 out_pre_spaces = Num.pre - numstr_pre_len; 5764 /* overflowed prefix digit format? 
*/ 5765 else if (numstr_pre_len > Num.pre) 5766 { 5767 numstr = (char *) palloc(Num.pre + Num.post + 2); 5768 fill_str(numstr, '#', Num.pre + Num.post + 1); 5769 *(numstr + Num.pre) = '.'; 5770 } 5771 } 5772 5773 NUM_TOCHAR_finish; 5774 PG_RETURN_TEXT_P(result); 5775 } 5776 5777 /* ----------------- 5778 * FLOAT4 to_char() 5779 * ----------------- 5780 */ 5781 Datum 5782 float4_to_char(PG_FUNCTION_ARGS) 5783 { 5784 float4 value = PG_GETARG_FLOAT4(0); 5785 text *fmt = PG_GETARG_TEXT_PP(1); 5786 NUMDesc Num; 5787 FormatNode *format; 5788 text *result; 5789 bool shouldFree; 5790 int out_pre_spaces = 0, 5791 sign = 0; 5792 char *numstr, 5793 *orgnum, 5794 *p; 5795 5796 NUM_TOCHAR_prepare; 5797 5798 if (IS_ROMAN(&Num)) 5799 numstr = orgnum = int_to_roman((int) rint(value)); 5800 else if (IS_EEEE(&Num)) 5801 { 5802 if (isnan(value) || isinf(value)) 5803 { 5804 /* 5805 * Allow 6 characters for the leading sign, the decimal point, 5806 * "e", the exponent's sign and two exponent digits. 5807 */ 5808 numstr = (char *) palloc(Num.pre + Num.post + 7); 5809 fill_str(numstr, '#', Num.pre + Num.post + 6); 5810 *numstr = ' '; 5811 *(numstr + Num.pre + 1) = '.'; 5812 } 5813 else 5814 { 5815 numstr = orgnum = psprintf("%+.*e", Num.post, value); 5816 5817 /* 5818 * Swap a leading positive sign for a space. 
5819 */ 5820 if (*orgnum == '+') 5821 *orgnum = ' '; 5822 5823 numstr = orgnum; 5824 } 5825 } 5826 else 5827 { 5828 float4 val = value; 5829 int numstr_pre_len; 5830 5831 if (IS_MULTI(&Num)) 5832 { 5833 float multi = pow((double) 10, (double) Num.multi); 5834 5835 val = value * multi; 5836 Num.pre += Num.multi; 5837 } 5838 5839 orgnum = (char *) psprintf("%.0f", fabs(val)); 5840 numstr_pre_len = strlen(orgnum); 5841 5842 /* adjust post digits to fit max float digits */ 5843 if (numstr_pre_len >= FLT_DIG) 5844 Num.post = 0; 5845 else if (numstr_pre_len + Num.post > FLT_DIG) 5846 Num.post = FLT_DIG - numstr_pre_len; 5847 orgnum = psprintf("%.*f", Num.post, val); 5848 5849 if (*orgnum == '-') 5850 { /* < 0 */ 5851 sign = '-'; 5852 numstr = orgnum + 1; 5853 } 5854 else 5855 { 5856 sign = '+'; 5857 numstr = orgnum; 5858 } 5859 5860 if ((p = strchr(numstr, '.'))) 5861 numstr_pre_len = p - numstr; 5862 else 5863 numstr_pre_len = strlen(numstr); 5864 5865 /* needs padding? */ 5866 if (numstr_pre_len < Num.pre) 5867 out_pre_spaces = Num.pre - numstr_pre_len; 5868 /* overflowed prefix digit format? 
*/ 5869 else if (numstr_pre_len > Num.pre) 5870 { 5871 numstr = (char *) palloc(Num.pre + Num.post + 2); 5872 fill_str(numstr, '#', Num.pre + Num.post + 1); 5873 *(numstr + Num.pre) = '.'; 5874 } 5875 } 5876 5877 NUM_TOCHAR_finish; 5878 PG_RETURN_TEXT_P(result); 5879 } 5880 5881 /* ----------------- 5882 * FLOAT8 to_char() 5883 * ----------------- 5884 */ 5885 Datum 5886 float8_to_char(PG_FUNCTION_ARGS) 5887 { 5888 float8 value = PG_GETARG_FLOAT8(0); 5889 text *fmt = PG_GETARG_TEXT_PP(1); 5890 NUMDesc Num; 5891 FormatNode *format; 5892 text *result; 5893 bool shouldFree; 5894 int out_pre_spaces = 0, 5895 sign = 0; 5896 char *numstr, 5897 *orgnum, 5898 *p; 5899 5900 NUM_TOCHAR_prepare; 5901 5902 if (IS_ROMAN(&Num)) 5903 numstr = orgnum = int_to_roman((int) rint(value)); 5904 else if (IS_EEEE(&Num)) 5905 { 5906 if (isnan(value) || isinf(value)) 5907 { 5908 /* 5909 * Allow 6 characters for the leading sign, the decimal point, 5910 * "e", the exponent's sign and two exponent digits. 5911 */ 5912 numstr = (char *) palloc(Num.pre + Num.post + 7); 5913 fill_str(numstr, '#', Num.pre + Num.post + 6); 5914 *numstr = ' '; 5915 *(numstr + Num.pre + 1) = '.'; 5916 } 5917 else 5918 { 5919 numstr = orgnum = (char *) psprintf("%+.*e", Num.post, value); 5920 5921 /* 5922 * Swap a leading positive sign for a space. 
5923 */ 5924 if (*orgnum == '+') 5925 *orgnum = ' '; 5926 5927 numstr = orgnum; 5928 } 5929 } 5930 else 5931 { 5932 float8 val = value; 5933 int numstr_pre_len; 5934 5935 if (IS_MULTI(&Num)) 5936 { 5937 double multi = pow((double) 10, (double) Num.multi); 5938 5939 val = value * multi; 5940 Num.pre += Num.multi; 5941 } 5942 orgnum = psprintf("%.0f", fabs(val)); 5943 numstr_pre_len = strlen(orgnum); 5944 5945 /* adjust post digits to fit max double digits */ 5946 if (numstr_pre_len >= DBL_DIG) 5947 Num.post = 0; 5948 else if (numstr_pre_len + Num.post > DBL_DIG) 5949 Num.post = DBL_DIG - numstr_pre_len; 5950 orgnum = psprintf("%.*f", Num.post, val); 5951 5952 if (*orgnum == '-') 5953 { /* < 0 */ 5954 sign = '-'; 5955 numstr = orgnum + 1; 5956 } 5957 else 5958 { 5959 sign = '+'; 5960 numstr = orgnum; 5961 } 5962 5963 if ((p = strchr(numstr, '.'))) 5964 numstr_pre_len = p - numstr; 5965 else 5966 numstr_pre_len = strlen(numstr); 5967 5968 /* needs padding? */ 5969 if (numstr_pre_len < Num.pre) 5970 out_pre_spaces = Num.pre - numstr_pre_len; 5971 /* overflowed prefix digit format? */ 5972 else if (numstr_pre_len > Num.pre) 5973 { 5974 numstr = (char *) palloc(Num.pre + Num.post + 2); 5975 fill_str(numstr, '#', Num.pre + Num.post + 1); 5976 *(numstr + Num.pre) = '.'; 5977 } 5978 } 5979 5980 NUM_TOCHAR_finish; 5981 PG_RETURN_TEXT_P(result); 5982 } 5983