1 /* $OpenBSD: strptime.c,v 1.22 2016/05/23 00:05:15 guenther Exp $ */ 2 /* $NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $ */ 3 /*- 4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code was contributed to The NetBSD Foundation by Klaus Klein. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <ctype.h> 32 #include <locale.h> 33 #include <string.h> 34 #include <time.h> 35 36 #include "localedef.h" 37 #include "private.h" 38 #include "tzfile.h" 39 40 #define _ctloc(x) (_CurrentTimeLocale->x) 41 42 /* 43 * We do not implement alternate representations. However, we always 44 * check whether a given modifier is allowed for a certain conversion. 45 */ 46 #define _ALT_E 0x01 47 #define _ALT_O 0x02 48 #define _LEGAL_ALT(x) { if (alt_format & ~(x)) return (0); } 49 50 /* 51 * We keep track of some of the fields we set in order to compute missing ones. 52 */ 53 #define FIELD_TM_MON (1 << 0) 54 #define FIELD_TM_MDAY (1 << 1) 55 #define FIELD_TM_WDAY (1 << 2) 56 #define FIELD_TM_YDAY (1 << 3) 57 #define FIELD_TM_YEAR (1 << 4) 58 59 static char gmt[] = { "GMT" }; 60 static char utc[] = { "UTC" }; 61 /* RFC-822/RFC-2822 */ 62 static const char * const nast[5] = { 63 "EST", "CST", "MST", "PST", "\0\0\0" 64 }; 65 static const char * const nadt[5] = { 66 "EDT", "CDT", "MDT", "PDT", "\0\0\0" 67 }; 68 69 static const int mon_lengths[2][MONSPERYEAR] = { 70 { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, 71 { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 } 72 }; 73 74 static int _conv_num(const unsigned char **, int *, int, int); 75 static int leaps_thru_end_of(const int y); 76 static char *_strptime(const char *, const char *, struct tm *, int); 77 static const u_char *_find_string(const u_char *, int *, const char * const *, 78 const char * const *, int); 79 80 81 char * 82 strptime(const char *buf, const char *fmt, struct tm *tm) 83 { 84 return(_strptime(buf, fmt, tm, 1)); 85 } 86 DEF_WEAK(strptime); 87 88 static char * 89 _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize) 90 { 91 unsigned char c; 92 const unsigned char *bp, *ep; 93 size_t len; 94 int alt_format, i, offs; 95 int neg = 0; 96 static int century, relyear, fields; 97 98 if (initialize) { 99 century = TM_YEAR_BASE; 100 relyear = -1; 101 fields = 0; 102 } 103 104 bp = (const unsigned char *)buf; 105 while ((c = *fmt) != '\0') { 106 /* Clear `alternate' modifier prior to new conversion. */ 107 alt_format = 0; 108 109 /* Eat up white-space. */ 110 if (isspace(c)) { 111 while (isspace(*bp)) 112 bp++; 113 114 fmt++; 115 continue; 116 } 117 118 if ((c = *fmt++) != '%') 119 goto literal; 120 121 122 again: switch (c = *fmt++) { 123 case '%': /* "%%" is converted to "%". */ 124 literal: 125 if (c != *bp++) 126 return (NULL); 127 128 break; 129 130 /* 131 * "Alternative" modifiers. Just set the appropriate flag 132 * and start over again. 133 */ 134 case 'E': /* "%E?" alternative conversion modifier. */ 135 _LEGAL_ALT(0); 136 alt_format |= _ALT_E; 137 goto again; 138 139 case 'O': /* "%O?" alternative conversion modifier. */ 140 _LEGAL_ALT(0); 141 alt_format |= _ALT_O; 142 goto again; 143 144 /* 145 * "Complex" conversion rules, implemented through recursion. 146 */ 147 case 'c': /* Date and time, using the locale's format. */ 148 _LEGAL_ALT(_ALT_E); 149 if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0))) 150 return (NULL); 151 break; 152 153 case 'D': /* The date as "%m/%d/%y". */ 154 _LEGAL_ALT(0); 155 if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0))) 156 return (NULL); 157 break; 158 159 case 'F': /* The date as "%Y-%m-%d". */ 160 _LEGAL_ALT(0); 161 if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0))) 162 return (NULL); 163 continue; 164 165 case 'R': /* The time as "%H:%M". */ 166 _LEGAL_ALT(0); 167 if (!(bp = _strptime(bp, "%H:%M", tm, 0))) 168 return (NULL); 169 break; 170 171 case 'r': /* The time as "%I:%M:%S %p". */ 172 _LEGAL_ALT(0); 173 if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0))) 174 return (NULL); 175 break; 176 177 case 'T': /* The time as "%H:%M:%S". */ 178 _LEGAL_ALT(0); 179 if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0))) 180 return (NULL); 181 break; 182 183 case 'X': /* The time, using the locale's format. */ 184 _LEGAL_ALT(_ALT_E); 185 if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0))) 186 return (NULL); 187 break; 188 189 case 'x': /* The date, using the locale's format. */ 190 _LEGAL_ALT(_ALT_E); 191 if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0))) 192 return (NULL); 193 break; 194 195 /* 196 * "Elementary" conversion rules. 197 */ 198 case 'A': /* The day of week, using the locale's form. */ 199 case 'a': 200 _LEGAL_ALT(0); 201 for (i = 0; i < 7; i++) { 202 /* Full name. */ 203 len = strlen(_ctloc(day[i])); 204 if (strncasecmp(_ctloc(day[i]), bp, len) == 0) 205 break; 206 207 /* Abbreviated name. */ 208 len = strlen(_ctloc(abday[i])); 209 if (strncasecmp(_ctloc(abday[i]), bp, len) == 0) 210 break; 211 } 212 213 /* Nothing matched. */ 214 if (i == 7) 215 return (NULL); 216 217 tm->tm_wday = i; 218 bp += len; 219 fields |= FIELD_TM_WDAY; 220 break; 221 222 case 'B': /* The month, using the locale's form. */ 223 case 'b': 224 case 'h': 225 _LEGAL_ALT(0); 226 for (i = 0; i < 12; i++) { 227 /* Full name. */ 228 len = strlen(_ctloc(mon[i])); 229 if (strncasecmp(_ctloc(mon[i]), bp, len) == 0) 230 break; 231 232 /* Abbreviated name. */ 233 len = strlen(_ctloc(abmon[i])); 234 if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0) 235 break; 236 } 237 238 /* Nothing matched. */ 239 if (i == 12) 240 return (NULL); 241 242 tm->tm_mon = i; 243 bp += len; 244 fields |= FIELD_TM_MON; 245 break; 246 247 case 'C': /* The century number. */ 248 _LEGAL_ALT(_ALT_E); 249 if (!(_conv_num(&bp, &i, 0, 99))) 250 return (NULL); 251 252 century = i * 100; 253 break; 254 255 case 'd': /* The day of month. */ 256 case 'e': 257 _LEGAL_ALT(_ALT_O); 258 if (!(_conv_num(&bp, &tm->tm_mday, 1, 31))) 259 return (NULL); 260 fields |= FIELD_TM_MDAY; 261 break; 262 263 case 'k': /* The hour (24-hour clock representation). */ 264 _LEGAL_ALT(0); 265 /* FALLTHROUGH */ 266 case 'H': 267 _LEGAL_ALT(_ALT_O); 268 if (!(_conv_num(&bp, &tm->tm_hour, 0, 23))) 269 return (NULL); 270 break; 271 272 case 'l': /* The hour (12-hour clock representation). */ 273 _LEGAL_ALT(0); 274 /* FALLTHROUGH */ 275 case 'I': 276 _LEGAL_ALT(_ALT_O); 277 if (!(_conv_num(&bp, &tm->tm_hour, 1, 12))) 278 return (NULL); 279 break; 280 281 case 'j': /* The day of year. */ 282 _LEGAL_ALT(0); 283 if (!(_conv_num(&bp, &tm->tm_yday, 1, 366))) 284 return (NULL); 285 tm->tm_yday--; 286 fields |= FIELD_TM_YDAY; 287 break; 288 289 case 'M': /* The minute. */ 290 _LEGAL_ALT(_ALT_O); 291 if (!(_conv_num(&bp, &tm->tm_min, 0, 59))) 292 return (NULL); 293 break; 294 295 case 'm': /* The month. */ 296 _LEGAL_ALT(_ALT_O); 297 if (!(_conv_num(&bp, &tm->tm_mon, 1, 12))) 298 return (NULL); 299 tm->tm_mon--; 300 fields |= FIELD_TM_MON; 301 break; 302 303 case 'p': /* The locale's equivalent of AM/PM. */ 304 _LEGAL_ALT(0); 305 /* AM? */ 306 len = strlen(_ctloc(am_pm[0])); 307 if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) { 308 if (tm->tm_hour > 12) /* i.e., 13:00 AM ?! */ 309 return (NULL); 310 else if (tm->tm_hour == 12) 311 tm->tm_hour = 0; 312 313 bp += len; 314 break; 315 } 316 /* PM? */ 317 len = strlen(_ctloc(am_pm[1])); 318 if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) { 319 if (tm->tm_hour > 12) /* i.e., 13:00 PM ?! */ 320 return (NULL); 321 else if (tm->tm_hour < 12) 322 tm->tm_hour += 12; 323 324 bp += len; 325 break; 326 } 327 328 /* Nothing matched. */ 329 return (NULL); 330 331 case 'S': /* The seconds. */ 332 _LEGAL_ALT(_ALT_O); 333 if (!(_conv_num(&bp, &tm->tm_sec, 0, 61))) 334 return (NULL); 335 break; 336 337 case 'U': /* The week of year, beginning on sunday. */ 338 case 'W': /* The week of year, beginning on monday. */ 339 _LEGAL_ALT(_ALT_O); 340 /* 341 * XXX This is bogus, as we can not assume any valid 342 * information present in the tm structure at this 343 * point to calculate a real value, so just check the 344 * range for now. 345 */ 346 if (!(_conv_num(&bp, &i, 0, 53))) 347 return (NULL); 348 break; 349 350 case 'w': /* The day of week, beginning on sunday. */ 351 _LEGAL_ALT(_ALT_O); 352 if (!(_conv_num(&bp, &tm->tm_wday, 0, 6))) 353 return (NULL); 354 fields |= FIELD_TM_WDAY; 355 break; 356 357 case 'u': /* The day of week, monday = 1. */ 358 _LEGAL_ALT(_ALT_O); 359 if (!(_conv_num(&bp, &i, 1, 7))) 360 return (NULL); 361 tm->tm_wday = i % 7; 362 fields |= FIELD_TM_WDAY; 363 continue; 364 365 case 'g': /* The year corresponding to the ISO week 366 * number but without the century. 367 */ 368 if (!(_conv_num(&bp, &i, 0, 99))) 369 return (NULL); 370 continue; 371 372 case 'G': /* The year corresponding to the ISO week 373 * number with century. 374 */ 375 do 376 bp++; 377 while (isdigit(*bp)); 378 continue; 379 380 case 'V': /* The ISO 8601:1988 week number as decimal */ 381 if (!(_conv_num(&bp, &i, 0, 53))) 382 return (NULL); 383 continue; 384 385 case 'Y': /* The year. */ 386 _LEGAL_ALT(_ALT_E); 387 if (!(_conv_num(&bp, &i, 0, 9999))) 388 return (NULL); 389 390 relyear = -1; 391 tm->tm_year = i - TM_YEAR_BASE; 392 fields |= FIELD_TM_YEAR; 393 break; 394 395 case 'y': /* The year within the century (2 digits). */ 396 _LEGAL_ALT(_ALT_E | _ALT_O); 397 if (!(_conv_num(&bp, &relyear, 0, 99))) 398 return (NULL); 399 break; 400 401 case 'Z': 402 tzset(); 403 if (strncmp((const char *)bp, gmt, 3) == 0) { 404 tm->tm_isdst = 0; 405 #ifdef TM_GMTOFF 406 tm->TM_GMTOFF = 0; 407 #endif 408 #ifdef TM_ZONE 409 tm->TM_ZONE = gmt; 410 #endif 411 bp += 3; 412 } else if (strncmp((const char *)bp, utc, 3) == 0) { 413 tm->tm_isdst = 0; 414 #ifdef TM_GMTOFF 415 tm->TM_GMTOFF = 0; 416 #endif 417 #ifdef TM_ZONE 418 tm->TM_ZONE = utc; 419 #endif 420 bp += 3; 421 } else { 422 ep = _find_string(bp, &i, 423 (const char * const *)tzname, 424 NULL, 2); 425 if (ep == NULL) 426 return (NULL); 427 428 tm->tm_isdst = i; 429 #ifdef TM_GMTOFF 430 tm->TM_GMTOFF = -(timezone); 431 #endif 432 #ifdef TM_ZONE 433 tm->TM_ZONE = tzname[i]; 434 #endif 435 bp = ep; 436 } 437 continue; 438 439 case 'z': 440 /* 441 * We recognize all ISO 8601 formats: 442 * Z = Zulu time/UTC 443 * [+-]hhmm 444 * [+-]hh:mm 445 * [+-]hh 446 * We recognize all RFC-822/RFC-2822 formats: 447 * UT|GMT 448 * North American : UTC offsets 449 * E[DS]T = Eastern : -4 | -5 450 * C[DS]T = Central : -5 | -6 451 * M[DS]T = Mountain: -6 | -7 452 * P[DS]T = Pacific : -7 | -8 453 * Military 454 * [A-IL-M] = -1 ... -9 (J not used) 455 * [N-Y] = +1 ... +12 456 */ 457 while (isspace(*bp)) 458 bp++; 459 460 switch (*bp++) { 461 case 'G': 462 if (*bp++ != 'M') 463 return NULL; 464 /*FALLTHROUGH*/ 465 case 'U': 466 if (*bp++ != 'T') 467 return NULL; 468 /*FALLTHROUGH*/ 469 case 'Z': 470 tm->tm_isdst = 0; 471 #ifdef TM_GMTOFF 472 tm->TM_GMTOFF = 0; 473 #endif 474 #ifdef TM_ZONE 475 tm->TM_ZONE = utc; 476 #endif 477 continue; 478 case '+': 479 neg = 0; 480 break; 481 case '-': 482 neg = 1; 483 break; 484 default: 485 --bp; 486 ep = _find_string(bp, &i, nast, NULL, 4); 487 if (ep != NULL) { 488 #ifdef TM_GMTOFF 489 tm->TM_GMTOFF = -5 - i; 490 #endif 491 #ifdef TM_ZONE 492 tm->TM_ZONE = (char *)nast[i]; 493 #endif 494 bp = ep; 495 continue; 496 } 497 ep = _find_string(bp, &i, nadt, NULL, 4); 498 if (ep != NULL) { 499 tm->tm_isdst = 1; 500 #ifdef TM_GMTOFF 501 tm->TM_GMTOFF = -4 - i; 502 #endif 503 #ifdef TM_ZONE 504 tm->TM_ZONE = (char *)nadt[i]; 505 #endif 506 bp = ep; 507 continue; 508 } 509 510 if ((*bp >= 'A' && *bp <= 'I') || 511 (*bp >= 'L' && *bp <= 'Y')) { 512 #ifdef TM_GMTOFF 513 /* Argh! No 'J'! */ 514 if (*bp >= 'A' && *bp <= 'I') 515 tm->TM_GMTOFF = 516 ('A' - 1) - (int)*bp; 517 else if (*bp >= 'L' && *bp <= 'M') 518 tm->TM_GMTOFF = 'A' - (int)*bp; 519 else if (*bp >= 'N' && *bp <= 'Y') 520 tm->TM_GMTOFF = (int)*bp - 'M'; 521 #endif 522 #ifdef TM_ZONE 523 tm->TM_ZONE = NULL; /* XXX */ 524 #endif 525 bp++; 526 continue; 527 } 528 return NULL; 529 } 530 offs = 0; 531 for (i = 0; i < 4; ) { 532 if (isdigit(*bp)) { 533 offs = offs * 10 + (*bp++ - '0'); 534 i++; 535 continue; 536 } 537 if (i == 2 && *bp == ':') { 538 bp++; 539 continue; 540 } 541 break; 542 } 543 switch (i) { 544 case 2: 545 offs *= 100; 546 break; 547 case 4: 548 i = offs % 100; 549 if (i >= 60) 550 return NULL; 551 /* Convert minutes into decimal */ 552 offs = (offs / 100) * 100 + (i * 50) / 30; 553 break; 554 default: 555 return NULL; 556 } 557 if (neg) 558 offs = -offs; 559 tm->tm_isdst = 0; /* XXX */ 560 #ifdef TM_GMTOFF 561 tm->TM_GMTOFF = offs; 562 #endif 563 #ifdef TM_ZONE 564 tm->TM_ZONE = NULL; /* XXX */ 565 #endif 566 continue; 567 568 /* 569 * Miscellaneous conversions. 570 */ 571 case 'n': /* Any kind of white-space. */ 572 case 't': 573 _LEGAL_ALT(0); 574 while (isspace(*bp)) 575 bp++; 576 break; 577 578 579 default: /* Unknown/unsupported conversion. */ 580 return (NULL); 581 } 582 583 584 } 585 586 /* 587 * We need to evaluate the two digit year spec (%y) 588 * last as we can get a century spec (%C) at any time. 589 */ 590 if (relyear != -1) { 591 if (century == TM_YEAR_BASE) { 592 if (relyear <= 68) 593 tm->tm_year = relyear + 2000 - TM_YEAR_BASE; 594 else 595 tm->tm_year = relyear + 1900 - TM_YEAR_BASE; 596 } else { 597 tm->tm_year = relyear + century - TM_YEAR_BASE; 598 } 599 fields |= FIELD_TM_YEAR; 600 } 601 602 /* Compute some missing values when possible. */ 603 if (fields & FIELD_TM_YEAR) { 604 const int year = tm->tm_year + TM_YEAR_BASE; 605 const int *mon_lens = mon_lengths[isleap(year)]; 606 if (!(fields & FIELD_TM_YDAY) && 607 (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) { 608 tm->tm_yday = tm->tm_mday - 1; 609 for (i = 0; i < tm->tm_mon; i++) 610 tm->tm_yday += mon_lens[i]; 611 fields |= FIELD_TM_YDAY; 612 } 613 if (fields & FIELD_TM_YDAY) { 614 int days = tm->tm_yday; 615 if (!(fields & FIELD_TM_WDAY)) { 616 tm->tm_wday = EPOCH_WDAY + 617 ((year - EPOCH_YEAR) % DAYSPERWEEK) * 618 (DAYSPERNYEAR % DAYSPERWEEK) + 619 leaps_thru_end_of(year - 1) - 620 leaps_thru_end_of(EPOCH_YEAR - 1) + 621 tm->tm_yday; 622 tm->tm_wday %= DAYSPERWEEK; 623 if (tm->tm_wday < 0) 624 tm->tm_wday += DAYSPERWEEK; 625 } 626 if (!(fields & FIELD_TM_MON)) { 627 tm->tm_mon = 0; 628 while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon]) 629 days -= mon_lens[tm->tm_mon++]; 630 } 631 if (!(fields & FIELD_TM_MDAY)) 632 tm->tm_mday = days + 1; 633 } 634 } 635 636 return ((char *)bp); 637 } 638 639 640 static int 641 _conv_num(const unsigned char **buf, int *dest, int llim, int ulim) 642 { 643 int result = 0; 644 int rulim = ulim; 645 646 if (**buf < '0' || **buf > '9') 647 return (0); 648 649 /* we use rulim to break out of the loop when we run out of digits */ 650 do { 651 result *= 10; 652 result += *(*buf)++ - '0'; 653 rulim /= 10; 654 } while ((result * 10 <= ulim) && rulim && **buf >= '0' && **buf <= '9'); 655 656 if (result < llim || result > ulim) 657 return (0); 658 659 *dest = result; 660 return (1); 661 } 662 663 static const u_char * 664 _find_string(const u_char *bp, int *tgt, const char * const *n1, 665 const char * const *n2, int c) 666 { 667 int i; 668 unsigned int len; 669 670 /* check full name - then abbreviated ones */ 671 for (; n1 != NULL; n1 = n2, n2 = NULL) { 672 for (i = 0; i < c; i++, n1++) { 673 len = strlen(*n1); 674 if (strncasecmp(*n1, (const char *)bp, len) == 0) { 675 *tgt = i; 676 return bp + len; 677 } 678 } 679 } 680 681 /* Nothing matched */ 682 return NULL; 683 } 684 685 static int 686 leaps_thru_end_of(const int y) 687 { 688 return (y >= 0) ? (y / 4 - y / 100 + y / 400) : 689 -(leaps_thru_end_of(-(y + 1)) + 1); 690 } 691