1 /* $NetBSD: strptime.c,v 1.38 2013/05/17 12:55:57 joerg Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code was contributed to The NetBSD Foundation by Klaus Klein. 8 * Heavily optimised by David Laight 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #if defined(LIBC_SCCS) && !defined(lint) 34 __RCSID("$NetBSD: strptime.c,v 1.38 2013/05/17 12:55:57 joerg Exp $"); 35 #endif 36 37 #include "namespace.h" 38 #include <sys/localedef.h> 39 #include <ctype.h> 40 #include <locale.h> 41 #include <string.h> 42 #include <time.h> 43 #include <tzfile.h> 44 #include "private.h" 45 #include "setlocale_local.h" 46 47 #ifdef __weak_alias 48 __weak_alias(strptime,_strptime) 49 __weak_alias(strptime_l, _strptime_l) 50 #endif 51 52 #define _TIME_LOCALE(loc) \ 53 ((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME])) 54 55 /* 56 * We do not implement alternate representations. However, we always 57 * check whether a given modifier is allowed for a certain conversion. 58 */ 59 #define ALT_E 0x01 60 #define ALT_O 0x02 61 #define LEGAL_ALT(x) { if (alt_format & ~(x)) return NULL; } 62 63 static char gmt[] = { "GMT" }; 64 static char utc[] = { "UTC" }; 65 /* RFC-822/RFC-2822 */ 66 static const char * const nast[5] = { 67 "EST", "CST", "MST", "PST", "\0\0\0" 68 }; 69 static const char * const nadt[5] = { 70 "EDT", "CDT", "MDT", "PDT", "\0\0\0" 71 }; 72 73 static const u_char *conv_num(const unsigned char *, int *, uint, uint); 74 static const u_char *find_string(const u_char *, int *, const char * const *, 75 const char * const *, int); 76 77 char * 78 strptime(const char *buf, const char *fmt, struct tm *tm) 79 { 80 return strptime_l(buf, fmt, tm, _current_locale()); 81 } 82 83 char * 84 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc) 85 { 86 unsigned char c; 87 const unsigned char *bp, *ep; 88 int alt_format, i, split_year = 0, neg = 0, offs; 89 const char *new_fmt; 90 91 bp = (const u_char *)buf; 92 93 while (bp != NULL && (c = *fmt++) != '\0') { 94 /* Clear `alternate' modifier prior to new conversion. */ 95 alt_format = 0; 96 i = 0; 97 98 /* Eat up white-space. */ 99 if (isspace(c)) { 100 while (isspace(*bp)) 101 bp++; 102 continue; 103 } 104 105 if (c != '%') 106 goto literal; 107 108 109 again: switch (c = *fmt++) { 110 case '%': /* "%%" is converted to "%". */ 111 literal: 112 if (c != *bp++) 113 return NULL; 114 LEGAL_ALT(0); 115 continue; 116 117 /* 118 * "Alternative" modifiers. Just set the appropriate flag 119 * and start over again. 120 */ 121 case 'E': /* "%E?" alternative conversion modifier. */ 122 LEGAL_ALT(0); 123 alt_format |= ALT_E; 124 goto again; 125 126 case 'O': /* "%O?" alternative conversion modifier. */ 127 LEGAL_ALT(0); 128 alt_format |= ALT_O; 129 goto again; 130 131 /* 132 * "Complex" conversion rules, implemented through recursion. 133 */ 134 case 'c': /* Date and time, using the locale's format. */ 135 new_fmt = _TIME_LOCALE(loc)->d_t_fmt; 136 goto recurse; 137 138 case 'D': /* The date as "%m/%d/%y". */ 139 new_fmt = "%m/%d/%y"; 140 LEGAL_ALT(0); 141 goto recurse; 142 143 case 'F': /* The date as "%Y-%m-%d". */ 144 new_fmt = "%Y-%m-%d"; 145 LEGAL_ALT(0); 146 goto recurse; 147 148 case 'R': /* The time as "%H:%M". */ 149 new_fmt = "%H:%M"; 150 LEGAL_ALT(0); 151 goto recurse; 152 153 case 'r': /* The time in 12-hour clock representation. */ 154 new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm; 155 LEGAL_ALT(0); 156 goto recurse; 157 158 case 'T': /* The time as "%H:%M:%S". */ 159 new_fmt = "%H:%M:%S"; 160 LEGAL_ALT(0); 161 goto recurse; 162 163 case 'X': /* The time, using the locale's format. */ 164 new_fmt = _TIME_LOCALE(loc)->t_fmt; 165 goto recurse; 166 167 case 'x': /* The date, using the locale's format. */ 168 new_fmt = _TIME_LOCALE(loc)->d_fmt; 169 recurse: 170 bp = (const u_char *)strptime((const char *)bp, 171 new_fmt, tm); 172 LEGAL_ALT(ALT_E); 173 continue; 174 175 /* 176 * "Elementary" conversion rules. 177 */ 178 case 'A': /* The day of week, using the locale's form. */ 179 case 'a': 180 bp = find_string(bp, &tm->tm_wday, 181 _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7); 182 LEGAL_ALT(0); 183 continue; 184 185 case 'B': /* The month, using the locale's form. */ 186 case 'b': 187 case 'h': 188 bp = find_string(bp, &tm->tm_mon, 189 _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon, 190 12); 191 LEGAL_ALT(0); 192 continue; 193 194 case 'C': /* The century number. */ 195 i = 20; 196 bp = conv_num(bp, &i, 0, 99); 197 198 i = i * 100 - TM_YEAR_BASE; 199 if (split_year) 200 i += tm->tm_year % 100; 201 split_year = 1; 202 tm->tm_year = i; 203 LEGAL_ALT(ALT_E); 204 continue; 205 206 case 'd': /* The day of month. */ 207 case 'e': 208 bp = conv_num(bp, &tm->tm_mday, 1, 31); 209 LEGAL_ALT(ALT_O); 210 continue; 211 212 case 'k': /* The hour (24-hour clock representation). */ 213 LEGAL_ALT(0); 214 /* FALLTHROUGH */ 215 case 'H': 216 bp = conv_num(bp, &tm->tm_hour, 0, 23); 217 LEGAL_ALT(ALT_O); 218 continue; 219 220 case 'l': /* The hour (12-hour clock representation). */ 221 LEGAL_ALT(0); 222 /* FALLTHROUGH */ 223 case 'I': 224 bp = conv_num(bp, &tm->tm_hour, 1, 12); 225 if (tm->tm_hour == 12) 226 tm->tm_hour = 0; 227 LEGAL_ALT(ALT_O); 228 continue; 229 230 case 'j': /* The day of year. */ 231 i = 1; 232 bp = conv_num(bp, &i, 1, 366); 233 tm->tm_yday = i - 1; 234 LEGAL_ALT(0); 235 continue; 236 237 case 'M': /* The minute. */ 238 bp = conv_num(bp, &tm->tm_min, 0, 59); 239 LEGAL_ALT(ALT_O); 240 continue; 241 242 case 'm': /* The month. */ 243 i = 1; 244 bp = conv_num(bp, &i, 1, 12); 245 tm->tm_mon = i - 1; 246 LEGAL_ALT(ALT_O); 247 continue; 248 249 case 'p': /* The locale's equivalent of AM/PM. */ 250 bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm, 251 NULL, 2); 252 if (tm->tm_hour > 11) 253 return NULL; 254 tm->tm_hour += i * 12; 255 LEGAL_ALT(0); 256 continue; 257 258 case 'S': /* The seconds. */ 259 bp = conv_num(bp, &tm->tm_sec, 0, 61); 260 LEGAL_ALT(ALT_O); 261 continue; 262 263 #ifndef TIME_MAX 264 #define TIME_MAX INT64_MAX 265 #endif 266 case 's': /* seconds since the epoch */ 267 { 268 time_t sse = 0; 269 uint64_t rulim = TIME_MAX; 270 271 if (*bp < '0' || *bp > '9') { 272 bp = NULL; 273 continue; 274 } 275 276 do { 277 sse *= 10; 278 sse += *bp++ - '0'; 279 rulim /= 10; 280 } while ((sse * 10 <= TIME_MAX) && 281 rulim && *bp >= '0' && *bp <= '9'); 282 283 if (sse < 0 || (uint64_t)sse > TIME_MAX) { 284 bp = NULL; 285 continue; 286 } 287 288 if (localtime_r(&sse, tm) == NULL) 289 bp = NULL; 290 } 291 continue; 292 293 case 'U': /* The week of year, beginning on sunday. */ 294 case 'W': /* The week of year, beginning on monday. */ 295 /* 296 * XXX This is bogus, as we can not assume any valid 297 * information present in the tm structure at this 298 * point to calculate a real value, so just check the 299 * range for now. 300 */ 301 bp = conv_num(bp, &i, 0, 53); 302 LEGAL_ALT(ALT_O); 303 continue; 304 305 case 'w': /* The day of week, beginning on sunday. */ 306 bp = conv_num(bp, &tm->tm_wday, 0, 6); 307 LEGAL_ALT(ALT_O); 308 continue; 309 310 case 'u': /* The day of week, monday = 1. */ 311 bp = conv_num(bp, &i, 1, 7); 312 tm->tm_wday = i % 7; 313 LEGAL_ALT(ALT_O); 314 continue; 315 316 case 'g': /* The year corresponding to the ISO week 317 * number but without the century. 318 */ 319 bp = conv_num(bp, &i, 0, 99); 320 continue; 321 322 case 'G': /* The year corresponding to the ISO week 323 * number with century. 324 */ 325 do 326 bp++; 327 while (isdigit(*bp)); 328 continue; 329 330 case 'V': /* The ISO 8601:1988 week number as decimal */ 331 bp = conv_num(bp, &i, 0, 53); 332 continue; 333 334 case 'Y': /* The year. */ 335 i = TM_YEAR_BASE; /* just for data sanity... */ 336 bp = conv_num(bp, &i, 0, 9999); 337 tm->tm_year = i - TM_YEAR_BASE; 338 LEGAL_ALT(ALT_E); 339 continue; 340 341 case 'y': /* The year within 100 years of the epoch. */ 342 /* LEGAL_ALT(ALT_E | ALT_O); */ 343 bp = conv_num(bp, &i, 0, 99); 344 345 if (split_year) 346 /* preserve century */ 347 i += (tm->tm_year / 100) * 100; 348 else { 349 split_year = 1; 350 if (i <= 68) 351 i = i + 2000 - TM_YEAR_BASE; 352 else 353 i = i + 1900 - TM_YEAR_BASE; 354 } 355 tm->tm_year = i; 356 continue; 357 358 case 'Z': 359 tzset(); 360 if (strncmp((const char *)bp, gmt, 3) == 0) { 361 tm->tm_isdst = 0; 362 #ifdef TM_GMTOFF 363 tm->TM_GMTOFF = 0; 364 #endif 365 #ifdef TM_ZONE 366 tm->TM_ZONE = gmt; 367 #endif 368 bp += 3; 369 } else { 370 ep = find_string(bp, &i, 371 (const char * const *)tzname, 372 NULL, 2); 373 if (ep != NULL) { 374 tm->tm_isdst = i; 375 #ifdef TM_GMTOFF 376 tm->TM_GMTOFF = -(timezone); 377 #endif 378 #ifdef TM_ZONE 379 tm->TM_ZONE = tzname[i]; 380 #endif 381 } 382 bp = ep; 383 } 384 continue; 385 386 case 'z': 387 /* 388 * We recognize all ISO 8601 formats: 389 * Z = Zulu time/UTC 390 * [+-]hhmm 391 * [+-]hh:mm 392 * [+-]hh 393 * We recognize all RFC-822/RFC-2822 formats: 394 * UT|GMT 395 * North American : UTC offsets 396 * E[DS]T = Eastern : -4 | -5 397 * C[DS]T = Central : -5 | -6 398 * M[DS]T = Mountain: -6 | -7 399 * P[DS]T = Pacific : -7 | -8 400 * Military 401 * [A-IL-M] = -1 ... -9 (J not used) 402 * [N-Y] = +1 ... +12 403 */ 404 while (isspace(*bp)) 405 bp++; 406 407 switch (*bp++) { 408 case 'G': 409 if (*bp++ != 'M') 410 return NULL; 411 /*FALLTHROUGH*/ 412 case 'U': 413 if (*bp++ != 'T') 414 return NULL; 415 /*FALLTHROUGH*/ 416 case 'Z': 417 tm->tm_isdst = 0; 418 #ifdef TM_GMTOFF 419 tm->TM_GMTOFF = 0; 420 #endif 421 #ifdef TM_ZONE 422 tm->TM_ZONE = utc; 423 #endif 424 continue; 425 case '+': 426 neg = 0; 427 break; 428 case '-': 429 neg = 1; 430 break; 431 default: 432 --bp; 433 ep = find_string(bp, &i, nast, NULL, 4); 434 if (ep != NULL) { 435 #ifdef TM_GMTOFF 436 tm->TM_GMTOFF = -5 - i; 437 #endif 438 #ifdef TM_ZONE 439 tm->TM_ZONE = __UNCONST(nast[i]); 440 #endif 441 bp = ep; 442 continue; 443 } 444 ep = find_string(bp, &i, nadt, NULL, 4); 445 if (ep != NULL) { 446 tm->tm_isdst = 1; 447 #ifdef TM_GMTOFF 448 tm->TM_GMTOFF = -4 - i; 449 #endif 450 #ifdef TM_ZONE 451 tm->TM_ZONE = __UNCONST(nadt[i]); 452 #endif 453 bp = ep; 454 continue; 455 } 456 457 if ((*bp >= 'A' && *bp <= 'I') || 458 (*bp >= 'L' && *bp <= 'Y')) { 459 #ifdef TM_GMTOFF 460 /* Argh! No 'J'! */ 461 if (*bp >= 'A' && *bp <= 'I') 462 tm->TM_GMTOFF = 463 ('A' - 1) - (int)*bp; 464 else if (*bp >= 'L' && *bp <= 'M') 465 tm->TM_GMTOFF = 'A' - (int)*bp; 466 else if (*bp >= 'N' && *bp <= 'Y') 467 tm->TM_GMTOFF = (int)*bp - 'M'; 468 #endif 469 #ifdef TM_ZONE 470 tm->TM_ZONE = NULL; /* XXX */ 471 #endif 472 bp++; 473 continue; 474 } 475 return NULL; 476 } 477 offs = 0; 478 for (i = 0; i < 4; ) { 479 if (isdigit(*bp)) { 480 offs = offs * 10 + (*bp++ - '0'); 481 i++; 482 continue; 483 } 484 if (i == 2 && *bp == ':') { 485 bp++; 486 continue; 487 } 488 break; 489 } 490 switch (i) { 491 case 2: 492 offs *= 100; 493 break; 494 case 4: 495 i = offs % 100; 496 if (i >= 60) 497 return NULL; 498 /* Convert minutes into decimal */ 499 offs = (offs / 100) * 100 + (i * 50) / 30; 500 break; 501 default: 502 return NULL; 503 } 504 if (neg) 505 offs = -offs; 506 tm->tm_isdst = 0; /* XXX */ 507 #ifdef TM_GMTOFF 508 tm->TM_GMTOFF = offs; 509 #endif 510 #ifdef TM_ZONE 511 tm->TM_ZONE = NULL; /* XXX */ 512 #endif 513 continue; 514 515 /* 516 * Miscellaneous conversions. 517 */ 518 case 'n': /* Any kind of white-space. */ 519 case 't': 520 while (isspace(*bp)) 521 bp++; 522 LEGAL_ALT(0); 523 continue; 524 525 526 default: /* Unknown/unsupported conversion. */ 527 return NULL; 528 } 529 } 530 531 return __UNCONST(bp); 532 } 533 534 535 static const u_char * 536 conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim) 537 { 538 uint result = 0; 539 unsigned char ch; 540 541 /* The limit also determines the number of valid digits. */ 542 uint rulim = ulim; 543 544 ch = *buf; 545 if (ch < '0' || ch > '9') 546 return NULL; 547 548 do { 549 result *= 10; 550 result += ch - '0'; 551 rulim /= 10; 552 ch = *++buf; 553 } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9'); 554 555 if (result < llim || result > ulim) 556 return NULL; 557 558 *dest = result; 559 return buf; 560 } 561 562 static const u_char * 563 find_string(const u_char *bp, int *tgt, const char * const *n1, 564 const char * const *n2, int c) 565 { 566 int i; 567 size_t len; 568 569 /* check full name - then abbreviated ones */ 570 for (; n1 != NULL; n1 = n2, n2 = NULL) { 571 for (i = 0; i < c; i++, n1++) { 572 len = strlen(*n1); 573 if (strncasecmp(*n1, (const char *)bp, len) == 0) { 574 *tgt = i; 575 return bp + len; 576 } 577 } 578 } 579 580 /* Nothing matched */ 581 return NULL; 582 } 583