1 /* $OpenBSD: strptime.c,v 1.31 2023/03/02 16:21:51 millert Exp $ */
2 /* $NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $ */
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <ctype.h>
32 #include <errno.h>
33 #include <limits.h>
34 #include <locale.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <time.h>
38
39 #include "localedef.h"
40 #include "private.h"
41 #include "tzfile.h"
42
/* Access member `x' of the current time locale (_CurrentTimeLocale). */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations.  However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * _ALT_E and _ALT_O are the bits recorded in the local variable
 * `alt_format' when a "%E" or "%O" modifier has been seen.
 *
 * _LEGAL_ALT(x) makes the enclosing function return 0 (a null pointer)
 * when `alt_format' contains a modifier bit that is not part of the
 * mask `x'.  It expects an integer variable named `alt_format' to be in
 * scope.  The body is wrapped in do { } while (0) so that the macro
 * behaves like a single statement, e.g. in an unbraced if/else.
 */
#define	_ALT_E			0x01
#define	_ALT_O			0x02
#define	_LEGAL_ALT(x)		do { if (alt_format & ~(x)) return (0); } while (0)
52
/*
 * Bit flags, one per struct tm member, that record which members have
 * been filled in by a conversion.  They let the parser derive members
 * that were not given explicitly once parsing is complete.
 */
#define	FIELD_TM_MON	0x01	/* tm_mon  */
#define	FIELD_TM_MDAY	0x02	/* tm_mday */
#define	FIELD_TM_WDAY	0x04	/* tm_wday */
#define	FIELD_TM_YDAY	0x08	/* tm_yday */
#define	FIELD_TM_YEAR	0x10	/* tm_year */
61
/* Zone names handed out through tm_zone by the %Z and %z conversions. */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone abbreviations.
 *
 * Entry i of the standard-time table is (-5 - i) hours from UTC and
 * entry i of the daylight-time table is (-4 - i) hours from UTC.  The
 * %z conversion searches only the first four entries of each table.
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};
71
/*
 * Number of days in each month, January first.  Row 0 has a 28-day
 * February (common year), row 1 a 29-day February (leap year); the
 * parser picks the row with isleap(year).
 */
static const int mon_lengths[2][MONSPERYEAR] = {
	{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
	{ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
};
76
/* Forward declarations of the file-local helpers defined further down. */

/* Parse a bounded decimal number; returns 1 on success, 0 on failure. */
static	int _conv_num(const unsigned char **, int *, int, int);
/* Parse seconds since the Epoch and convert them with localtime_r(). */
static	int epoch_to_tm(const unsigned char **, struct tm *);
/* Count of leap years up to and including year y. */
static	int leaps_thru_end_of(const int y);
/* Recursive worker behind strptime(). */
static	char *_strptime(const char *, const char *, struct tm *, int);
/* Case-insensitive prefix lookup in up to two tables of names. */
static	const u_char *_find_string(const u_char *, int *, const char * const *,
	    const char * const *, int);
83
84
/*
 * Parse the string `buf' according to the format `fmt' and store the
 * result in `*tm'.
 *
 * This is a thin wrapper around _strptime() that requests a fresh
 * parse state.  The return value is whatever _strptime() returns: a
 * pointer to the first character of `buf' that was not consumed, or
 * NULL if the input did not match the format.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	/* Top-level call: have the worker reset its static state first. */
	const int initialize = 1;

	return (_strptime(buf, fmt, tm, initialize));
}
DEF_WEAK(strptime);
91
92 static char *
_strptime(const char * buf,const char * fmt,struct tm * tm,int initialize)93 _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
94 {
95 unsigned char c;
96 const unsigned char *bp, *ep;
97 size_t len;
98 int alt_format, i, offs;
99 int neg = 0;
100 static int century, relyear, fields;
101
102 if (initialize) {
103 century = TM_YEAR_BASE;
104 relyear = -1;
105 fields = 0;
106 }
107
108 bp = (const unsigned char *)buf;
109 while ((c = *fmt) != '\0') {
110 /* Clear `alternate' modifier prior to new conversion. */
111 alt_format = 0;
112
113 /* Eat up white-space. */
114 if (isspace(c)) {
115 while (isspace(*bp))
116 bp++;
117
118 fmt++;
119 continue;
120 }
121
122 if ((c = *fmt++) != '%')
123 goto literal;
124
125
126 again: switch (c = *fmt++) {
127 case '%': /* "%%" is converted to "%". */
128 literal:
129 if (c != *bp++)
130 return (NULL);
131
132 break;
133
134 /*
135 * "Alternative" modifiers. Just set the appropriate flag
136 * and start over again.
137 */
138 case 'E': /* "%E?" alternative conversion modifier. */
139 _LEGAL_ALT(0);
140 alt_format |= _ALT_E;
141 goto again;
142
143 case 'O': /* "%O?" alternative conversion modifier. */
144 _LEGAL_ALT(0);
145 alt_format |= _ALT_O;
146 goto again;
147
148 /*
149 * "Complex" conversion rules, implemented through recursion.
150 */
151 case 'c': /* Date and time, using the locale's format. */
152 _LEGAL_ALT(_ALT_E);
153 if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
154 return (NULL);
155 break;
156
157 case 'D': /* The date as "%m/%d/%y". */
158 _LEGAL_ALT(0);
159 if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
160 return (NULL);
161 break;
162
163 case 'F': /* The date as "%Y-%m-%d". */
164 _LEGAL_ALT(0);
165 if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
166 return (NULL);
167 continue;
168
169 case 'R': /* The time as "%H:%M". */
170 _LEGAL_ALT(0);
171 if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
172 return (NULL);
173 break;
174
175 case 'r': /* The time as "%I:%M:%S %p". */
176 _LEGAL_ALT(0);
177 if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
178 return (NULL);
179 break;
180
181 case 'T': /* The time as "%H:%M:%S". */
182 _LEGAL_ALT(0);
183 if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
184 return (NULL);
185 break;
186
187 case 'X': /* The time, using the locale's format. */
188 _LEGAL_ALT(_ALT_E);
189 if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
190 return (NULL);
191 break;
192
193 case 'x': /* The date, using the locale's format. */
194 _LEGAL_ALT(_ALT_E);
195 if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
196 return (NULL);
197 break;
198
199 /*
200 * "Elementary" conversion rules.
201 */
202 case 'A': /* The day of week, using the locale's form. */
203 case 'a':
204 _LEGAL_ALT(0);
205 for (i = 0; i < 7; i++) {
206 /* Full name. */
207 len = strlen(_ctloc(day[i]));
208 if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
209 break;
210
211 /* Abbreviated name. */
212 len = strlen(_ctloc(abday[i]));
213 if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
214 break;
215 }
216
217 /* Nothing matched. */
218 if (i == 7)
219 return (NULL);
220
221 tm->tm_wday = i;
222 bp += len;
223 fields |= FIELD_TM_WDAY;
224 break;
225
226 case 'B': /* The month, using the locale's form. */
227 case 'b':
228 case 'h':
229 _LEGAL_ALT(0);
230 for (i = 0; i < 12; i++) {
231 /* Full name. */
232 len = strlen(_ctloc(mon[i]));
233 if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
234 break;
235
236 /* Abbreviated name. */
237 len = strlen(_ctloc(abmon[i]));
238 if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
239 break;
240 }
241
242 /* Nothing matched. */
243 if (i == 12)
244 return (NULL);
245
246 tm->tm_mon = i;
247 bp += len;
248 fields |= FIELD_TM_MON;
249 break;
250
251 case 'C': /* The century number. */
252 _LEGAL_ALT(_ALT_E);
253 if (!(_conv_num(&bp, &i, 0, 99)))
254 return (NULL);
255
256 century = i * 100;
257 break;
258
259 case 'e': /* The day of month. */
260 if (isspace(*bp))
261 bp++;
262 /* FALLTHROUGH */
263 case 'd':
264 _LEGAL_ALT(_ALT_O);
265 if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
266 return (NULL);
267 fields |= FIELD_TM_MDAY;
268 break;
269
270 case 'k': /* The hour (24-hour clock representation). */
271 _LEGAL_ALT(0);
272 /* FALLTHROUGH */
273 case 'H':
274 _LEGAL_ALT(_ALT_O);
275 if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
276 return (NULL);
277 break;
278
279 case 'l': /* The hour (12-hour clock representation). */
280 _LEGAL_ALT(0);
281 /* FALLTHROUGH */
282 case 'I':
283 _LEGAL_ALT(_ALT_O);
284 if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
285 return (NULL);
286 break;
287
288 case 'j': /* The day of year. */
289 _LEGAL_ALT(0);
290 if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
291 return (NULL);
292 tm->tm_yday--;
293 fields |= FIELD_TM_YDAY;
294 break;
295
296 case 'M': /* The minute. */
297 _LEGAL_ALT(_ALT_O);
298 if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
299 return (NULL);
300 break;
301
302 case 'm': /* The month. */
303 _LEGAL_ALT(_ALT_O);
304 if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
305 return (NULL);
306 tm->tm_mon--;
307 fields |= FIELD_TM_MON;
308 break;
309
310 case 'p': /* The locale's equivalent of AM/PM. */
311 _LEGAL_ALT(0);
312 /* AM? */
313 len = strlen(_ctloc(am_pm[0]));
314 if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
315 if (tm->tm_hour > 12) /* i.e., 13:00 AM ?! */
316 return (NULL);
317 else if (tm->tm_hour == 12)
318 tm->tm_hour = 0;
319
320 bp += len;
321 break;
322 }
323 /* PM? */
324 len = strlen(_ctloc(am_pm[1]));
325 if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
326 if (tm->tm_hour > 12) /* i.e., 13:00 PM ?! */
327 return (NULL);
328 else if (tm->tm_hour < 12)
329 tm->tm_hour += 12;
330
331 bp += len;
332 break;
333 }
334
335 /* Nothing matched. */
336 return (NULL);
337
338 case 'S': /* The seconds. */
339 _LEGAL_ALT(_ALT_O);
340 if (!(_conv_num(&bp, &tm->tm_sec, 0, 60)))
341 return (NULL);
342 break;
343 case 's': /* Seconds since epoch. */
344 if (!(epoch_to_tm(&bp, tm)))
345 return (NULL);
346 fields = 0xffff; /* everything */
347 break;
348 case 'U': /* The week of year, beginning on sunday. */
349 case 'W': /* The week of year, beginning on monday. */
350 _LEGAL_ALT(_ALT_O);
351 /*
352 * XXX This is bogus, as we can not assume any valid
353 * information present in the tm structure at this
354 * point to calculate a real value, so just check the
355 * range for now.
356 */
357 if (!(_conv_num(&bp, &i, 0, 53)))
358 return (NULL);
359 break;
360
361 case 'w': /* The day of week, beginning on sunday. */
362 _LEGAL_ALT(_ALT_O);
363 if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
364 return (NULL);
365 fields |= FIELD_TM_WDAY;
366 break;
367
368 case 'u': /* The day of week, monday = 1. */
369 _LEGAL_ALT(_ALT_O);
370 if (!(_conv_num(&bp, &i, 1, 7)))
371 return (NULL);
372 tm->tm_wday = i % 7;
373 fields |= FIELD_TM_WDAY;
374 continue;
375
376 case 'g': /* The year corresponding to the ISO week
377 * number but without the century.
378 */
379 if (!(_conv_num(&bp, &i, 0, 99)))
380 return (NULL);
381 continue;
382
383 case 'G': /* The year corresponding to the ISO week
384 * number with century.
385 */
386 do
387 bp++;
388 while (isdigit(*bp));
389 continue;
390
391 case 'V': /* The ISO 8601:1988 week number as decimal */
392 if (!(_conv_num(&bp, &i, 0, 53)))
393 return (NULL);
394 continue;
395
396 case 'Y': /* The year. */
397 _LEGAL_ALT(_ALT_E);
398 if (!(_conv_num(&bp, &i, 0, 9999)))
399 return (NULL);
400
401 relyear = -1;
402 tm->tm_year = i - TM_YEAR_BASE;
403 fields |= FIELD_TM_YEAR;
404 break;
405
406 case 'y': /* The year within the century (2 digits). */
407 _LEGAL_ALT(_ALT_E | _ALT_O);
408 if (!(_conv_num(&bp, &relyear, 0, 99)))
409 return (NULL);
410 break;
411
412 case 'Z':
413 tzset();
414 if (strncmp((const char *)bp, gmt, 3) == 0) {
415 tm->tm_isdst = 0;
416 tm->tm_gmtoff = 0;
417 tm->tm_zone = gmt;
418 bp += 3;
419 } else if (strncmp((const char *)bp, utc, 3) == 0) {
420 tm->tm_isdst = 0;
421 tm->tm_gmtoff = 0;
422 tm->tm_zone = utc;
423 bp += 3;
424 } else {
425 ep = _find_string(bp, &i,
426 (const char * const *)tzname,
427 NULL, 2);
428 if (ep == NULL)
429 return (NULL);
430
431 tm->tm_isdst = i;
432 tm->tm_gmtoff = -(timezone);
433 tm->tm_zone = tzname[i];
434 bp = ep;
435 }
436 continue;
437
438 case 'z':
439 /*
440 * We recognize all ISO 8601 formats:
441 * Z = Zulu time/UTC
442 * [+-]hhmm
443 * [+-]hh:mm
444 * [+-]hh
445 * We recognize all RFC-822/RFC-2822 formats:
446 * UT|GMT
447 * North American : UTC offsets
448 * E[DS]T = Eastern : -4 | -5
449 * C[DS]T = Central : -5 | -6
450 * M[DS]T = Mountain: -6 | -7
451 * P[DS]T = Pacific : -7 | -8
452 */
453 while (isspace(*bp))
454 bp++;
455
456 switch (*bp++) {
457 case 'G':
458 if (*bp++ != 'M')
459 return NULL;
460 /*FALLTHROUGH*/
461 case 'U':
462 if (*bp++ != 'T')
463 return NULL;
464 /*FALLTHROUGH*/
465 case 'Z':
466 tm->tm_isdst = 0;
467 tm->tm_gmtoff = 0;
468 tm->tm_zone = utc;
469 continue;
470 case '+':
471 neg = 0;
472 break;
473 case '-':
474 neg = 1;
475 break;
476 default:
477 --bp;
478 ep = _find_string(bp, &i, nast, NULL, 4);
479 if (ep != NULL) {
480 tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
481 tm->tm_zone = (char *)nast[i];
482 bp = ep;
483 continue;
484 }
485 ep = _find_string(bp, &i, nadt, NULL, 4);
486 if (ep != NULL) {
487 tm->tm_isdst = 1;
488 tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
489 tm->tm_zone = (char *)nadt[i];
490 bp = ep;
491 continue;
492 }
493 return NULL;
494 }
495 if (!isdigit(bp[0]) || !isdigit(bp[1]))
496 return NULL;
497 offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
498 bp += 2;
499 if (*bp == ':')
500 bp++;
501 if (isdigit(*bp)) {
502 offs += (*bp++ - '0') * 10 * SECSPERMIN;
503 if (!isdigit(*bp))
504 return NULL;
505 offs += (*bp++ - '0') * SECSPERMIN;
506 }
507 if (neg)
508 offs = -offs;
509 tm->tm_isdst = 0; /* XXX */
510 tm->tm_gmtoff = offs;
511 tm->tm_zone = NULL; /* XXX */
512 continue;
513
514 /*
515 * Miscellaneous conversions.
516 */
517 case 'n': /* Any kind of white-space. */
518 case 't':
519 _LEGAL_ALT(0);
520 while (isspace(*bp))
521 bp++;
522 break;
523
524
525 default: /* Unknown/unsupported conversion. */
526 return (NULL);
527 }
528
529
530 }
531
532 /*
533 * We need to evaluate the two digit year spec (%y)
534 * last as we can get a century spec (%C) at any time.
535 */
536 if (relyear != -1) {
537 if (century == TM_YEAR_BASE) {
538 if (relyear <= 68)
539 tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
540 else
541 tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
542 } else {
543 tm->tm_year = relyear + century - TM_YEAR_BASE;
544 }
545 fields |= FIELD_TM_YEAR;
546 }
547
548 /* Compute some missing values when possible. */
549 if (fields & FIELD_TM_YEAR) {
550 const int year = tm->tm_year + TM_YEAR_BASE;
551 const int *mon_lens = mon_lengths[isleap(year)];
552 if (!(fields & FIELD_TM_YDAY) &&
553 (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
554 tm->tm_yday = tm->tm_mday - 1;
555 for (i = 0; i < tm->tm_mon; i++)
556 tm->tm_yday += mon_lens[i];
557 fields |= FIELD_TM_YDAY;
558 }
559 if (fields & FIELD_TM_YDAY) {
560 int days = tm->tm_yday;
561 if (!(fields & FIELD_TM_WDAY)) {
562 tm->tm_wday = EPOCH_WDAY +
563 ((year - EPOCH_YEAR) % DAYSPERWEEK) *
564 (DAYSPERNYEAR % DAYSPERWEEK) +
565 leaps_thru_end_of(year - 1) -
566 leaps_thru_end_of(EPOCH_YEAR - 1) +
567 tm->tm_yday;
568 tm->tm_wday %= DAYSPERWEEK;
569 if (tm->tm_wday < 0)
570 tm->tm_wday += DAYSPERWEEK;
571 }
572 if (!(fields & FIELD_TM_MON)) {
573 tm->tm_mon = 0;
574 while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
575 days -= mon_lens[tm->tm_mon++];
576 }
577 if (!(fields & FIELD_TM_MDAY))
578 tm->tm_mday = days + 1;
579 }
580 }
581
582 return ((char *)bp);
583 }
584
585
/*
 * Parse an unsigned decimal number at `*buf'.
 *
 *	buf	in/out cursor; advanced past every digit that was consumed,
 *		even when the value is then rejected by the range check
 *	dest	receives the value; written only on success
 *	llim	smallest acceptable value
 *	ulim	largest acceptable value; it also bounds how many digits
 *		are read (at most as many as `ulim' itself has)
 *
 * Returns 1 on success, 0 if `*buf' does not start with a digit or the
 * value lies outside [llim, ulim].
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cur = *buf;
	int value = 0;
	int budget = ulim;	/* loses one decimal digit per digit read */

	if (!(*cur >= '0' && *cur <= '9'))
		return (0);

	for (;;) {
		value = value * 10 + (*cur++ - '0');
		budget /= 10;

		/* Stop when another digit could not possibly fit ... */
		if (value * 10 > ulim || budget == 0)
			break;
		/* ... or when the input has no more digits to offer. */
		if (*cur < '0' || *cur > '9')
			break;
	}
	*buf = cur;

	if (value < llim || value > ulim)
		return (0);

	*dest = value;
	return (1);
}
608
/*
 * Parse a decimal count of seconds since the Epoch at `*buf' and break
 * it down into `*tm' with localtime_r().
 *
 *	buf	in/out cursor; set to where strtoll() stopped, which is
 *		the original position when no number could be parsed
 *	tm	receives the broken-down time on success
 *
 * Returns 1 on success and 0 when there is no number, the number is
 * negative, it overflows `long long' or `time_t', or localtime_r()
 * fails.  The caller's errno is preserved in every case.
 */
static int
epoch_to_tm(const unsigned char **buf, struct tm *tm)
{
	const int saved_errno = errno;
	int ret = 0;
	long long val;
	time_t secs;
	char *ep;

	/* Clear errno so that ERANGE can be attributed to strtoll(). */
	errno = 0;
	val = strtoll((const char *)*buf, &ep, 10);
	if ((const char *)*buf == ep)
		goto done;		/* no digits at all */
	if (val < 0 || (val == LLONG_MAX && errno == ERANGE))
		goto done;

	/*
	 * Keep the value in a long long until it is known to be sane and
	 * reject anything that does not survive the conversion to time_t,
	 * rather than silently truncating it.
	 */
	secs = (time_t)val;
	if ((long long)secs != val)
		goto done;
	if (localtime_r(&secs, tm) == NULL)
		goto done;
	ret = 1;
done:
	*buf = (const unsigned char *)ep;
	errno = saved_errno;
	return (ret);
}
632
/*
 * Look for one of the names in `n1' (and then `n2') as a
 * case-insensitive prefix of `bp'.
 *
 *	bp	text to examine
 *	tgt	receives the index of the matching name; untouched when
 *		nothing matches
 *	n1	first table to search (e.g. full names); NULL searches
 *		nothing at all
 *	n2	second table to search (e.g. abbreviations), or NULL
 *	c	number of entries examined in each table
 *
 * Returns a pointer just past the matched name, or NULL if no entry of
 * either table matches.
 */
static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	const char * const *tables[2];
	int which, idx;

	tables[0] = n1;
	tables[1] = n2;

	/* check full name - then abbreviated ones */
	for (which = 0; which < 2 && tables[which] != NULL; which++) {
		for (idx = 0; idx < c; idx++) {
			const char *name = tables[which][idx];
			unsigned int len = strlen(name);

			if (strncasecmp(name, (const char *)bp, len) != 0)
				continue;
			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}
654
/*
 * Number of leap years from year 0 up to and including year `y', using
 * the Gregorian rule (every 4th year, except every 100th, except every
 * 400th).  For a negative `y' the count is mirrored: the result is
 * -(count for year -(y + 1)) - 1.
 */
static int
leaps_thru_end_of(const int y)
{
	int mirrored;

	if (y >= 0)
		return (y / 4 - y / 100 + y / 400);

	/* -(y + 1) is non-negative, so the closed form applies directly. */
	mirrored = -(y + 1);
	return (-((mirrored / 4 - mirrored / 100 + mirrored / 400) + 1));
}
661