xref: /dragonfly/lib/libc/stdtime/strptime.c (revision f2a91d31)
1 /*-
2  * Copyright (c) 2014 Gary Mills
3  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
5  *
6  * Copyright (c) 2011 The FreeBSD Foundation
7  * All rights reserved.
8  * Portions of this software were developed by David Chisnall
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer
18  *    in the documentation and/or other materials provided with the
19  *    distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
22  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
30  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * The views and conclusions contained in the software and documentation
34  * are those of the authors and should not be interpreted as representing
35  * official policies, either expressed or implied, of Powerdog Industries.
36  *
37  * @(#)strptime.c	0.1 (Powerdog) 94/03/27
38  * @(#) Copyright (c) 1994 Powerdog Industries.  All rights reserved.
39  * $FreeBSD: head/lib/libc/stdtime/strptime.c 272679 2014-10-07 06:34:05Z ache $
40  */
41 
42 #include "namespace.h"
43 #include <time.h>
44 #include <ctype.h>
45 #include <errno.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <pthread.h>
49 #include "un-namespace.h"
50 #include "libc_private.h"
51 #include "timelocal.h"
52 #include "tzfile.h"
53 
54 static char * _strptime(const char *, const char *, struct tm *, int *, locale_t);
55 
/* Number of elements in a true array (not valid for pointers). */
#define	asizeof(a)	(sizeof(a) / sizeof(*(a)))

/*
 * Bits collected in _strptime()'s "flags" variable.  FLAG_NONE is the
 * initial value; each of the others records that the corresponding
 * struct tm date field has been filled in.
 */
#define	FLAG_NONE	0x01
#define	FLAG_YEAR	0x02
#define	FLAG_MONTH	0x04
#define	FLAG_YDAY	0x08
#define	FLAG_MDAY	0x10
#define	FLAG_WDAY	0x20
64 
/*
 * Return the day of the week on which January 1st of the given year
 * falls, as a value in the range used for tm_wday.
 *
 * "year" is the full calendar year (e.g. 2014), not a tm_year offset.
 * The formula is valid for the Gregorian calendar, which began
 * Sept 14, 1752 in the UK and its colonies. Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */

static int
first_wday_of(int year)
{
	int century = year / 100;
	int year_in_century = year % 100;
	int sum;

	sum = 2 * (3 - century % 4);		/* century term */
	sum += year_in_century;			/* one day per year ... */
	sum += year_in_century / 4;		/* ... plus leap days */
	sum += isleap(year) ? 6 : 0;		/* leap-year correction */
	sum += 1;

	return (sum % 7);
}
78 
79 static char *
80 _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp,
81 		locale_t locale)
82 {
83 	char	c;
84 	const char *ptr;
85 	int	day_offset = -1, wday_offset;
86 	int week_offset;
87 	int	i, len;
88 	int flags;
89 	int Ealternative, Oalternative;
90 	const struct lc_time_T *tptr = __get_current_time_locale(locale);
91 	static int start_of_month[2][13] = {
92 		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
93 		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
94 	};
95 
96 	flags = FLAG_NONE;
97 
98 	ptr = fmt;
99 	while (*ptr != 0) {
100 		c = *ptr++;
101 
102 		if (c != '%') {
103 			if (isspace_l((unsigned char)c, locale))
104 				while (*buf != 0 &&
105 				       isspace_l((unsigned char)*buf, locale))
106 					buf++;
107 			else if (c != *buf++)
108 				return (NULL);
109 			continue;
110 		}
111 
112 		Ealternative = 0;
113 		Oalternative = 0;
114 label:
115 		c = *ptr++;
116 		switch (c) {
117 		case '%':
118 			if (*buf++ != '%')
119 				return (NULL);
120 			break;
121 
122 		case '+':
123 			buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale);
124 			if (buf == NULL)
125 				return (NULL);
126 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
127 			break;
128 
129 		case 'C':
130 			if (!isdigit_l((unsigned char)*buf, locale))
131 				return (NULL);
132 
133 			/* XXX This will break for 3-digit centuries. */
134 			len = 2;
135 			for (i = 0; len && *buf != 0 &&
136 			     isdigit_l((unsigned char)*buf, locale); buf++) {
137 				i *= 10;
138 				i += *buf - '0';
139 				len--;
140 			}
141 			if (i < 19)
142 				return (NULL);
143 
144 			tm->tm_year = i * 100 - TM_YEAR_BASE;
145 			flags |= FLAG_YEAR;
146 
147 			break;
148 
149 		case 'c':
150 			buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale);
151 			if (buf == NULL)
152 				return (NULL);
153 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
154 			break;
155 
156 		case 'D':
157 			buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale);
158 			if (buf == NULL)
159 				return (NULL);
160 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
161 			break;
162 
163 		case 'E':
164 			if (Ealternative || Oalternative)
165 				break;
166 			Ealternative++;
167 			goto label;
168 
169 		case 'O':
170 			if (Ealternative || Oalternative)
171 				break;
172 			Oalternative++;
173 			goto label;
174 
175 		case 'F':
176 			buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale);
177 			if (buf == NULL)
178 				return (NULL);
179 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
180 			break;
181 
182 		case 'R':
183 			buf = _strptime(buf, "%H:%M", tm, GMTp, locale);
184 			if (buf == NULL)
185 				return (NULL);
186 			break;
187 
188 		case 'r':
189 			buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale);
190 			if (buf == NULL)
191 				return (NULL);
192 			break;
193 
194 		case 'T':
195 			buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale);
196 			if (buf == NULL)
197 				return (NULL);
198 			break;
199 
200 		case 'X':
201 			buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale);
202 			if (buf == NULL)
203 				return (NULL);
204 			break;
205 
206 		case 'x':
207 			buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale);
208 			if (buf == NULL)
209 				return (NULL);
210 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
211 			break;
212 
213 		case 'j':
214 			if (!isdigit_l((unsigned char)*buf, locale))
215 				return (NULL);
216 
217 			len = 3;
218 			for (i = 0; len && *buf != 0 &&
219 			     isdigit_l((unsigned char)*buf, locale); buf++){
220 				i *= 10;
221 				i += *buf - '0';
222 				len--;
223 			}
224 			if (i < 1 || i > 366)
225 				return (NULL);
226 
227 			tm->tm_yday = i - 1;
228 			flags |= FLAG_YDAY;
229 
230 			break;
231 
232 		case 'M':
233 		case 'S':
234 			if (*buf == 0 ||
235 				isspace_l((unsigned char)*buf, locale))
236 				break;
237 
238 			if (!isdigit_l((unsigned char)*buf, locale))
239 				return (NULL);
240 
241 			len = 2;
242 			for (i = 0; len && *buf != 0 &&
243 				isdigit_l((unsigned char)*buf, locale); buf++){
244 				i *= 10;
245 				i += *buf - '0';
246 				len--;
247 			}
248 
249 			if (c == 'M') {
250 				if (i > 59)
251 					return (NULL);
252 				tm->tm_min = i;
253 			} else {
254 				if (i > 60)
255 					return (NULL);
256 				tm->tm_sec = i;
257 			}
258 
259 			break;
260 
261 		case 'H':
262 		case 'I':
263 		case 'k':
264 		case 'l':
265 			/*
266 			 * Of these, %l is the only specifier explicitly
267 			 * documented as not being zero-padded.  However,
268 			 * there is no harm in allowing zero-padding.
269 			 *
270 			 * XXX The %l specifier may gobble one too many
271 			 * digits if used incorrectly.
272 			 */
273 			if (!isdigit_l((unsigned char)*buf, locale))
274 				return (NULL);
275 
276 			len = 2;
277 			for (i = 0; len && *buf != 0 &&
278 			     isdigit_l((unsigned char)*buf, locale); buf++) {
279 				i *= 10;
280 				i += *buf - '0';
281 				len--;
282 			}
283 			if (c == 'H' || c == 'k') {
284 				if (i > 23)
285 					return (NULL);
286 			} else if (i > 12)
287 				return (NULL);
288 
289 			tm->tm_hour = i;
290 
291 			break;
292 
293 		case 'p':
294 			/*
295 			 * XXX This is bogus if parsed before hour-related
296 			 * specifiers.
297 			 */
298 			len = strlen(tptr->am);
299 			if (strncasecmp_l(buf, tptr->am, len, locale) == 0) {
300 				if (tm->tm_hour > 12)
301 					return (NULL);
302 				if (tm->tm_hour == 12)
303 					tm->tm_hour = 0;
304 				buf += len;
305 				break;
306 			}
307 
308 			len = strlen(tptr->pm);
309 			if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) {
310 				if (tm->tm_hour > 12)
311 					return (NULL);
312 				if (tm->tm_hour != 12)
313 					tm->tm_hour += 12;
314 				buf += len;
315 				break;
316 			}
317 
318 			return (NULL);
319 
320 		case 'A':
321 		case 'a':
322 			for (i = 0; i < asizeof(tptr->weekday); i++) {
323 				len = strlen(tptr->weekday[i]);
324 				if (strncasecmp_l(buf, tptr->weekday[i],
325 						len, locale) == 0)
326 					break;
327 				len = strlen(tptr->wday[i]);
328 				if (strncasecmp_l(buf, tptr->wday[i],
329 						len, locale) == 0)
330 					break;
331 			}
332 			if (i == asizeof(tptr->weekday))
333 				return (NULL);
334 
335 			buf += len;
336 			tm->tm_wday = i;
337 			flags |= FLAG_WDAY;
338 			break;
339 
340 		case 'U':
341 		case 'W':
342 			/*
343 			 * XXX This is bogus, as we can not assume any valid
344 			 * information present in the tm structure at this
345 			 * point to calculate a real value, so just check the
346 			 * range for now.
347 			 */
348 			if (!isdigit_l((unsigned char)*buf, locale))
349 				return (NULL);
350 
351 			len = 2;
352 			for (i = 0; len && *buf != 0 &&
353 			     isdigit_l((unsigned char)*buf, locale); buf++) {
354 				i *= 10;
355 				i += *buf - '0';
356 				len--;
357 			}
358 			if (i > 53)
359 				return (NULL);
360 
361 			if (c == 'U')
362 				day_offset = TM_SUNDAY;
363 			else
364 				day_offset = TM_MONDAY;
365 
366 
367 			week_offset = i;
368 
369 			break;
370 
371 		case 'w':
372 			if (!isdigit_l((unsigned char)*buf, locale))
373 				return (NULL);
374 
375 			i = *buf - '0';
376 			buf++;
377 			if (i > 6)
378 				return (NULL);
379 
380 			tm->tm_wday = i;
381 			flags |= FLAG_WDAY;
382 
383 			break;
384 
385 		case 'e':
386 			/*
387 			 * With %e format, our strftime(3) adds a blank space
388 			 * before single digits.
389 			 */
390 			if (*buf != 0 &&
391 			    isspace_l((unsigned char)*buf, locale))
392 			       buf++;
393 			/* FALLTHROUGH */
394 		case 'd':
395 			/*
396 			 * The %e specifier was once explicitly documented as
397 			 * not being zero-padded but was later changed to
398 			 * equivalent to %d.  There is no harm in allowing
399 			 * such padding.
400 			 *
401 			 * XXX The %e specifier may gobble one too many
402 			 * digits if used incorrectly.
403 			 */
404 			if (!isdigit_l((unsigned char)*buf, locale))
405 				return (NULL);
406 
407 			len = 2;
408 			for (i = 0; len && *buf != 0 &&
409 			     isdigit_l((unsigned char)*buf, locale); buf++) {
410 				i *= 10;
411 				i += *buf - '0';
412 				len--;
413 			}
414 			if (i > 31)
415 				return (NULL);
416 
417 			tm->tm_mday = i;
418 			flags |= FLAG_MDAY;
419 
420 			break;
421 
422 		case 'B':
423 		case 'b':
424 		case 'h':
425 			for (i = 0; i < asizeof(tptr->month); i++) {
426 				if (Oalternative) {
427 					if (c == 'B') {
428 						len = strlen(tptr->alt_month[i]);
429 						if (strncasecmp_l(buf,
430 								tptr->alt_month[i],
431 								len, locale) == 0)
432 							break;
433 					}
434 				} else {
435 					len = strlen(tptr->month[i]);
436 					if (strncasecmp_l(buf, tptr->month[i],
437 							len, locale) == 0)
438 						break;
439 				}
440 			}
441 			/*
442 			 * Try the abbreviated month name if the full name
443 			 * wasn't found and Oalternative was not requested.
444 			 */
445 			if (i == asizeof(tptr->month) && !Oalternative) {
446 				for (i = 0; i < asizeof(tptr->month); i++) {
447 					len = strlen(tptr->mon[i]);
448 					if (strncasecmp_l(buf, tptr->mon[i],
449 							len, locale) == 0)
450 						break;
451 				}
452 			}
453 			if (i == asizeof(tptr->month))
454 				return (NULL);
455 
456 			tm->tm_mon = i;
457 			buf += len;
458 			flags |= FLAG_MONTH;
459 
460 			break;
461 
462 		case 'm':
463 			if (!isdigit_l((unsigned char)*buf, locale))
464 				return (NULL);
465 
466 			len = 2;
467 			for (i = 0; len && *buf != 0 &&
468 			     isdigit_l((unsigned char)*buf, locale); buf++) {
469 				i *= 10;
470 				i += *buf - '0';
471 				len--;
472 			}
473 			if (i < 1 || i > 12)
474 				return (NULL);
475 
476 			tm->tm_mon = i - 1;
477 			flags |= FLAG_MONTH;
478 
479 			break;
480 
481 		case 's':
482 			{
483 			char *cp;
484 			int sverrno;
485 			long n;
486 			time_t t;
487 
488 			sverrno = errno;
489 			errno = 0;
490 			n = strtol_l(buf, &cp, 10, locale);
491 			if (errno == ERANGE || (long)(t = n) != n) {
492 				errno = sverrno;
493 				return (NULL);
494 			}
495 			errno = sverrno;
496 			buf = cp;
497 			if (gmtime_r(&t, tm) == NULL)
498 				return (NULL);
499 			*GMTp = 1;
500 			flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH |
501 			    FLAG_MDAY | FLAG_YEAR;
502 			}
503 			break;
504 
505 		case 'Y':
506 		case 'y':
507 			if (*buf == 0 ||
508 			    isspace_l((unsigned char)*buf, locale))
509 				break;
510 
511 			if (!isdigit_l((unsigned char)*buf, locale))
512 				return (NULL);
513 
514 			len = (c == 'Y') ? 4 : 2;
515 			for (i = 0; len && *buf != 0 &&
516 			     isdigit_l((unsigned char)*buf, locale); buf++) {
517 				i *= 10;
518 				i += *buf - '0';
519 				len--;
520 			}
521 			if (c == 'Y')
522 				i -= TM_YEAR_BASE;
523 			if (c == 'y' && i < 69)
524 				i += 100;
525 			if (i < 0)
526 				return (NULL);
527 
528 			tm->tm_year = i;
529 			flags |= FLAG_YEAR;
530 
531 			break;
532 
533 		case 'Z':
534 			{
535 			const char *cp;
536 			char *zonestr;
537 
538 			for (cp = buf; *cp &&
539 			     isupper_l((unsigned char)*cp, locale); ++cp) {
540 				/*empty*/}
541 			if (cp - buf) {
542 				zonestr = alloca(cp - buf + 1);
543 				strncpy(zonestr, buf, cp - buf);
544 				zonestr[cp - buf] = '\0';
545 				tzset();
546 				if (0 == strcmp(zonestr, "GMT") ||
547 				    0 == strcmp(zonestr, "UTC")) {
548 				    *GMTp = 1;
549 				} else if (0 == strcmp(zonestr, tzname[0])) {
550 				    tm->tm_isdst = 0;
551 				} else if (0 == strcmp(zonestr, tzname[1])) {
552 				    tm->tm_isdst = 1;
553 				} else {
554 				    return (NULL);
555 				}
556 				buf += cp - buf;
557 			}
558 			}
559 			break;
560 
561 		case 'z':
562 			{
563 			int sign = 1;
564 			len = 4;			/* RFC 822/ISO 8601 */
565 
566 			if (*buf != '+') {
567 				if (*buf == '-')
568 					sign = -1;
569 				else if (*buf == 'Z')	/* ISO 8601 Z (UTC) */
570 					len = 0;
571 				else
572 					return (NULL);
573 			}
574 
575 			buf++;
576 			i = 0;
577 			for (; len > 0; len--) {
578 				if (isdigit_l((unsigned char)*buf, locale)) {
579 					i *= 10;
580 					i += *buf - '0';
581 					buf++;
582 				} else if (*buf == ':' && len == 2) {
583 					buf++;		/* ISO 8601 +hh:mm */
584 					if (isdigit_l((unsigned char)*buf,
585 					    locale)) {
586 						i *= 10;
587 						i += *buf - '0';
588 						buf++;
589 					} else {
590 						return (NULL);
591 					}
592 				} else if (len == 2) {
593 					i *= 100;	/* ISO 8601 +hh */
594 					break;
595 				} else {
596 					return (NULL);
597 				}
598 			}
599 
600 			tm->tm_hour -= sign * (i / 100);
601 			tm->tm_min  -= sign * (i % 100);
602 			*GMTp = 1;
603 			}
604 			break;
605 
606 		case 'n':
607 		case 't':
608 			while (isspace_l((unsigned char)*buf, locale))
609 				buf++;
610 			break;
611 
612 		default:
613 			return (NULL);
614 		}
615 	}
616 
617 	if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
618 		if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
619 		    (FLAG_MONTH | FLAG_MDAY)) {
620 			tm->tm_yday = start_of_month[isleap(tm->tm_year +
621 			    TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
622 			flags |= FLAG_YDAY;
623 		} else if (day_offset != -1) {
624 			/* Set the date to the first Sunday (or Monday)
625 			 * of the specified week of the year.
626 			 */
627 			if (!(flags & FLAG_WDAY)) {
628 				tm->tm_wday = day_offset;
629 				flags |= FLAG_WDAY;
630 			}
631 			tm->tm_yday = (7 -
632 			    first_wday_of(tm->tm_year + TM_YEAR_BASE) +
633 			    day_offset) % 7 + (week_offset - 1 +
634 			    (tm->tm_wday == 0 ? day_offset : 0)) * 7 +
635 			    tm->tm_wday - day_offset;
636 			flags |= FLAG_YDAY;
637 		}
638 	}
639 
640 	if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
641 		if (!(flags & FLAG_MONTH)) {
642 			i = 0;
643 			while (tm->tm_yday >=
644 			    start_of_month[isleap(tm->tm_year +
645 			    TM_YEAR_BASE)][i])
646 				i++;
647 			if (i > 12) {
648 				i = 1;
649 				tm->tm_yday -=
650 				    start_of_month[isleap(tm->tm_year +
651 				    TM_YEAR_BASE)][12];
652 				tm->tm_year++;
653 			}
654 			tm->tm_mon = i - 1;
655 			flags |= FLAG_MONTH;
656 		}
657 		if (!(flags & FLAG_MDAY)) {
658 			tm->tm_mday = tm->tm_yday -
659 			    start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)]
660 			    [tm->tm_mon] + 1;
661 			flags |= FLAG_MDAY;
662 		}
663 		if (!(flags & FLAG_WDAY)) {
664 			i = 0;
665 			wday_offset = first_wday_of(tm->tm_year);
666 			while (i++ <= tm->tm_yday) {
667 				if (wday_offset++ >= 6)
668 					wday_offset = 0;
669 			}
670 			tm->tm_wday = wday_offset;
671 			flags |= FLAG_WDAY;
672 		}
673 	}
674 
675 	return ((char *)buf);
676 }
677 
678 char *
679 strptime_l(const char * __restrict buf, const char * __restrict fmt,
680     struct tm * __restrict tm, locale_t loc)
681 {
682 	char *ret;
683 	int gmt;
684 	FIX_LOCALE(loc);
685 
686 	gmt = 0;
687 	ret = _strptime(buf, fmt, tm, &gmt, loc);
688 	if (ret && gmt) {
689 		time_t t = timegm(tm);
690 
691 		localtime_r(&t, tm);
692 	}
693 
694 	return (ret);
695 }
696 
697 char *
698 strptime(const char * __restrict buf, const char * __restrict fmt,
699     struct tm * __restrict tm)
700 {
701 	return strptime_l(buf, fmt, tm, __get_locale());
702 }
703