xref: /dragonfly/lib/libc/stdtime/strptime.c (revision c9c5aa9e)
1 /*-
2  * Copyright (c) 2014 Gary Mills
3  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
5  *
6  * Copyright (c) 2011 The FreeBSD Foundation
7  * All rights reserved.
8  * Portions of this software were developed by David Chisnall
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer
18  *    in the documentation and/or other materials provided with the
19  *    distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
22  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
30  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * The views and conclusions contained in the software and documentation
34  * are those of the authors and should not be interpreted as representing
35  * official policies, either expressed or implied, of Powerdog Industries.
36  *
37  * @(#)strptime.c	0.1 (Powerdog) 94/03/27
38  * @(#) Copyright (c) 1994 Powerdog Industries.  All rights reserved.
39  * $FreeBSD: head/lib/libc/stdtime/strptime.c 272679 2014-10-07 06:34:05Z ache $
40  */
41 
42 #include "namespace.h"
43 #include <time.h>
44 #include <ctype.h>
45 #include <errno.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include "un-namespace.h"
49 #include "libc_private.h"
50 #include "timelocal.h"
51 #include "tzfile.h"
52 
53 static char * _strptime(const char *, const char *, struct tm *, int *, locale_t);
54 
55 #define	asizeof(a)	(sizeof(a) / sizeof((a)[0]))
56 
57 #define	FLAG_NONE	(1 << 0)
58 #define	FLAG_YEAR	(1 << 1)
59 #define	FLAG_MONTH	(1 << 2)
60 #define	FLAG_YDAY	(1 << 3)
61 #define	FLAG_MDAY	(1 << 4)
62 #define	FLAG_WDAY	(1 << 5)
63 
/*
 * Return the day of the week on which January 1st of `year' falls,
 * using the same numbering as tm_wday (0 = Sunday ... 6 = Saturday).
 *
 * `year' is a full calendar year (e.g. 2014), not an offset from
 * TM_YEAR_BASE.  The formula is only valid for the Gregorian calendar,
 * which began Sept 14, 1752 in the UK and its colonies. Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */

static int
first_wday_of(int year)
{
	int century = year / 100;
	int year_in_century = year % 100;
	int sum;

	/* Century term: repeats with the 400-year Gregorian cycle. */
	sum = 2 * (3 - century % 4);
	/* One weekday of drift per year, plus one more per leap year. */
	sum += year_in_century;
	sum += year_in_century / 4;
	/* Leap-year correction for a date that precedes Feb 29. */
	sum += isleap(year) ? 6 : 0;
	sum += 1;

	return (sum % 7);
}
77 
78 static char *
79 _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp,
80 		locale_t locale)
81 {
82 	char	c;
83 	const char *ptr;
84 	int	day_offset = -1, wday_offset;
85 	int week_offset;
86 	int	i, len;
87 	int flags;
88 	int Ealternative, Oalternative;
89 	const struct lc_time_T *tptr = __get_current_time_locale(locale);
90 	static int start_of_month[2][13] = {
91 		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
92 		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
93 	};
94 
95 	flags = FLAG_NONE;
96 
97 	ptr = fmt;
98 	while (*ptr != 0) {
99 		c = *ptr++;
100 
101 		if (c != '%') {
102 			if (isspace_l((unsigned char)c, locale))
103 				while (*buf != 0 &&
104 				       isspace_l((unsigned char)*buf, locale))
105 					buf++;
106 			else if (c != *buf++)
107 				return (NULL);
108 			continue;
109 		}
110 
111 		Ealternative = 0;
112 		Oalternative = 0;
113 label:
114 		c = *ptr++;
115 		switch (c) {
116 		case '%':
117 			if (*buf++ != '%')
118 				return (NULL);
119 			break;
120 
121 		case '+':
122 			buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale);
123 			if (buf == NULL)
124 				return (NULL);
125 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
126 			break;
127 
128 		case 'C':
129 			if (!isdigit_l((unsigned char)*buf, locale))
130 				return (NULL);
131 
132 			/* XXX This will break for 3-digit centuries. */
133 			len = 2;
134 			for (i = 0; len && *buf != 0 &&
135 			     isdigit_l((unsigned char)*buf, locale); buf++) {
136 				i *= 10;
137 				i += *buf - '0';
138 				len--;
139 			}
140 			if (i < 19)
141 				return (NULL);
142 
143 			tm->tm_year = i * 100 - TM_YEAR_BASE;
144 			flags |= FLAG_YEAR;
145 
146 			break;
147 
148 		case 'c':
149 			buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale);
150 			if (buf == NULL)
151 				return (NULL);
152 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
153 			break;
154 
155 		case 'D':
156 			buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale);
157 			if (buf == NULL)
158 				return (NULL);
159 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
160 			break;
161 
162 		case 'E':
163 			if (Ealternative || Oalternative)
164 				break;
165 			Ealternative++;
166 			goto label;
167 
168 		case 'O':
169 			if (Ealternative || Oalternative)
170 				break;
171 			Oalternative++;
172 			goto label;
173 
174 		case 'F':
175 			buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale);
176 			if (buf == NULL)
177 				return (NULL);
178 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
179 			break;
180 
181 		case 'R':
182 			buf = _strptime(buf, "%H:%M", tm, GMTp, locale);
183 			if (buf == NULL)
184 				return (NULL);
185 			break;
186 
187 		case 'r':
188 			buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale);
189 			if (buf == NULL)
190 				return (NULL);
191 			break;
192 
193 		case 'T':
194 			buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale);
195 			if (buf == NULL)
196 				return (NULL);
197 			break;
198 
199 		case 'X':
200 			buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale);
201 			if (buf == NULL)
202 				return (NULL);
203 			break;
204 
205 		case 'x':
206 			buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale);
207 			if (buf == NULL)
208 				return (NULL);
209 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
210 			break;
211 
212 		case 'j':
213 			if (!isdigit_l((unsigned char)*buf, locale))
214 				return (NULL);
215 
216 			len = 3;
217 			for (i = 0; len && *buf != 0 &&
218 			     isdigit_l((unsigned char)*buf, locale); buf++){
219 				i *= 10;
220 				i += *buf - '0';
221 				len--;
222 			}
223 			if (i < 1 || i > 366)
224 				return (NULL);
225 
226 			tm->tm_yday = i - 1;
227 			flags |= FLAG_YDAY;
228 
229 			break;
230 
231 		case 'M':
232 		case 'S':
233 			if (*buf == 0 ||
234 				isspace_l((unsigned char)*buf, locale))
235 				break;
236 
237 			if (!isdigit_l((unsigned char)*buf, locale))
238 				return (NULL);
239 
240 			len = 2;
241 			for (i = 0; len && *buf != 0 &&
242 				isdigit_l((unsigned char)*buf, locale); buf++){
243 				i *= 10;
244 				i += *buf - '0';
245 				len--;
246 			}
247 
248 			if (c == 'M') {
249 				if (i > 59)
250 					return (NULL);
251 				tm->tm_min = i;
252 			} else {
253 				if (i > 60)
254 					return (NULL);
255 				tm->tm_sec = i;
256 			}
257 
258 			break;
259 
260 		case 'H':
261 		case 'I':
262 		case 'k':
263 		case 'l':
264 			/*
265 			 * Of these, %l is the only specifier explicitly
266 			 * documented as not being zero-padded.  However,
267 			 * there is no harm in allowing zero-padding.
268 			 *
269 			 * XXX The %l specifier may gobble one too many
270 			 * digits if used incorrectly.
271 			 */
272 			if (!isdigit_l((unsigned char)*buf, locale))
273 				return (NULL);
274 
275 			len = 2;
276 			for (i = 0; len && *buf != 0 &&
277 			     isdigit_l((unsigned char)*buf, locale); buf++) {
278 				i *= 10;
279 				i += *buf - '0';
280 				len--;
281 			}
282 			if (c == 'H' || c == 'k') {
283 				if (i > 23)
284 					return (NULL);
285 			} else if (i > 12)
286 				return (NULL);
287 
288 			tm->tm_hour = i;
289 
290 			break;
291 
292 		case 'p':
293 			/*
294 			 * XXX This is bogus if parsed before hour-related
295 			 * specifiers.
296 			 */
297 			len = strlen(tptr->am);
298 			if (strncasecmp_l(buf, tptr->am, len, locale) == 0) {
299 				if (tm->tm_hour > 12)
300 					return (NULL);
301 				if (tm->tm_hour == 12)
302 					tm->tm_hour = 0;
303 				buf += len;
304 				break;
305 			}
306 
307 			len = strlen(tptr->pm);
308 			if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) {
309 				if (tm->tm_hour > 12)
310 					return (NULL);
311 				if (tm->tm_hour != 12)
312 					tm->tm_hour += 12;
313 				buf += len;
314 				break;
315 			}
316 
317 			return (NULL);
318 
319 		case 'A':
320 		case 'a':
321 			for (i = 0; i < asizeof(tptr->weekday); i++) {
322 				len = strlen(tptr->weekday[i]);
323 				if (strncasecmp_l(buf, tptr->weekday[i],
324 						len, locale) == 0)
325 					break;
326 				len = strlen(tptr->wday[i]);
327 				if (strncasecmp_l(buf, tptr->wday[i],
328 						len, locale) == 0)
329 					break;
330 			}
331 			if (i == asizeof(tptr->weekday))
332 				return (NULL);
333 
334 			buf += len;
335 			tm->tm_wday = i;
336 			flags |= FLAG_WDAY;
337 			break;
338 
339 		case 'U':
340 		case 'W':
341 			/*
342 			 * XXX This is bogus, as we can not assume any valid
343 			 * information present in the tm structure at this
344 			 * point to calculate a real value, so just check the
345 			 * range for now.
346 			 */
347 			if (!isdigit_l((unsigned char)*buf, locale))
348 				return (NULL);
349 
350 			len = 2;
351 			for (i = 0; len && *buf != 0 &&
352 			     isdigit_l((unsigned char)*buf, locale); buf++) {
353 				i *= 10;
354 				i += *buf - '0';
355 				len--;
356 			}
357 			if (i > 53)
358 				return (NULL);
359 
360 			if (c == 'U')
361 				day_offset = TM_SUNDAY;
362 			else
363 				day_offset = TM_MONDAY;
364 
365 
366 			week_offset = i;
367 
368 			break;
369 
370 		case 'w':
371 			if (!isdigit_l((unsigned char)*buf, locale))
372 				return (NULL);
373 
374 			i = *buf - '0';
375 			buf++;
376 			if (i > 6)
377 				return (NULL);
378 
379 			tm->tm_wday = i;
380 			flags |= FLAG_WDAY;
381 
382 			break;
383 
384 		case 'e':
385 			/*
386 			 * With %e format, our strftime(3) adds a blank space
387 			 * before single digits.
388 			 */
389 			if (*buf != 0 &&
390 			    isspace_l((unsigned char)*buf, locale))
391 			       buf++;
392 			/* FALLTHROUGH */
393 		case 'd':
394 			/*
395 			 * The %e specifier was once explicitly documented as
396 			 * not being zero-padded but was later changed to
397 			 * equivalent to %d.  There is no harm in allowing
398 			 * such padding.
399 			 *
400 			 * XXX The %e specifier may gobble one too many
401 			 * digits if used incorrectly.
402 			 */
403 			if (!isdigit_l((unsigned char)*buf, locale))
404 				return (NULL);
405 
406 			len = 2;
407 			for (i = 0; len && *buf != 0 &&
408 			     isdigit_l((unsigned char)*buf, locale); buf++) {
409 				i *= 10;
410 				i += *buf - '0';
411 				len--;
412 			}
413 			if (i > 31)
414 				return (NULL);
415 
416 			tm->tm_mday = i;
417 			flags |= FLAG_MDAY;
418 
419 			break;
420 
421 		case 'B':
422 		case 'b':
423 		case 'h':
424 			for (i = 0; i < asizeof(tptr->month); i++) {
425 				if (Oalternative) {
426 					if (c == 'B') {
427 						len = strlen(tptr->alt_month[i]);
428 						if (strncasecmp_l(buf,
429 								tptr->alt_month[i],
430 								len, locale) == 0)
431 							break;
432 					}
433 				} else {
434 					len = strlen(tptr->month[i]);
435 					if (strncasecmp_l(buf, tptr->month[i],
436 							len, locale) == 0)
437 						break;
438 				}
439 			}
440 			/*
441 			 * Try the abbreviated month name if the full name
442 			 * wasn't found and Oalternative was not requested.
443 			 */
444 			if (i == asizeof(tptr->month) && !Oalternative) {
445 				for (i = 0; i < asizeof(tptr->month); i++) {
446 					len = strlen(tptr->mon[i]);
447 					if (strncasecmp_l(buf, tptr->mon[i],
448 							len, locale) == 0)
449 						break;
450 				}
451 			}
452 			if (i == asizeof(tptr->month))
453 				return (NULL);
454 
455 			tm->tm_mon = i;
456 			buf += len;
457 			flags |= FLAG_MONTH;
458 
459 			break;
460 
461 		case 'm':
462 			if (!isdigit_l((unsigned char)*buf, locale))
463 				return (NULL);
464 
465 			len = 2;
466 			for (i = 0; len && *buf != 0 &&
467 			     isdigit_l((unsigned char)*buf, locale); buf++) {
468 				i *= 10;
469 				i += *buf - '0';
470 				len--;
471 			}
472 			if (i < 1 || i > 12)
473 				return (NULL);
474 
475 			tm->tm_mon = i - 1;
476 			flags |= FLAG_MONTH;
477 
478 			break;
479 
480 		case 's':
481 			{
482 			char *cp;
483 			int sverrno;
484 			long n;
485 			time_t t;
486 
487 			sverrno = errno;
488 			errno = 0;
489 			n = strtol_l(buf, &cp, 10, locale);
490 			if (errno == ERANGE || (long)(t = n) != n) {
491 				errno = sverrno;
492 				return (NULL);
493 			}
494 			errno = sverrno;
495 			buf = cp;
496 			if (gmtime_r(&t, tm) == NULL)
497 				return (NULL);
498 			*GMTp = 1;
499 			flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH |
500 			    FLAG_MDAY | FLAG_YEAR;
501 			}
502 			break;
503 
504 		case 'Y':
505 		case 'y':
506 			if (*buf == 0 ||
507 			    isspace_l((unsigned char)*buf, locale))
508 				break;
509 
510 			if (!isdigit_l((unsigned char)*buf, locale))
511 				return (NULL);
512 
513 			len = (c == 'Y') ? 4 : 2;
514 			for (i = 0; len && *buf != 0 &&
515 			     isdigit_l((unsigned char)*buf, locale); buf++) {
516 				i *= 10;
517 				i += *buf - '0';
518 				len--;
519 			}
520 			if (c == 'Y')
521 				i -= TM_YEAR_BASE;
522 			if (c == 'y' && i < 69)
523 				i += 100;
524 			if (i < 0)
525 				return (NULL);
526 
527 			tm->tm_year = i;
528 			flags |= FLAG_YEAR;
529 
530 			break;
531 
532 		case 'Z':
533 			{
534 			const char *cp;
535 			char *zonestr;
536 
537 			for (cp = buf; *cp &&
538 			     isupper_l((unsigned char)*cp, locale); ++cp) {
539 				/*empty*/}
540 			if (cp - buf) {
541 				zonestr = alloca(cp - buf + 1);
542 				strncpy(zonestr, buf, cp - buf);
543 				zonestr[cp - buf] = '\0';
544 				tzset();
545 				if (0 == strcmp(zonestr, "GMT") ||
546 				    0 == strcmp(zonestr, "UTC")) {
547 				    *GMTp = 1;
548 				} else if (0 == strcmp(zonestr, tzname[0])) {
549 				    tm->tm_isdst = 0;
550 				} else if (0 == strcmp(zonestr, tzname[1])) {
551 				    tm->tm_isdst = 1;
552 				} else {
553 				    return (NULL);
554 				}
555 				buf += cp - buf;
556 			}
557 			}
558 			break;
559 
560 		case 'z':
561 			{
562 			int sign = 1;
563 			len = 4;			/* RFC 822/ISO 8601 */
564 
565 			if (*buf != '+') {
566 				if (*buf == '-')
567 					sign = -1;
568 				else if (*buf == 'Z')	/* ISO 8601 Z (UTC) */
569 					len = 0;
570 				else
571 					return (NULL);
572 			}
573 
574 			buf++;
575 			i = 0;
576 			for (; len > 0; len--) {
577 				if (isdigit_l((unsigned char)*buf, locale)) {
578 					i *= 10;
579 					i += *buf - '0';
580 					buf++;
581 				} else if (*buf == ':' && len == 2) {
582 					buf++;		/* ISO 8601 +hh:mm */
583 					if (isdigit_l((unsigned char)*buf,
584 					    locale)) {
585 						i *= 10;
586 						i += *buf - '0';
587 						buf++;
588 					} else {
589 						return (NULL);
590 					}
591 				} else if (len == 2) {
592 					i *= 100;	/* ISO 8601 +hh */
593 					break;
594 				} else {
595 					return (NULL);
596 				}
597 			}
598 
599 			tm->tm_hour -= sign * (i / 100);
600 			tm->tm_min  -= sign * (i % 100);
601 			*GMTp = 1;
602 			}
603 			break;
604 
605 		case 'n':
606 		case 't':
607 			while (isspace_l((unsigned char)*buf, locale))
608 				buf++;
609 			break;
610 
611 		default:
612 			return (NULL);
613 		}
614 	}
615 
616 	if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
617 		if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
618 		    (FLAG_MONTH | FLAG_MDAY)) {
619 			tm->tm_yday = start_of_month[isleap(tm->tm_year +
620 			    TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
621 			flags |= FLAG_YDAY;
622 		} else if (day_offset != -1) {
623 			/* Set the date to the first Sunday (or Monday)
624 			 * of the specified week of the year.
625 			 */
626 			if (!(flags & FLAG_WDAY)) {
627 				tm->tm_wday = day_offset;
628 				flags |= FLAG_WDAY;
629 			}
630 			tm->tm_yday = (7 -
631 			    first_wday_of(tm->tm_year + TM_YEAR_BASE) +
632 			    day_offset) % 7 + (week_offset - 1 +
633 			    (tm->tm_wday == 0 ? day_offset : 0)) * 7 +
634 			    tm->tm_wday - day_offset;
635 			flags |= FLAG_YDAY;
636 		}
637 	}
638 
639 	if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
640 		if (!(flags & FLAG_MONTH)) {
641 			i = 0;
642 			while (tm->tm_yday >=
643 			    start_of_month[isleap(tm->tm_year +
644 			    TM_YEAR_BASE)][i])
645 				i++;
646 			if (i > 12) {
647 				i = 1;
648 				tm->tm_yday -=
649 				    start_of_month[isleap(tm->tm_year +
650 				    TM_YEAR_BASE)][12];
651 				tm->tm_year++;
652 			}
653 			tm->tm_mon = i - 1;
654 			flags |= FLAG_MONTH;
655 		}
656 		if (!(flags & FLAG_MDAY)) {
657 			tm->tm_mday = tm->tm_yday -
658 			    start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)]
659 			    [tm->tm_mon] + 1;
660 			flags |= FLAG_MDAY;
661 		}
662 		if (!(flags & FLAG_WDAY)) {
663 			i = 0;
664 			wday_offset = first_wday_of(tm->tm_year);
665 			while (i++ <= tm->tm_yday) {
666 				if (wday_offset++ >= 6)
667 					wday_offset = 0;
668 			}
669 			tm->tm_wday = wday_offset;
670 			flags |= FLAG_WDAY;
671 		}
672 	}
673 
674 	return ((char *)buf);
675 }
676 
677 char *
678 strptime_l(const char * __restrict buf, const char * __restrict fmt,
679     struct tm * __restrict tm, locale_t loc)
680 {
681 	char *ret;
682 	int gmt;
683 	FIX_LOCALE(loc);
684 
685 	gmt = 0;
686 	ret = _strptime(buf, fmt, tm, &gmt, loc);
687 	if (ret && gmt) {
688 		time_t t = timegm(tm);
689 
690 		localtime_r(&t, tm);
691 	}
692 
693 	return (ret);
694 }
695 
696 char *
697 strptime(const char * __restrict buf, const char * __restrict fmt,
698     struct tm * __restrict tm)
699 {
700 	return strptime_l(buf, fmt, tm, __get_locale());
701 }
702