xref: /openbsd/lib/libc/time/strptime.c (revision 3d8817e4)
1 /*	$OpenBSD: strptime.c,v 1.14 2011/01/19 16:50:14 landry Exp $ */
2 /*	$NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $	*/
3 
4 /*-
5  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code was contributed to The NetBSD Foundation by Klaus Klein.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/localedef.h>
33 #include <ctype.h>
34 #include <locale.h>
35 #include <string.h>
36 #include <time.h>
37 #include <tzfile.h>
38 
/* Shorthand for a member of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * _ALT_E and _ALT_O are the flag bits recorded for the "%E" and "%O"
 * modifiers.  _LEGAL_ALT(x) makes the enclosing function return 0 (a
 * null pointer) when `alt_format' has any bit set that is not in `x'.
 * It is wrapped in do { } while (0) so that "_LEGAL_ALT(x);" is exactly
 * one statement and stays safe inside an un-braced if/else.
 */
#define _ALT_E			0x01
#define _ALT_O			0x02
#define	_LEGAL_ALT(x)		do {					\
	if (alt_format & ~(x))						\
		return (0);						\
} while (0)
48 
/* Zone name compared against the input (and recorded) by %Z. */
static char gmt[] = "GMT";
#ifdef TM_ZONE
/* Zone name recorded by %z for the Z / UT / GMT spellings. */
static char utc[] = "UTC";
#endif

/*
 * RFC-822/RFC-2822 North American zone abbreviations: standard time
 * first, then daylight time.  The callers only search the first four
 * entries of each table; the fifth is an all-NUL filler.
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};
60 
61 static	int _conv_num(const unsigned char **, int *, int, int);
62 static	char *_strptime(const char *, const char *, struct tm *, int);
63 static	const u_char *_find_string(const u_char *, int *, const char * const *,
64 	    const char * const *, int);
65 
66 
/*
 * Public entry point: parse `buf' according to `fmt' into `tm'.
 *
 * All the work is done by _strptime(); the final argument of 1 asks it
 * to reset its century / two-digit-year state before parsing.  The
 * result of _strptime() is returned unchanged.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	char *end;

	end = _strptime(buf, fmt, tm, 1);
	return (end);
}
72 
73 static char *
74 _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
75 {
76 	unsigned char c;
77 	const unsigned char *bp, *ep;
78 	size_t len;
79 	int alt_format, i, offs;
80 	int neg = 0;
81 	static int century, relyear;
82 
83 	if (initialize) {
84 		century = TM_YEAR_BASE;
85 		relyear = -1;
86 	}
87 
88 	bp = (unsigned char *)buf;
89 	while ((c = *fmt) != '\0') {
90 		/* Clear `alternate' modifier prior to new conversion. */
91 		alt_format = 0;
92 
93 		/* Eat up white-space. */
94 		if (isspace(c)) {
95 			while (isspace(*bp))
96 				bp++;
97 
98 			fmt++;
99 			continue;
100 		}
101 
102 		if ((c = *fmt++) != '%')
103 			goto literal;
104 
105 
106 again:		switch (c = *fmt++) {
107 		case '%':	/* "%%" is converted to "%". */
108 literal:
109 		if (c != *bp++)
110 			return (NULL);
111 
112 		break;
113 
114 		/*
115 		 * "Alternative" modifiers. Just set the appropriate flag
116 		 * and start over again.
117 		 */
118 		case 'E':	/* "%E?" alternative conversion modifier. */
119 			_LEGAL_ALT(0);
120 			alt_format |= _ALT_E;
121 			goto again;
122 
123 		case 'O':	/* "%O?" alternative conversion modifier. */
124 			_LEGAL_ALT(0);
125 			alt_format |= _ALT_O;
126 			goto again;
127 
128 		/*
129 		 * "Complex" conversion rules, implemented through recursion.
130 		 */
131 		case 'c':	/* Date and time, using the locale's format. */
132 			_LEGAL_ALT(_ALT_E);
133 			if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
134 				return (NULL);
135 			break;
136 
137 		case 'D':	/* The date as "%m/%d/%y". */
138 			_LEGAL_ALT(0);
139 			if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
140 				return (NULL);
141 			break;
142 
143 		case 'F':	/* The date as "%Y-%m-%d". */
144 			_LEGAL_ALT(0);
145 			if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
146 				return (NULL);
147 			continue;
148 
149 		case 'R':	/* The time as "%H:%M". */
150 			_LEGAL_ALT(0);
151 			if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
152 				return (NULL);
153 			break;
154 
155 		case 'r':	/* The time as "%I:%M:%S %p". */
156 			_LEGAL_ALT(0);
157 			if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
158 				return (NULL);
159 			break;
160 
161 		case 'T':	/* The time as "%H:%M:%S". */
162 			_LEGAL_ALT(0);
163 			if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
164 				return (NULL);
165 			break;
166 
167 		case 'X':	/* The time, using the locale's format. */
168 			_LEGAL_ALT(_ALT_E);
169 			if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
170 				return (NULL);
171 			break;
172 
173 		case 'x':	/* The date, using the locale's format. */
174 			_LEGAL_ALT(_ALT_E);
175 			if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
176 				return (NULL);
177 			break;
178 
179 		/*
180 		 * "Elementary" conversion rules.
181 		 */
182 		case 'A':	/* The day of week, using the locale's form. */
183 		case 'a':
184 			_LEGAL_ALT(0);
185 			for (i = 0; i < 7; i++) {
186 				/* Full name. */
187 				len = strlen(_ctloc(day[i]));
188 				if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
189 					break;
190 
191 				/* Abbreviated name. */
192 				len = strlen(_ctloc(abday[i]));
193 				if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
194 					break;
195 			}
196 
197 			/* Nothing matched. */
198 			if (i == 7)
199 				return (NULL);
200 
201 			tm->tm_wday = i;
202 			bp += len;
203 			break;
204 
205 		case 'B':	/* The month, using the locale's form. */
206 		case 'b':
207 		case 'h':
208 			_LEGAL_ALT(0);
209 			for (i = 0; i < 12; i++) {
210 				/* Full name. */
211 				len = strlen(_ctloc(mon[i]));
212 				if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
213 					break;
214 
215 				/* Abbreviated name. */
216 				len = strlen(_ctloc(abmon[i]));
217 				if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
218 					break;
219 			}
220 
221 			/* Nothing matched. */
222 			if (i == 12)
223 				return (NULL);
224 
225 			tm->tm_mon = i;
226 			bp += len;
227 			break;
228 
229 		case 'C':	/* The century number. */
230 			_LEGAL_ALT(_ALT_E);
231 			if (!(_conv_num(&bp, &i, 0, 99)))
232 				return (NULL);
233 
234 			century = i * 100;
235 			break;
236 
237 		case 'd':	/* The day of month. */
238 		case 'e':
239 			_LEGAL_ALT(_ALT_O);
240 			if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
241 				return (NULL);
242 			break;
243 
244 		case 'k':	/* The hour (24-hour clock representation). */
245 			_LEGAL_ALT(0);
246 			/* FALLTHROUGH */
247 		case 'H':
248 			_LEGAL_ALT(_ALT_O);
249 			if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
250 				return (NULL);
251 			break;
252 
253 		case 'l':	/* The hour (12-hour clock representation). */
254 			_LEGAL_ALT(0);
255 			/* FALLTHROUGH */
256 		case 'I':
257 			_LEGAL_ALT(_ALT_O);
258 			if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
259 				return (NULL);
260 			break;
261 
262 		case 'j':	/* The day of year. */
263 			_LEGAL_ALT(0);
264 			if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
265 				return (NULL);
266 			tm->tm_yday--;
267 			break;
268 
269 		case 'M':	/* The minute. */
270 			_LEGAL_ALT(_ALT_O);
271 			if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
272 				return (NULL);
273 			break;
274 
275 		case 'm':	/* The month. */
276 			_LEGAL_ALT(_ALT_O);
277 			if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
278 				return (NULL);
279 			tm->tm_mon--;
280 			break;
281 
282 		case 'p':	/* The locale's equivalent of AM/PM. */
283 			_LEGAL_ALT(0);
284 			/* AM? */
285 			len = strlen(_ctloc(am_pm[0]));
286 			if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
287 				if (tm->tm_hour > 12)	/* i.e., 13:00 AM ?! */
288 					return (NULL);
289 				else if (tm->tm_hour == 12)
290 					tm->tm_hour = 0;
291 
292 				bp += len;
293 				break;
294 			}
295 			/* PM? */
296 			len = strlen(_ctloc(am_pm[1]));
297 			if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
298 				if (tm->tm_hour > 12)	/* i.e., 13:00 PM ?! */
299 					return (NULL);
300 				else if (tm->tm_hour < 12)
301 					tm->tm_hour += 12;
302 
303 				bp += len;
304 				break;
305 			}
306 
307 			/* Nothing matched. */
308 			return (NULL);
309 
310 		case 'S':	/* The seconds. */
311 			_LEGAL_ALT(_ALT_O);
312 			if (!(_conv_num(&bp, &tm->tm_sec, 0, 61)))
313 				return (NULL);
314 			break;
315 
316 		case 'U':	/* The week of year, beginning on sunday. */
317 		case 'W':	/* The week of year, beginning on monday. */
318 			_LEGAL_ALT(_ALT_O);
319 			/*
320 			 * XXX This is bogus, as we can not assume any valid
321 			 * information present in the tm structure at this
322 			 * point to calculate a real value, so just check the
323 			 * range for now.
324 			 */
325 			 if (!(_conv_num(&bp, &i, 0, 53)))
326 				return (NULL);
327 			 break;
328 
329 		case 'w':	/* The day of week, beginning on sunday. */
330 			_LEGAL_ALT(_ALT_O);
331 			if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
332 				return (NULL);
333 			break;
334 
335 		case 'u':	/* The day of week, monday = 1. */
336 			_LEGAL_ALT(_ALT_O);
337 			if (!(_conv_num(&bp, &i, 1, 7)))
338 				return (NULL);
339 			tm->tm_wday = i % 7;
340 			continue;
341 
342 		case 'g':	/* The year corresponding to the ISO week
343 				 * number but without the century.
344 				 */
345 			if (!(_conv_num(&bp, &i, 0, 99)))
346 				return (NULL);
347 			continue;
348 
349 		case 'G':	/* The year corresponding to the ISO week
350 				 * number with century.
351 				 */
352 			do
353 				bp++;
354 			while (isdigit(*bp));
355 			continue;
356 
357 		case 'V':	/* The ISO 8601:1988 week number as decimal */
358 			if (!(_conv_num(&bp, &i, 0, 53)))
359 				return (NULL);
360 			continue;
361 
362 		case 'Y':	/* The year. */
363 			_LEGAL_ALT(_ALT_E);
364 			if (!(_conv_num(&bp, &i, 0, 9999)))
365 				return (NULL);
366 
367 			relyear = -1;
368 			tm->tm_year = i - TM_YEAR_BASE;
369 			break;
370 
371 		case 'y':	/* The year within the century (2 digits). */
372 			_LEGAL_ALT(_ALT_E | _ALT_O);
373 			if (!(_conv_num(&bp, &relyear, 0, 99)))
374 				return (NULL);
375 			break;
376 
377 		case 'Z':
378 			tzset();
379 			if (strncmp((const char *)bp, gmt, 3) == 0) {
380 				tm->tm_isdst = 0;
381 #ifdef TM_GMTOFF
382 				tm->TM_GMTOFF = 0;
383 #endif
384 #ifdef TM_ZONE
385 				tm->TM_ZONE = gmt;
386 #endif
387 				bp += 3;
388 			} else {
389 				ep = _find_string(bp, &i,
390 					       	 (const char * const *)tzname,
391 					       	  NULL, 2);
392 				if (ep != NULL) {
393 					tm->tm_isdst = i;
394 #ifdef TM_GMTOFF
395 					tm->TM_GMTOFF = -(timezone);
396 #endif
397 #ifdef TM_ZONE
398 					tm->TM_ZONE = tzname[i];
399 #endif
400 				}
401 				bp = ep;
402 			}
403 			continue;
404 
405 		case 'z':
406 			/*
407 			 * We recognize all ISO 8601 formats:
408 			 * Z	= Zulu time/UTC
409 			 * [+-]hhmm
410 			 * [+-]hh:mm
411 			 * [+-]hh
412 			 * We recognize all RFC-822/RFC-2822 formats:
413 			 * UT|GMT
414 			 *          North American : UTC offsets
415 			 * E[DS]T = Eastern : -4 | -5
416 			 * C[DS]T = Central : -5 | -6
417 			 * M[DS]T = Mountain: -6 | -7
418 			 * P[DS]T = Pacific : -7 | -8
419 			 *          Military
420 			 * [A-IL-M] = -1 ... -9 (J not used)
421 			 * [N-Y]  = +1 ... +12
422 			 */
423 			while (isspace(*bp))
424 				bp++;
425 
426 			switch (*bp++) {
427 			case 'G':
428 				if (*bp++ != 'M')
429 					return NULL;
430 				/*FALLTHROUGH*/
431 			case 'U':
432 				if (*bp++ != 'T')
433 					return NULL;
434 				/*FALLTHROUGH*/
435 			case 'Z':
436 				tm->tm_isdst = 0;
437 #ifdef TM_GMTOFF
438 				tm->TM_GMTOFF = 0;
439 #endif
440 #ifdef TM_ZONE
441 				tm->TM_ZONE = utc;
442 #endif
443 				continue;
444 			case '+':
445 				neg = 0;
446 				break;
447 			case '-':
448 				neg = 1;
449 				break;
450 			default:
451 				--bp;
452 				ep = _find_string(bp, &i, nast, NULL, 4);
453 				if (ep != NULL) {
454 #ifdef TM_GMTOFF
455 					tm->TM_GMTOFF = -5 - i;
456 #endif
457 #ifdef TM_ZONE
458 					tm->TM_ZONE = __UNCONST(nast[i]);
459 #endif
460 					bp = ep;
461 					continue;
462 				}
463 				ep = _find_string(bp, &i, nadt, NULL, 4);
464 				if (ep != NULL) {
465 					tm->tm_isdst = 1;
466 #ifdef TM_GMTOFF
467 					tm->TM_GMTOFF = -4 - i;
468 #endif
469 #ifdef TM_ZONE
470 					tm->TM_ZONE = __UNCONST(nadt[i]);
471 #endif
472 					bp = ep;
473 					continue;
474 				}
475 
476 				if ((*bp >= 'A' && *bp <= 'I') ||
477 				    (*bp >= 'L' && *bp <= 'Y')) {
478 #ifdef TM_GMTOFF
479 					/* Argh! No 'J'! */
480 					if (*bp >= 'A' && *bp <= 'I')
481 						tm->TM_GMTOFF =
482 						    ('A' - 1) - (int)*bp;
483 					else if (*bp >= 'L' && *bp <= 'M')
484 						tm->TM_GMTOFF = 'A' - (int)*bp;
485 					else if (*bp >= 'N' && *bp <= 'Y')
486 						tm->TM_GMTOFF = (int)*bp - 'M';
487 #endif
488 #ifdef TM_ZONE
489 					tm->TM_ZONE = NULL; /* XXX */
490 #endif
491 					bp++;
492 					continue;
493 				}
494 				return NULL;
495 			}
496 			offs = 0;
497 			for (i = 0; i < 4; ) {
498 				if (isdigit(*bp)) {
499 					offs = offs * 10 + (*bp++ - '0');
500 					i++;
501 					continue;
502 				}
503 				if (i == 2 && *bp == ':') {
504 					bp++;
505 					continue;
506 				}
507 				break;
508 			}
509 			switch (i) {
510 			case 2:
511 				offs *= 100;
512 				break;
513 			case 4:
514 				i = offs % 100;
515 				if (i >= 60)
516 					return NULL;
517 				/* Convert minutes into decimal */
518 				offs = (offs / 100) * 100 + (i * 50) / 30;
519 				break;
520 			default:
521 				return NULL;
522 			}
523 			if (neg)
524 				offs = -offs;
525 			tm->tm_isdst = 0;	/* XXX */
526 #ifdef TM_GMTOFF
527 			tm->TM_GMTOFF = offs;
528 #endif
529 #ifdef TM_ZONE
530 			tm->TM_ZONE = NULL;	/* XXX */
531 #endif
532 			continue;
533 
534 		/*
535 		 * Miscellaneous conversions.
536 		 */
537 		case 'n':	/* Any kind of white-space. */
538 		case 't':
539 			_LEGAL_ALT(0);
540 			while (isspace(*bp))
541 				bp++;
542 			break;
543 
544 
545 		default:	/* Unknown/unsupported conversion. */
546 			return (NULL);
547 		}
548 
549 
550 	}
551 
552 	/*
553 	 * We need to evaluate the two digit year spec (%y)
554 	 * last as we can get a century spec (%C) at any time.
555 	 */
556 	if (relyear != -1) {
557 		if (century == TM_YEAR_BASE) {
558 			if (relyear <= 68)
559 				tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
560 			else
561 				tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
562 		} else {
563 			tm->tm_year = relyear + century - TM_YEAR_BASE;
564 		}
565 	}
566 
567 	return ((char *)bp);
568 }
569 
570 
/*
 * Parse an unsigned decimal number at *buf.
 *
 * buf	in/out: points at the text to parse; advanced past every digit
 *	that was consumed (even when the range check below fails)
 * dest	receives the value on success; untouched on failure
 * llim	smallest acceptable value
 * ulim	largest acceptable value; it also limits how many digits are
 *	read, to at most as many as `ulim' itself has
 *
 * Returns 1 on success, 0 if *buf does not start with a digit or the
 * value falls outside [llim, ulim].
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cur = *buf;
	int value = 0;
	int width = ulim;	/* loses one decimal digit per digit read */

	if (*cur < '0' || *cur > '9')
		return (0);

	for (;;) {
		value = value * 10 + (*cur++ - '0');
		width /= 10;

		/* Another digit would overshoot ulim, or width is used up. */
		if (value * 10 > ulim || width == 0)
			break;
		/* Out of digits in the input. */
		if (*cur < '0' || *cur > '9')
			break;
	}
	*buf = cur;

	if (value < llim || value > ulim)
		return (0);

	*dest = value;
	return (1);
}
593 
/*
 * Look for one of a set of names at the start of `bp', ignoring case.
 *
 * bp	text to examine
 * tgt	receives the index of the matching name within its table
 * n1	first table of names, searched first; NULL means nothing is
 *	searched at all
 * n2	optional second table, searched only after n1 fails; may be NULL
 * c	number of entries examined in each table
 *
 * Returns a pointer just past the matched name, or NULL (leaving *tgt
 * untouched) when no entry matches.
 */
static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	const char * const *names;
	unsigned int len;
	int t, idx;

	tables[0] = n1;
	tables[1] = n2;

	/* check full name - then abbreviated ones */
	for (t = 0; t < 2 && tables[t] != NULL; t++) {
		names = tables[t];
		for (idx = 0; idx < c; idx++) {
			len = strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp,
			    len) != 0)
				continue;
			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}
615