xref: /minix/lib/libc/time/strptime.c (revision 84d9c625)
1 /*	$NetBSD: strptime.c,v 1.38 2013/05/17 12:55:57 joerg Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
8  * Heavily optimised by David Laight
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.38 2013/05/17 12:55:57 joerg Exp $");
35 #endif
36 
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <ctype.h>
40 #include <locale.h>
41 #include <string.h>
42 #include <time.h>
43 #include <tzfile.h>
44 #include "private.h"
45 #include "setlocale_local.h"
46 
47 #ifdef __weak_alias
48 __weak_alias(strptime,_strptime)
49 __weak_alias(strptime_l, _strptime_l)
50 #endif
51 
/* Fetch the LC_TIME slot of a locale object, viewed as a _TimeLocale. */
#define _TIME_LOCALE(loc) \
    ((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME]))

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * LEGAL_ALT(x) expects a local `alt_format' holding the ALT_* modifiers seen
 * for the current conversion and makes the enclosing function return NULL
 * when any modifier outside the mask `x' is set.  It is wrapped in
 * do { } while (0) so that `LEGAL_ALT(x);' behaves as exactly one statement,
 * including inside an unbraced if/else.
 */
#define ALT_E			0x01
#define ALT_O			0x02
#define	LEGAL_ALT(x) \
	do { if (alt_format & ~(x)) return NULL; } while (0)

/* Not const: these are handed out through tm->TM_ZONE. */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };
/* RFC-822/RFC-2822 */
/*
 * North American zone abbreviations, standard and daylight, in the order
 * Eastern, Central, Mountain, Pacific.  The "%z" parser searches only the
 * first four entries of each table; the fifth entry is an empty placeholder.
 */
static const char * const nast[5] = {
       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
};
static const char * const nadt[5] = {
       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
};

/* Parse a bounded unsigned decimal number. */
static const u_char *conv_num(const unsigned char *, int *, uint, uint);
/* Case-insensitively match the input against one or two name tables. */
static const u_char *find_string(const u_char *, int *, const char * const *,
	const char * const *, int);
76 
77 char *
78 strptime(const char *buf, const char *fmt, struct tm *tm)
79 {
80 	return strptime_l(buf, fmt, tm, _current_locale());
81 }
82 
83 char *
84 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc)
85 {
86 	unsigned char c;
87 	const unsigned char *bp, *ep;
88 	int alt_format, i, split_year = 0, neg = 0, offs;
89 	const char *new_fmt;
90 
91 	bp = (const u_char *)buf;
92 
93 	while (bp != NULL && (c = *fmt++) != '\0') {
94 		/* Clear `alternate' modifier prior to new conversion. */
95 		alt_format = 0;
96 		i = 0;
97 
98 		/* Eat up white-space. */
99 		if (isspace(c)) {
100 			while (isspace(*bp))
101 				bp++;
102 			continue;
103 		}
104 
105 		if (c != '%')
106 			goto literal;
107 
108 
109 again:		switch (c = *fmt++) {
110 		case '%':	/* "%%" is converted to "%". */
111 literal:
112 			if (c != *bp++)
113 				return NULL;
114 			LEGAL_ALT(0);
115 			continue;
116 
117 		/*
118 		 * "Alternative" modifiers. Just set the appropriate flag
119 		 * and start over again.
120 		 */
121 		case 'E':	/* "%E?" alternative conversion modifier. */
122 			LEGAL_ALT(0);
123 			alt_format |= ALT_E;
124 			goto again;
125 
126 		case 'O':	/* "%O?" alternative conversion modifier. */
127 			LEGAL_ALT(0);
128 			alt_format |= ALT_O;
129 			goto again;
130 
131 		/*
132 		 * "Complex" conversion rules, implemented through recursion.
133 		 */
134 		case 'c':	/* Date and time, using the locale's format. */
135 			new_fmt = _TIME_LOCALE(loc)->d_t_fmt;
136 			goto recurse;
137 
138 		case 'D':	/* The date as "%m/%d/%y". */
139 			new_fmt = "%m/%d/%y";
140 			LEGAL_ALT(0);
141 			goto recurse;
142 
143 		case 'F':	/* The date as "%Y-%m-%d". */
144 			new_fmt = "%Y-%m-%d";
145 			LEGAL_ALT(0);
146 			goto recurse;
147 
148 		case 'R':	/* The time as "%H:%M". */
149 			new_fmt = "%H:%M";
150 			LEGAL_ALT(0);
151 			goto recurse;
152 
153 		case 'r':	/* The time in 12-hour clock representation. */
154 			new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm;
155 			LEGAL_ALT(0);
156 			goto recurse;
157 
158 		case 'T':	/* The time as "%H:%M:%S". */
159 			new_fmt = "%H:%M:%S";
160 			LEGAL_ALT(0);
161 			goto recurse;
162 
163 		case 'X':	/* The time, using the locale's format. */
164 			new_fmt = _TIME_LOCALE(loc)->t_fmt;
165 			goto recurse;
166 
167 		case 'x':	/* The date, using the locale's format. */
168 			new_fmt = _TIME_LOCALE(loc)->d_fmt;
169 		    recurse:
170 			bp = (const u_char *)strptime((const char *)bp,
171 							    new_fmt, tm);
172 			LEGAL_ALT(ALT_E);
173 			continue;
174 
175 		/*
176 		 * "Elementary" conversion rules.
177 		 */
178 		case 'A':	/* The day of week, using the locale's form. */
179 		case 'a':
180 			bp = find_string(bp, &tm->tm_wday,
181 			    _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7);
182 			LEGAL_ALT(0);
183 			continue;
184 
185 		case 'B':	/* The month, using the locale's form. */
186 		case 'b':
187 		case 'h':
188 			bp = find_string(bp, &tm->tm_mon,
189 			    _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon,
190 			    12);
191 			LEGAL_ALT(0);
192 			continue;
193 
194 		case 'C':	/* The century number. */
195 			i = 20;
196 			bp = conv_num(bp, &i, 0, 99);
197 
198 			i = i * 100 - TM_YEAR_BASE;
199 			if (split_year)
200 				i += tm->tm_year % 100;
201 			split_year = 1;
202 			tm->tm_year = i;
203 			LEGAL_ALT(ALT_E);
204 			continue;
205 
206 		case 'd':	/* The day of month. */
207 		case 'e':
208 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
209 			LEGAL_ALT(ALT_O);
210 			continue;
211 
212 		case 'k':	/* The hour (24-hour clock representation). */
213 			LEGAL_ALT(0);
214 			/* FALLTHROUGH */
215 		case 'H':
216 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
217 			LEGAL_ALT(ALT_O);
218 			continue;
219 
220 		case 'l':	/* The hour (12-hour clock representation). */
221 			LEGAL_ALT(0);
222 			/* FALLTHROUGH */
223 		case 'I':
224 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
225 			if (tm->tm_hour == 12)
226 				tm->tm_hour = 0;
227 			LEGAL_ALT(ALT_O);
228 			continue;
229 
230 		case 'j':	/* The day of year. */
231 			i = 1;
232 			bp = conv_num(bp, &i, 1, 366);
233 			tm->tm_yday = i - 1;
234 			LEGAL_ALT(0);
235 			continue;
236 
237 		case 'M':	/* The minute. */
238 			bp = conv_num(bp, &tm->tm_min, 0, 59);
239 			LEGAL_ALT(ALT_O);
240 			continue;
241 
242 		case 'm':	/* The month. */
243 			i = 1;
244 			bp = conv_num(bp, &i, 1, 12);
245 			tm->tm_mon = i - 1;
246 			LEGAL_ALT(ALT_O);
247 			continue;
248 
249 		case 'p':	/* The locale's equivalent of AM/PM. */
250 			bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm,
251 			    NULL, 2);
252 			if (tm->tm_hour > 11)
253 				return NULL;
254 			tm->tm_hour += i * 12;
255 			LEGAL_ALT(0);
256 			continue;
257 
258 		case 'S':	/* The seconds. */
259 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
260 			LEGAL_ALT(ALT_O);
261 			continue;
262 
263 #ifndef TIME_MAX
264 #define TIME_MAX	INT64_MAX
265 #endif
266 		case 's':	/* seconds since the epoch */
267 			{
268 				time_t sse = 0;
269 				uint64_t rulim = TIME_MAX;
270 
271 				if (*bp < '0' || *bp > '9') {
272 					bp = NULL;
273 					continue;
274 				}
275 
276 				do {
277 					sse *= 10;
278 					sse += *bp++ - '0';
279 					rulim /= 10;
280 				} while ((sse * 10 <= TIME_MAX) &&
281 					 rulim && *bp >= '0' && *bp <= '9');
282 
283 				if (sse < 0 || (uint64_t)sse > TIME_MAX) {
284 					bp = NULL;
285 					continue;
286 				}
287 
288 				if (localtime_r(&sse, tm) == NULL)
289 					bp = NULL;
290 			}
291 			continue;
292 
293 		case 'U':	/* The week of year, beginning on sunday. */
294 		case 'W':	/* The week of year, beginning on monday. */
295 			/*
296 			 * XXX This is bogus, as we can not assume any valid
297 			 * information present in the tm structure at this
298 			 * point to calculate a real value, so just check the
299 			 * range for now.
300 			 */
301 			 bp = conv_num(bp, &i, 0, 53);
302 			 LEGAL_ALT(ALT_O);
303 			 continue;
304 
305 		case 'w':	/* The day of week, beginning on sunday. */
306 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
307 			LEGAL_ALT(ALT_O);
308 			continue;
309 
310 		case 'u':	/* The day of week, monday = 1. */
311 			bp = conv_num(bp, &i, 1, 7);
312 			tm->tm_wday = i % 7;
313 			LEGAL_ALT(ALT_O);
314 			continue;
315 
316 		case 'g':	/* The year corresponding to the ISO week
317 				 * number but without the century.
318 				 */
319 			bp = conv_num(bp, &i, 0, 99);
320 			continue;
321 
322 		case 'G':	/* The year corresponding to the ISO week
323 				 * number with century.
324 				 */
325 			do
326 				bp++;
327 			while (isdigit(*bp));
328 			continue;
329 
330 		case 'V':	/* The ISO 8601:1988 week number as decimal */
331 			bp = conv_num(bp, &i, 0, 53);
332 			continue;
333 
334 		case 'Y':	/* The year. */
335 			i = TM_YEAR_BASE;	/* just for data sanity... */
336 			bp = conv_num(bp, &i, 0, 9999);
337 			tm->tm_year = i - TM_YEAR_BASE;
338 			LEGAL_ALT(ALT_E);
339 			continue;
340 
341 		case 'y':	/* The year within 100 years of the epoch. */
342 			/* LEGAL_ALT(ALT_E | ALT_O); */
343 			bp = conv_num(bp, &i, 0, 99);
344 
345 			if (split_year)
346 				/* preserve century */
347 				i += (tm->tm_year / 100) * 100;
348 			else {
349 				split_year = 1;
350 				if (i <= 68)
351 					i = i + 2000 - TM_YEAR_BASE;
352 				else
353 					i = i + 1900 - TM_YEAR_BASE;
354 			}
355 			tm->tm_year = i;
356 			continue;
357 
358 		case 'Z':
359 			tzset();
360 			if (strncmp((const char *)bp, gmt, 3) == 0) {
361 				tm->tm_isdst = 0;
362 #ifdef TM_GMTOFF
363 				tm->TM_GMTOFF = 0;
364 #endif
365 #ifdef TM_ZONE
366 				tm->TM_ZONE = gmt;
367 #endif
368 				bp += 3;
369 			} else {
370 				ep = find_string(bp, &i,
371 					       	 (const char * const *)tzname,
372 					       	  NULL, 2);
373 				if (ep != NULL) {
374 					tm->tm_isdst = i;
375 #ifdef TM_GMTOFF
376 					tm->TM_GMTOFF = -(timezone);
377 #endif
378 #ifdef TM_ZONE
379 					tm->TM_ZONE = tzname[i];
380 #endif
381 				}
382 				bp = ep;
383 			}
384 			continue;
385 
386 		case 'z':
387 			/*
388 			 * We recognize all ISO 8601 formats:
389 			 * Z	= Zulu time/UTC
390 			 * [+-]hhmm
391 			 * [+-]hh:mm
392 			 * [+-]hh
393 			 * We recognize all RFC-822/RFC-2822 formats:
394 			 * UT|GMT
395 			 *          North American : UTC offsets
396 			 * E[DS]T = Eastern : -4 | -5
397 			 * C[DS]T = Central : -5 | -6
398 			 * M[DS]T = Mountain: -6 | -7
399 			 * P[DS]T = Pacific : -7 | -8
400 			 *          Military
401 			 * [A-IL-M] = -1 ... -9 (J not used)
402 			 * [N-Y]  = +1 ... +12
403 			 */
404 			while (isspace(*bp))
405 				bp++;
406 
407 			switch (*bp++) {
408 			case 'G':
409 				if (*bp++ != 'M')
410 					return NULL;
411 				/*FALLTHROUGH*/
412 			case 'U':
413 				if (*bp++ != 'T')
414 					return NULL;
415 				/*FALLTHROUGH*/
416 			case 'Z':
417 				tm->tm_isdst = 0;
418 #ifdef TM_GMTOFF
419 				tm->TM_GMTOFF = 0;
420 #endif
421 #ifdef TM_ZONE
422 				tm->TM_ZONE = utc;
423 #endif
424 				continue;
425 			case '+':
426 				neg = 0;
427 				break;
428 			case '-':
429 				neg = 1;
430 				break;
431 			default:
432 				--bp;
433 				ep = find_string(bp, &i, nast, NULL, 4);
434 				if (ep != NULL) {
435 #ifdef TM_GMTOFF
436 					tm->TM_GMTOFF = -5 - i;
437 #endif
438 #ifdef TM_ZONE
439 					tm->TM_ZONE = __UNCONST(nast[i]);
440 #endif
441 					bp = ep;
442 					continue;
443 				}
444 				ep = find_string(bp, &i, nadt, NULL, 4);
445 				if (ep != NULL) {
446 					tm->tm_isdst = 1;
447 #ifdef TM_GMTOFF
448 					tm->TM_GMTOFF = -4 - i;
449 #endif
450 #ifdef TM_ZONE
451 					tm->TM_ZONE = __UNCONST(nadt[i]);
452 #endif
453 					bp = ep;
454 					continue;
455 				}
456 
457 				if ((*bp >= 'A' && *bp <= 'I') ||
458 				    (*bp >= 'L' && *bp <= 'Y')) {
459 #ifdef TM_GMTOFF
460 					/* Argh! No 'J'! */
461 					if (*bp >= 'A' && *bp <= 'I')
462 						tm->TM_GMTOFF =
463 						    ('A' - 1) - (int)*bp;
464 					else if (*bp >= 'L' && *bp <= 'M')
465 						tm->TM_GMTOFF = 'A' - (int)*bp;
466 					else if (*bp >= 'N' && *bp <= 'Y')
467 						tm->TM_GMTOFF = (int)*bp - 'M';
468 #endif
469 #ifdef TM_ZONE
470 					tm->TM_ZONE = NULL; /* XXX */
471 #endif
472 					bp++;
473 					continue;
474 				}
475 				return NULL;
476 			}
477 			offs = 0;
478 			for (i = 0; i < 4; ) {
479 				if (isdigit(*bp)) {
480 					offs = offs * 10 + (*bp++ - '0');
481 					i++;
482 					continue;
483 				}
484 				if (i == 2 && *bp == ':') {
485 					bp++;
486 					continue;
487 				}
488 				break;
489 			}
490 			switch (i) {
491 			case 2:
492 				offs *= 100;
493 				break;
494 			case 4:
495 				i = offs % 100;
496 				if (i >= 60)
497 					return NULL;
498 				/* Convert minutes into decimal */
499 				offs = (offs / 100) * 100 + (i * 50) / 30;
500 				break;
501 			default:
502 				return NULL;
503 			}
504 			if (neg)
505 				offs = -offs;
506 			tm->tm_isdst = 0;	/* XXX */
507 #ifdef TM_GMTOFF
508 			tm->TM_GMTOFF = offs;
509 #endif
510 #ifdef TM_ZONE
511 			tm->TM_ZONE = NULL;	/* XXX */
512 #endif
513 			continue;
514 
515 		/*
516 		 * Miscellaneous conversions.
517 		 */
518 		case 'n':	/* Any kind of white-space. */
519 		case 't':
520 			while (isspace(*bp))
521 				bp++;
522 			LEGAL_ALT(0);
523 			continue;
524 
525 
526 		default:	/* Unknown/unsupported conversion. */
527 			return NULL;
528 		}
529 	}
530 
531 	return __UNCONST(bp);
532 }
533 
534 
/*
 * conv_num --
 *	Read an unsigned decimal number from the start of `buf'.
 *
 *	The upper limit `ulim' also caps how many digits are read: no more
 *	digits than `ulim' itself has, and reading stops early as soon as a
 *	further digit would necessarily push the value above `ulim'.
 *
 *	On success the value is stored in `*dest' and a pointer to the first
 *	unread character is returned.  NULL is returned, with `*dest' left
 *	untouched, when `buf' does not start with a digit or the value lies
 *	outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cursor = buf;
	uint value = 0;
	/* Drops one decimal digit of ulim per character consumed. */
	uint digits_left = ulim;

	if (*cursor < '0' || *cursor > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (uint)(*cursor - '0');
		cursor++;
		digits_left /= 10;

		if (value * 10 > ulim)
			break;		/* one more digit would exceed ulim */
		if (digits_left == 0)
			break;		/* digit budget used up */
		if (*cursor < '0' || *cursor > '9')
			break;		/* end of the digit run */
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = (int)value;
	return cursor;
}
561 
/*
 * find_string --
 *	Case-insensitively match the start of `bp' against the `c' strings
 *	of table `n1' and, if none of them matches, against the `c' strings
 *	of table `n2' (which may be NULL).  Callers pass the full names as
 *	`n1' and the abbreviations as `n2'.
 *
 *	On a match the index of the entry within its table is stored in
 *	`*tgt' and a pointer just past the matched text is returned.
 *	Otherwise NULL is returned and `*tgt' is left unchanged.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	int pass, idx;

	tables[0] = n1;
	tables[1] = n2;

	/* Search stops at the first missing table. */
	for (pass = 0; pass < 2 && tables[pass] != NULL; pass++) {
		const char * const *names = tables[pass];

		for (idx = 0; idx < c; idx++) {
			const char *candidate = names[idx];
			size_t n = strlen(candidate);

			if (strncasecmp(candidate, (const char *)bp, n) == 0) {
				*tgt = idx;
				return bp + n;
			}
		}
	}

	/* Nothing matched */
	return NULL;
}
583