xref: /dragonfly/lib/libc/stdtime/strptime.c (revision f4228790)
1 /*	$NetBSD: strptime.c,v 1.31 2008/11/04 21:08:33 christos Exp $	*/
2 /*	$DragonFly: src/lib/libc/stdtime/strptime.c,v 1.5 2005/12/04 23:25:40 swildner Exp $ */
3 
4 /*-
5  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code was contributed to The NetBSD Foundation by Klaus Klein.
9  * Heavily optimised by David Laight
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/localedef.h>
34 #include <ctype.h>
35 #include <locale.h>
36 #include <string.h>
37 #include <time.h>
38 #include "private.h"
39 #include "tzfile.h"
40 
/* Shorthand for reading a member of the currently active time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the bits recorded in the local variable alt_format
 * when an "%E" or "%O" modifier has been seen.  LEGAL_ALT(x) makes the
 * enclosing function return NULL if a modifier outside the mask x is
 * pending; it requires alt_format to be in scope and the enclosing
 * function to return a pointer.  The do/while (0) wrapper makes the
 * expansion a single statement, so "LEGAL_ALT(x);" is safe in any
 * statement context (including an unbraced if/else).
 */
#define	ALT_E			0x01
#define	ALT_O			0x02
#define	LEGAL_ALT(x)		do {					\
	if (alt_format & ~(x))						\
		return NULL;						\
} while (0)

/*
 * Zone names.  They are deliberately not const: strptime() stores them
 * into the (non-const) TM_ZONE member when that member is available.
 */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };

static const u_char *conv_num(const unsigned char *, int *, uint, uint);
static const u_char *find_string(const u_char *, int *, const char * const *,
	const char * const *, int);
57 
58 char *
59 strptime(const char *buf, const char *fmt, struct tm *tm)
60 {
61 	unsigned char c;
62 	const unsigned char *bp;
63 	int alt_format, i, split_year = 0, neg, offs;
64 	const char *new_fmt;
65 
66 	bp = (const u_char *)buf;
67 
68 	while (bp != NULL && (c = *fmt++) != '\0') {
69 		/* Clear `alternate' modifier prior to new conversion. */
70 		alt_format = 0;
71 		i = 0;
72 
73 		/* Eat up white-space. */
74 		if (isspace(c)) {
75 			while (isspace(*bp))
76 				bp++;
77 			continue;
78 		}
79 
80 		if (c != '%')
81 			goto literal;
82 
83 
84 again:		switch (c = *fmt++) {
85 		case '%':	/* "%%" is converted to "%". */
86 literal:
87 			if (c != *bp++)
88 				return NULL;
89 			LEGAL_ALT(0);
90 			continue;
91 
92 		/*
93 		 * "Alternative" modifiers. Just set the appropriate flag
94 		 * and start over again.
95 		 */
96 		case 'E':	/* "%E?" alternative conversion modifier. */
97 			LEGAL_ALT(0);
98 			alt_format |= ALT_E;
99 			goto again;
100 
101 		case 'O':	/* "%O?" alternative conversion modifier. */
102 			LEGAL_ALT(0);
103 			alt_format |= ALT_O;
104 			goto again;
105 
106 		/*
107 		 * "Complex" conversion rules, implemented through recursion.
108 		 */
109 		case 'c':	/* Date and time, using the locale's format. */
110 			new_fmt = _ctloc(d_t_fmt);
111 			goto recurse;
112 
113 		case 'D':	/* The date as "%m/%d/%y". */
114 			new_fmt = "%m/%d/%y";
115 			LEGAL_ALT(0);
116 			goto recurse;
117 
118 		case 'F':	/* The date as "%Y-%m-%d". */
119 			new_fmt = "%Y-%m-%d";
120 			LEGAL_ALT(0);
121 			goto recurse;
122 
123 		case 'R':	/* The time as "%H:%M". */
124 			new_fmt = "%H:%M";
125 			LEGAL_ALT(0);
126 			goto recurse;
127 
128 		case 'r':	/* The time in 12-hour clock representation. */
129 			new_fmt =_ctloc(t_fmt_ampm);
130 			LEGAL_ALT(0);
131 			goto recurse;
132 
133 		case 'T':	/* The time as "%H:%M:%S". */
134 			new_fmt = "%H:%M:%S";
135 			LEGAL_ALT(0);
136 			goto recurse;
137 
138 		case 'X':	/* The time, using the locale's format. */
139 			new_fmt =_ctloc(t_fmt);
140 			goto recurse;
141 
142 		case 'x':	/* The date, using the locale's format. */
143 			new_fmt =_ctloc(d_fmt);
144 		    recurse:
145 			bp = (const u_char *)strptime((const char *)bp,
146 							    new_fmt, tm);
147 			LEGAL_ALT(ALT_E);
148 			continue;
149 
150 		/*
151 		 * "Elementary" conversion rules.
152 		 */
153 		case 'A':	/* The day of week, using the locale's form. */
154 		case 'a':
155 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
156 					_ctloc(abday), 7);
157 			LEGAL_ALT(0);
158 			continue;
159 
160 		case 'B':	/* The month, using the locale's form. */
161 		case 'b':
162 		case 'h':
163 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
164 					_ctloc(abmon), 12);
165 			LEGAL_ALT(0);
166 			continue;
167 
168 		case 'C':	/* The century number. */
169 			i = 20;
170 			bp = conv_num(bp, &i, 0, 99);
171 
172 			i = i * 100 - TM_YEAR_BASE;
173 			if (split_year)
174 				i += tm->tm_year % 100;
175 			split_year = 1;
176 			tm->tm_year = i;
177 			LEGAL_ALT(ALT_E);
178 			continue;
179 
180 		case 'd':	/* The day of month. */
181 		case 'e':
182 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
183 			LEGAL_ALT(ALT_O);
184 			continue;
185 
186 		case 'k':	/* The hour (24-hour clock representation). */
187 			LEGAL_ALT(0);
188 			/* FALLTHROUGH */
189 		case 'H':
190 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
191 			LEGAL_ALT(ALT_O);
192 			continue;
193 
194 		case 'l':	/* The hour (12-hour clock representation). */
195 			LEGAL_ALT(0);
196 			/* FALLTHROUGH */
197 		case 'I':
198 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
199 			if (tm->tm_hour == 12)
200 				tm->tm_hour = 0;
201 			LEGAL_ALT(ALT_O);
202 			continue;
203 
204 		case 'j':	/* The day of year. */
205 			i = 1;
206 			bp = conv_num(bp, &i, 1, 366);
207 			tm->tm_yday = i - 1;
208 			LEGAL_ALT(0);
209 			continue;
210 
211 		case 'M':	/* The minute. */
212 			bp = conv_num(bp, &tm->tm_min, 0, 59);
213 			LEGAL_ALT(ALT_O);
214 			continue;
215 
216 		case 'm':	/* The month. */
217 			i = 1;
218 			bp = conv_num(bp, &i, 1, 12);
219 			tm->tm_mon = i - 1;
220 			LEGAL_ALT(ALT_O);
221 			continue;
222 
223 		case 'p':	/* The locale's equivalent of AM/PM. */
224 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
225 			if (tm->tm_hour > 11)
226 				return NULL;
227 			tm->tm_hour += i * 12;
228 			LEGAL_ALT(0);
229 			continue;
230 
231 		case 'S':	/* The seconds. */
232 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
233 			LEGAL_ALT(ALT_O);
234 			continue;
235 
236 		case 'U':	/* The week of year, beginning on sunday. */
237 		case 'W':	/* The week of year, beginning on monday. */
238 			/*
239 			 * XXX This is bogus, as we can not assume any valid
240 			 * information present in the tm structure at this
241 			 * point to calculate a real value, so just check the
242 			 * range for now.
243 			 */
244 			 bp = conv_num(bp, &i, 0, 53);
245 			 LEGAL_ALT(ALT_O);
246 			 continue;
247 
248 		case 'w':	/* The day of week, beginning on sunday. */
249 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
250 			LEGAL_ALT(ALT_O);
251 			continue;
252 
253 		case 'u':	/* The day of week, monday = 1. */
254 			bp = conv_num(bp, &i, 1, 7);
255 			tm->tm_wday = i % 7;
256 			LEGAL_ALT(ALT_O);
257 			continue;
258 
259 		case 'g':	/* The year corresponding to the ISO week
260 				 * number but without the century.
261 				 */
262 			bp = conv_num(bp, &i, 0, 99);
263 			continue;
264 
265 		case 'G':	/* The year corresponding to the ISO week
266 				 * number with century.
267 				 */
268 			do
269 				bp++;
270 			while (isdigit(*bp));
271 			continue;
272 
273 		case 'V':	/* The ISO 8601:1988 week number as decimal */
274 			bp = conv_num(bp, &i, 0, 53);
275 			continue;
276 
277 		case 'Y':	/* The year. */
278 			i = TM_YEAR_BASE;	/* just for data sanity... */
279 			bp = conv_num(bp, &i, 0, 9999);
280 			tm->tm_year = i - TM_YEAR_BASE;
281 			LEGAL_ALT(ALT_E);
282 			continue;
283 
284 		case 'y':	/* The year within 100 years of the epoch. */
285 			/* LEGAL_ALT(ALT_E | ALT_O); */
286 			bp = conv_num(bp, &i, 0, 99);
287 
288 			if (split_year)
289 				/* preserve century */
290 				i += (tm->tm_year / 100) * 100;
291 			else {
292 				split_year = 1;
293 				if (i <= 68)
294 					i = i + 2000 - TM_YEAR_BASE;
295 				else
296 					i = i + 1900 - TM_YEAR_BASE;
297 			}
298 			tm->tm_year = i;
299 			continue;
300 
301 		case 'Z':
302 			tzset();
303 			if (strncmp((const char *)bp, gmt, 3) == 0) {
304 				tm->tm_isdst = 0;
305 #ifdef TM_GMTOFF
306 				tm->TM_GMTOFF = 0;
307 #endif
308 #ifdef TM_ZONE
309 				tm->TM_ZONE = gmt;
310 #endif
311 				bp += 3;
312 			} else {
313 				const unsigned char *ep;
314 
315 				ep = find_string(bp, &i,
316 						 (const char * const *)tzname,
317 						  NULL, 2);
318 				if (ep != NULL) {
319 					tm->tm_isdst = i;
320 #ifdef TM_GMTOFF
321 					tm->TM_GMTOFF = -(timezone);
322 #endif
323 #ifdef TM_ZONE
324 					tm->TM_ZONE = tzname[i];
325 #endif
326 				}
327 				bp = ep;
328 			}
329 			continue;
330 
331 		case 'z':
332 			/*
333 			 * We recognize all ISO 8601 formats:
334 			 * Z	= Zulu time/UTC
335 			 * [+-]hhmm
336 			 * [+-]hh:mm
337 			 * [+-]hh
338 			 */
339 			while (isspace(*bp))
340 				bp++;
341 
342 			switch (*bp++) {
343 			case 'Z':
344 				tm->tm_isdst = 0;
345 #ifdef TM_GMTOFF
346 				tm->TM_GMTOFF = 0;
347 #endif
348 #ifdef TM_ZONE
349 				tm->TM_ZONE = utc;
350 #endif
351 				continue;
352 			case '+':
353 				neg = 0;
354 				break;
355 			case '-':
356 				neg = 1;
357 				break;
358 			default:
359 				return NULL;
360 			}
361 			offs = 0;
362 			for (i = 0; i < 4; ) {
363 				if (isdigit(*bp)) {
364 					offs = offs * 10 + (*bp++ - '0');
365 					i++;
366 					continue;
367 				}
368 				if (i == 2 && *bp == ':') {
369 					bp++;
370 					continue;
371 				}
372 				break;
373 			}
374 			switch (i) {
375 			case 2:
376 				offs *= 100;
377 				break;
378 			case 4:
379 				i = offs % 100;
380 				if (i >= 60)
381 					return NULL;
382 				/* Convert minutes into decimal */
383 				offs = (offs / 100) * 100 + (i * 50) / 30;
384 				break;
385 			default:
386 				return NULL;
387 			}
388 			if (neg)
389 				offs = -offs;
390 			tm->tm_isdst = 0;	/* XXX */
391 #ifdef TM_GMTOFF
392 			tm->TM_GMTOFF = offs;
393 #endif
394 #ifdef TM_ZONE
395 			tm->TM_ZONE = NULL;	/* XXX */
396 #endif
397 			continue;
398 
399 		/*
400 		 * Miscellaneous conversions.
401 		 */
402 		case 'n':	/* Any kind of white-space. */
403 		case 't':
404 			while (isspace(*bp))
405 				bp++;
406 			LEGAL_ALT(0);
407 			continue;
408 
409 
410 		default:	/* Unknown/unsupported conversion. */
411 			return NULL;
412 		}
413 	}
414 
415 	return __DECONST(char *, bp);
416 }
417 
418 
/*
 * conv_num --
 *	Convert a run of ASCII decimal digits at buf into a number.
 *
 *	ulim bounds both the accepted value and the number of digits read:
 *	no more digits are consumed than ulim itself has, and scanning also
 *	stops once appending another digit would necessarily exceed ulim.
 *
 *	On success the value is stored in *dest and a pointer to the first
 *	unconsumed character is returned.  If buf does not start with a digit
 *	or the value lies outside [llim, ulim], NULL is returned and *dest is
 *	left untouched.
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cursor = buf;
	uint value = 0;
	uint digits_left;

	if (*cursor < '0' || *cursor > '9')
		return NULL;

	/* digits_left loses one decimal place for every digit consumed. */
	for (digits_left = ulim; ; ) {
		value = value * 10 + (uint)(*cursor - '0');
		cursor++;
		digits_left /= 10;

		/* Another digit would overshoot ulim or its digit count. */
		if (value * 10 > ulim || digits_left == 0)
			break;
		if (*cursor < '0' || *cursor > '9')
			break;
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = (int)value;
	return cursor;
}
445 
/*
 * find_string --
 *	Case-insensitively match the start of bp against a list of names.
 *
 *	The c entries of n1 (full names) are tried first, in order, and then
 *	the c entries of n2 (abbreviated names), if n2 is not NULL.  Nothing
 *	is searched when n1 is NULL.
 *
 *	On the first match the entry's index is stored in *tgt and a pointer
 *	just past the matched text is returned.  NULL is returned, with *tgt
 *	untouched, when no entry matches.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	const char * const *names;
	unsigned int namelen;
	int pass, idx;

	tables[0] = n1;
	tables[1] = n2;

	for (pass = 0; pass < 2; pass++) {
		names = tables[pass];
		/* A missing table ends the search. */
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			namelen = (unsigned int)strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp,
			    namelen) != 0)
				continue;
			*tgt = idx;
			return bp + namelen;
		}
	}

	/* Nothing matched */
	return NULL;
}
467