xref: /dragonfly/lib/libc/stdtime/strptime.c (revision e293de53)
1 /*	$NetBSD: strptime.c,v 1.31 2008/11/04 21:08:33 christos Exp $	*/
2 /*	$DragonFly: src/lib/libc/stdtime/strptime.c,v 1.5 2005/12/04 23:25:40 swildner Exp $ */
3 
4 /*-
5  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code was contributed to The NetBSD Foundation by Klaus Klein.
9  * Heavily optimised by David Laight
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/localedef.h>
34 #include <ctype.h>
35 #include <locale.h>
36 #include <string.h>
37 #include <time.h>
38 #include "private.h"
39 #include "tzfile.h"
40 
41 #define	_ctloc(x)		(_CurrentTimeLocale->x)
42 
43 /*
44  * We do not implement alternate representations. However, we always
45  * check whether a given modifier is allowed for a certain conversion.
46  */
47 #define	ALT_E			0x01
48 #define	ALT_O			0x02
49 #define	LEGAL_ALT(x)		{ if (alt_format & ~(x)) return NULL; }
50 
51 static char gmt[] = { "GMT" };
52 static char utc[] = { "UTC" };
53 
54 static const u_char *conv_num(const unsigned char *, int *, uint, uint);
55 static const u_char *find_string(const u_char *, int *, const char * const *,
56 	const char * const *, int);
57 
58 char *
59 strptime(const char * __restrict buf, const char * __restrict fmt,
60 	 struct tm * __restrict tm)
61 {
62 	unsigned char c;
63 	const unsigned char *bp;
64 	int alt_format, i, split_year = 0, neg, offs;
65 	const char *new_fmt;
66 
67 	bp = (const u_char *)buf;
68 
69 	while (bp != NULL && (c = *fmt++) != '\0') {
70 		/* Clear `alternate' modifier prior to new conversion. */
71 		alt_format = 0;
72 		i = 0;
73 
74 		/* Eat up white-space. */
75 		if (isspace(c)) {
76 			while (isspace(*bp))
77 				bp++;
78 			continue;
79 		}
80 
81 		if (c != '%')
82 			goto literal;
83 
84 
85 again:		switch (c = *fmt++) {
86 		case '%':	/* "%%" is converted to "%". */
87 literal:
88 			if (c != *bp++)
89 				return NULL;
90 			LEGAL_ALT(0);
91 			continue;
92 
93 		/*
94 		 * "Alternative" modifiers. Just set the appropriate flag
95 		 * and start over again.
96 		 */
97 		case 'E':	/* "%E?" alternative conversion modifier. */
98 			LEGAL_ALT(0);
99 			alt_format |= ALT_E;
100 			goto again;
101 
102 		case 'O':	/* "%O?" alternative conversion modifier. */
103 			LEGAL_ALT(0);
104 			alt_format |= ALT_O;
105 			goto again;
106 
107 		/*
108 		 * "Complex" conversion rules, implemented through recursion.
109 		 */
110 		case 'c':	/* Date and time, using the locale's format. */
111 			new_fmt = _ctloc(d_t_fmt);
112 			goto recurse;
113 
114 		case 'D':	/* The date as "%m/%d/%y". */
115 			new_fmt = "%m/%d/%y";
116 			LEGAL_ALT(0);
117 			goto recurse;
118 
119 		case 'F':	/* The date as "%Y-%m-%d". */
120 			new_fmt = "%Y-%m-%d";
121 			LEGAL_ALT(0);
122 			goto recurse;
123 
124 		case 'R':	/* The time as "%H:%M". */
125 			new_fmt = "%H:%M";
126 			LEGAL_ALT(0);
127 			goto recurse;
128 
129 		case 'r':	/* The time in 12-hour clock representation. */
130 			new_fmt =_ctloc(t_fmt_ampm);
131 			LEGAL_ALT(0);
132 			goto recurse;
133 
134 		case 'T':	/* The time as "%H:%M:%S". */
135 			new_fmt = "%H:%M:%S";
136 			LEGAL_ALT(0);
137 			goto recurse;
138 
139 		case 'X':	/* The time, using the locale's format. */
140 			new_fmt =_ctloc(t_fmt);
141 			goto recurse;
142 
143 		case 'x':	/* The date, using the locale's format. */
144 			new_fmt =_ctloc(d_fmt);
145 		    recurse:
146 			bp = (const u_char *)strptime((const char *)bp,
147 							    new_fmt, tm);
148 			LEGAL_ALT(ALT_E);
149 			continue;
150 
151 		/*
152 		 * "Elementary" conversion rules.
153 		 */
154 		case 'A':	/* The day of week, using the locale's form. */
155 		case 'a':
156 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
157 					_ctloc(abday), 7);
158 			LEGAL_ALT(0);
159 			continue;
160 
161 		case 'B':	/* The month, using the locale's form. */
162 		case 'b':
163 		case 'h':
164 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
165 					_ctloc(abmon), 12);
166 			LEGAL_ALT(0);
167 			continue;
168 
169 		case 'C':	/* The century number. */
170 			i = 20;
171 			bp = conv_num(bp, &i, 0, 99);
172 
173 			i = i * 100 - TM_YEAR_BASE;
174 			if (split_year)
175 				i += tm->tm_year % 100;
176 			split_year = 1;
177 			tm->tm_year = i;
178 			LEGAL_ALT(ALT_E);
179 			continue;
180 
181 		case 'd':	/* The day of month. */
182 		case 'e':
183 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
184 			LEGAL_ALT(ALT_O);
185 			continue;
186 
187 		case 'k':	/* The hour (24-hour clock representation). */
188 			LEGAL_ALT(0);
189 			/* FALLTHROUGH */
190 		case 'H':
191 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
192 			LEGAL_ALT(ALT_O);
193 			continue;
194 
195 		case 'l':	/* The hour (12-hour clock representation). */
196 			LEGAL_ALT(0);
197 			/* FALLTHROUGH */
198 		case 'I':
199 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
200 			if (tm->tm_hour == 12)
201 				tm->tm_hour = 0;
202 			LEGAL_ALT(ALT_O);
203 			continue;
204 
205 		case 'j':	/* The day of year. */
206 			i = 1;
207 			bp = conv_num(bp, &i, 1, 366);
208 			tm->tm_yday = i - 1;
209 			LEGAL_ALT(0);
210 			continue;
211 
212 		case 'M':	/* The minute. */
213 			bp = conv_num(bp, &tm->tm_min, 0, 59);
214 			LEGAL_ALT(ALT_O);
215 			continue;
216 
217 		case 'm':	/* The month. */
218 			i = 1;
219 			bp = conv_num(bp, &i, 1, 12);
220 			tm->tm_mon = i - 1;
221 			LEGAL_ALT(ALT_O);
222 			continue;
223 
224 		case 'p':	/* The locale's equivalent of AM/PM. */
225 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
226 			if (tm->tm_hour > 11)
227 				return NULL;
228 			tm->tm_hour += i * 12;
229 			LEGAL_ALT(0);
230 			continue;
231 
232 		case 'S':	/* The seconds. */
233 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
234 			LEGAL_ALT(ALT_O);
235 			continue;
236 
237 		case 'U':	/* The week of year, beginning on sunday. */
238 		case 'W':	/* The week of year, beginning on monday. */
239 			/*
240 			 * XXX This is bogus, as we can not assume any valid
241 			 * information present in the tm structure at this
242 			 * point to calculate a real value, so just check the
243 			 * range for now.
244 			 */
245 			 bp = conv_num(bp, &i, 0, 53);
246 			 LEGAL_ALT(ALT_O);
247 			 continue;
248 
249 		case 'w':	/* The day of week, beginning on sunday. */
250 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
251 			LEGAL_ALT(ALT_O);
252 			continue;
253 
254 		case 'u':	/* The day of week, monday = 1. */
255 			bp = conv_num(bp, &i, 1, 7);
256 			tm->tm_wday = i % 7;
257 			LEGAL_ALT(ALT_O);
258 			continue;
259 
260 		case 'g':	/* The year corresponding to the ISO week
261 				 * number but without the century.
262 				 */
263 			bp = conv_num(bp, &i, 0, 99);
264 			continue;
265 
266 		case 'G':	/* The year corresponding to the ISO week
267 				 * number with century.
268 				 */
269 			do
270 				bp++;
271 			while (isdigit(*bp));
272 			continue;
273 
274 		case 'V':	/* The ISO 8601:1988 week number as decimal */
275 			bp = conv_num(bp, &i, 0, 53);
276 			continue;
277 
278 		case 'Y':	/* The year. */
279 			i = TM_YEAR_BASE;	/* just for data sanity... */
280 			bp = conv_num(bp, &i, 0, 9999);
281 			tm->tm_year = i - TM_YEAR_BASE;
282 			LEGAL_ALT(ALT_E);
283 			continue;
284 
285 		case 'y':	/* The year within 100 years of the epoch. */
286 			/* LEGAL_ALT(ALT_E | ALT_O); */
287 			bp = conv_num(bp, &i, 0, 99);
288 
289 			if (split_year)
290 				/* preserve century */
291 				i += (tm->tm_year / 100) * 100;
292 			else {
293 				split_year = 1;
294 				if (i <= 68)
295 					i = i + 2000 - TM_YEAR_BASE;
296 				else
297 					i = i + 1900 - TM_YEAR_BASE;
298 			}
299 			tm->tm_year = i;
300 			continue;
301 
302 		case 'Z':
303 			tzset();
304 			if (strncmp((const char *)bp, gmt, 3) == 0) {
305 				tm->tm_isdst = 0;
306 #ifdef TM_GMTOFF
307 				tm->TM_GMTOFF = 0;
308 #endif
309 #ifdef TM_ZONE
310 				tm->TM_ZONE = gmt;
311 #endif
312 				bp += 3;
313 			} else {
314 				const unsigned char *ep;
315 
316 				ep = find_string(bp, &i,
317 						 (const char * const *)tzname,
318 						  NULL, 2);
319 				if (ep != NULL) {
320 					tm->tm_isdst = i;
321 #ifdef TM_GMTOFF
322 					tm->TM_GMTOFF = -(timezone);
323 #endif
324 #ifdef TM_ZONE
325 					tm->TM_ZONE = tzname[i];
326 #endif
327 				}
328 				bp = ep;
329 			}
330 			continue;
331 
332 		case 'z':
333 			/*
334 			 * We recognize all ISO 8601 formats:
335 			 * Z	= Zulu time/UTC
336 			 * [+-]hhmm
337 			 * [+-]hh:mm
338 			 * [+-]hh
339 			 */
340 			while (isspace(*bp))
341 				bp++;
342 
343 			switch (*bp++) {
344 			case 'Z':
345 				tm->tm_isdst = 0;
346 #ifdef TM_GMTOFF
347 				tm->TM_GMTOFF = 0;
348 #endif
349 #ifdef TM_ZONE
350 				tm->TM_ZONE = utc;
351 #endif
352 				continue;
353 			case '+':
354 				neg = 0;
355 				break;
356 			case '-':
357 				neg = 1;
358 				break;
359 			default:
360 				return NULL;
361 			}
362 			offs = 0;
363 			for (i = 0; i < 4; ) {
364 				if (isdigit(*bp)) {
365 					offs = offs * 10 + (*bp++ - '0');
366 					i++;
367 					continue;
368 				}
369 				if (i == 2 && *bp == ':') {
370 					bp++;
371 					continue;
372 				}
373 				break;
374 			}
375 			switch (i) {
376 			case 2:
377 				offs *= 100;
378 				break;
379 			case 4:
380 				i = offs % 100;
381 				if (i >= 60)
382 					return NULL;
383 				/* Convert minutes into decimal */
384 				offs = (offs / 100) * 100 + (i * 50) / 30;
385 				break;
386 			default:
387 				return NULL;
388 			}
389 			if (neg)
390 				offs = -offs;
391 			tm->tm_isdst = 0;	/* XXX */
392 #ifdef TM_GMTOFF
393 			tm->TM_GMTOFF = offs;
394 #endif
395 #ifdef TM_ZONE
396 			tm->TM_ZONE = NULL;	/* XXX */
397 #endif
398 			continue;
399 
400 		/*
401 		 * Miscellaneous conversions.
402 		 */
403 		case 'n':	/* Any kind of white-space. */
404 		case 't':
405 			while (isspace(*bp))
406 				bp++;
407 			LEGAL_ALT(0);
408 			continue;
409 
410 
411 		default:	/* Unknown/unsupported conversion. */
412 			return NULL;
413 		}
414 	}
415 
416 	return __DECONST(char *, bp);
417 }
418 
419 
/*
 * conv_num --
 *	Read an unsigned decimal number from the start of `buf'.
 *
 *	Digits are consumed greedily, but scanning stops as soon as the
 *	value could not take another digit without exceeding `ulim', or as
 *	many digits as `ulim' itself has have been read.  On success the
 *	value is stored in `*dest' and a pointer to the first unconsumed
 *	byte is returned.  NULL is returned, and `*dest' is left untouched,
 *	when `buf' does not start with a digit or the value lies outside
 *	[llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cur = buf;
	uint value = 0;
	/* Dividing the limit by ten per digit bounds the digit count. */
	uint budget = ulim;

	if (*cur < '0' || *cur > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (*cur - '0');
		budget /= 10;
		cur++;

		if (value * 10 > ulim)
			break;		/* one more digit would overshoot */
		if (budget == 0)
			break;		/* maximum digit count reached */
		if (*cur < '0' || *cur > '9')
			break;		/* no further digits in the input */
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return cur;
}
446 
/*
 * find_string --
 *	Case-insensitively match the start of `bp' against the `c' strings
 *	of table `n1' and then, if nothing matched and `n2' is not NULL,
 *	against the `c' strings of table `n2'.
 *
 *	The first table entry that is a prefix of the input wins: its index
 *	is stored in `*tgt' and a pointer just past the matched text is
 *	returned.  NULL is returned, and `*tgt' is left untouched, when no
 *	entry matches.  A NULL `n1' ends the search at once, so `n2' is not
 *	consulted in that case.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	const char * const *names;
	unsigned int namelen;
	int pass, idx;

	/* Full names are tried first, abbreviated ones second. */
	tables[0] = n1;
	tables[1] = n2;

	for (pass = 0; pass < 2; pass++) {
		names = tables[pass];
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			namelen = strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp,
			    namelen) != 0)
				continue;
			*tgt = idx;
			return bp + namelen;
		}
	}

	/* Nothing matched */
	return NULL;
}
468