1 //=============================================================================
2 // File: dw_date.cpp
3 // Contents: Date parsing function
4 // Maintainer: Doug Sauder <dwsauder@fwb.gulf.net>
5 // WWW: http://www.fwb.gulf.net/~dwsauder/mimepp.html
6 // $Revision: 1.6 $
7 // $Date: 1997/09/27 11:53:45 $
8 //
9 // Copyright (c) 1996, 1997 Douglas W. Sauder
10 // All rights reserved.
11 //
12 // IN NO EVENT SHALL DOUGLAS W. SAUDER BE LIABLE TO ANY PARTY FOR DIRECT,
13 // INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
14 // THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF DOUGLAS W. SAUDER
15 // HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16 //
17 // DOUGLAS W. SAUDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT
18 // NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
19 // PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
20 // BASIS, AND DOUGLAS W. SAUDER HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
21 // SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
22 //
23 //=============================================================================
24
25 /*
26 * For maximum code reuse, the functions in this file are written in C.
27 */
28
29 #include <mimelib/config.h>
30 #include <mimelib/debug.h>
31 #include <ctype.h>
32 #include <time.h>
33
34
/*
 * CommentLength() -- Measure a parenthesized RFC-822 comment
 *
 * str is expected to point at the '(' that opens the comment.  Comments may
 * nest.  A backslash quotes the single character that follows it, so a
 * quoted parenthesis neither opens nor closes a comment and a quoted
 * backslash does not quote anything itself.
 *
 * Returns the number of characters up to and including the ')' that brings
 * the nesting level back to zero.  If the string ends first, the length of
 * the whole string is returned.
 */
static int CommentLength(const char *str)
{
    int level = 0;
    int quoted = 0;
    int pos;

    for (pos = 0; str[pos] != 0; ++pos) {
        if (quoted) {
            /* Escaped character (possibly a backslash): no special meaning */
            quoted = 0;
        }
        else if (str[pos] == '\\') {
            quoted = 1;
        }
        else if (str[pos] == '(') {
            ++level;
        }
        else if (str[pos] == ')') {
            --level;
            if (level == 0) {
                return pos + 1;
            }
        }
    }
    /* Unterminated comment: it extends to the end of the string */
    return pos;
}
81
82
/*
 * Helpers for ParseRfc822Date().  Like the parser itself, the character
 * tests below assume a character set in which the digits and the letters
 * are each contiguous (e.g. ASCII).
 */

/* Nonzero if ch is a decimal digit */
static int IsDateDigit(int ch)
{
    return '0' <= ch && ch <= '9';
}

/* Nonzero if ch could begin a month name (all of them start in A..S) */
static int IsMonthStart(int ch)
{
    return ('A' <= ch && ch <= 'S') || ('a' <= ch && ch <= 's');
}

/* Nonzero if ch is the separator between time fields */
static int IsTimeSeparator(int ch)
{
    return ch == ':';
}

/* Nonzero if ch could begin a time zone: a sign or a letter */
static int IsZoneStart(int ch)
{
    /* The cast matters: a negative value passed to isalpha() is undefined */
    return ch == '+' || ch == '-' || isalpha((unsigned char) ch);
}

/* Nonzero if ch begins either the seconds field or the time zone */
static int IsSeparatorOrZoneStart(int ch)
{
    return IsTimeSeparator(ch) || IsZoneStart(ch);
}

/* Fold a lower-case letter a..z to upper case; other values are unchanged */
static int FoldToUpper(int ch)
{
    return ('a' <= ch && ch <= 'z') ? ch - 'a' + 'A' : ch;
}

/*
 * Nonzero if str begins with name, ignoring the case of the letters in str.
 * name must be written in upper case.  The comparison stops at the first
 * mismatch, so it never reads past the terminating NUL of str.
 */
static int StartsWithNoCase(const char *str, const char *name)
{
    while (*name) {
        if (FoldToUpper(*str) != *name) {
            return 0;
        }
        ++str;
        ++name;
    }
    return 1;
}

/*
 * Advance from pos to the first character accepted by isStop, or to the end
 * of the string.  A '(' met on the way is stepped over together with the
 * comment it opens.  Returns the new position.
 */
static int SkipUntil(const char *str, int pos, int (*isStop)(int))
{
    int ch = str[pos];

    while (ch && !isStop(ch)) {
        pos += (ch == '(') ? CommentLength(&str[pos]) : 1;
        ch = str[pos];
    }
    return pos;
}

/*
 * Convert up to maxDigits decimal digits starting at *pos.  *pos is advanced
 * past the digits that were consumed and *numDigits receives their count.
 * Returns the value, or -1 if there is no digit at *pos.
 */
static int ReadNumber(const char *str, int *pos, int maxDigits, int *numDigits)
{
    int value = 0;
    int count = 0;

    while (count < maxDigits && IsDateDigit(str[*pos])) {
        value = value*10 + (str[*pos] - '0');
        ++*pos;
        ++count;
    }
    *numDigits = count;
    return (count > 0) ? value : -1;
}

/*
 * Return the month number (0 = Jan ... 11 = Dec) if str begins with a
 * three-letter month abbreviation in any mix of case, or -1 otherwise.
 */
static int ParseMonthName(const char *str)
{
    static const char *const names[12] = {
        "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
        "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
    };
    int i;

    for (i = 0; i < 12; ++i) {
        if (StartsWithNoCase(str, names[i])) {
            return i;
        }
    }
    return -1;
}

/*
 * Convert a single-letter military time zone, given in upper case, to
 * minutes offset from UTC, using the RFC-822 table:
 *   A..I => -1..-9 hours, K..M => -10..-12 hours (J is not used),
 *   N..Y => +1..+12 hours, Z => 0.
 * Returns 1 and sets *zone on success; returns 0 for any other character.
 */
static int MilitaryZone(int ch, int *zone)
{
    int hours;

    if ('A' <= ch && ch <= 'I') {
        hours = 'A' - 1 - ch;
    }
    else if ('K' <= ch && ch <= 'M') {
        hours = 'A' - ch;
    }
    else if ('N' <= ch && ch <= 'Y') {
        hours = ch - 'N' + 1;
    }
    else if (ch == 'Z') {
        hours = 0;
    }
    else {
        return 0;
    }
    *zone = hours*60;
    return 1;
}

/* A named time zone and its offset from UTC in minutes */
struct DateZoneName {
    const char *name;
    int minutes;
};

/*
 * Parse the time zone that starts at str[pos].  Accepted forms are a sign
 * followed by exactly four digits (hhmm), one of the names in the table
 * below, or a single military letter.  Names and letters may be in either
 * case.  Returns 1 and sets *zone (minutes offset from UTC) on success;
 * returns 0 otherwise.
 */
static int ParseZone(const char *str, int pos, int *zone)
{
    static const struct DateZoneName named[] = {
        { "UT",     0 }, { "GMT",    0 },
        { "EST", -300 }, { "EDT", -240 },
        { "CST", -360 }, { "CDT", -300 },
        { "MST", -420 }, { "MDT", -360 },
        { "PST", -480 }, { "PDT", -420 }
    };
    int ch = str[pos];
    int i, n, numDigits, sgn;

    if (ch == '+' || ch == '-') {
        sgn = (ch == '-') ? -1 : 1;
        ++pos;
        /* -- skip non-digits */
        while (str[pos] && !IsDateDigit(str[pos])) {
            ++pos;
        }
        /* -- convert next four digits */
        n = ReadNumber(str, &pos, 4, &numDigits);
        if (numDigits != 4) {
            return 0;
        }
        *zone = sgn*((n/100)*60 + n%100);
        return 1;
    }
    for (i = 0; i < (int) (sizeof named/sizeof named[0]); ++i) {
        if (StartsWithNoCase(&str[pos], named[i].name)) {
            *zone = named[i].minutes;
            return 1;
        }
    }
    /*
     * Note: According to RFC-1123, the military time zones are specified
     * incorrectly in RFC-822.  RFC-1123 then states that "military time
     * zones in RFC-822 headers carry no information."
     * Here, we follow the specification in RFC-822.  What else could we
     * do?  Military time zones should *never* be used!
     */
    return MilitaryZone(FoldToUpper(ch), zone);
}


/*
 * ParseRfc822Date() -- Parse a date in RFC-822 (RFC-1123) format
 *
 * The fields are located in order: day, month name, year, hour, minute,
 * optional second, time zone.  An optional day of the week before the day
 * is ignored, and parenthesized comments between fields are skipped.
 *
 * A year of one or two digits in the range 0..49 is taken to be 2000..2049;
 * any other year below 1900 has 1900 added to it.
 *
 * tms and z may each be NULL if the caller does not need that result.
 *
 * If the parsing succeeds:
 *   - tms is set to contain the year, month, day, hour, minute, and second
 *     (the other members of tms are not modified)
 *   - z is set to contain the time zone in minutes offset from UTC
 *   - 0 is returned
 * If the parsing fails:
 *   - (-1) is returned
 *   - tms is set to 1 Jan 1970 00:00:00 and z is set to 0
 * If str is NULL:
 *   - (-1) is returned and neither tms nor z is modified
 */
#ifdef __cplusplus
extern "C"
#endif
int ParseRfc822Date(const char *str, struct tm *tms, int *z)
{
    int pos, n, numDigits;
    int day=1, month=0, year=1970, hour=0, minute=0, second=0, zone=0;
    int isValid = 1;

    if (!str) {
        return -1;
    }
    /*
     * Day -- one or two digits.  Skipping to the first digit also passes
     * over the optional day of the week.
     */
    pos = SkipUntil(str, 0, IsDateDigit);
    n = ReadNumber(str, &pos, 2, &numDigits);
    if (1 <= n && n <= 31) {
        day = n;
    }
    else {
        isValid = 0;
    }
    /*
     * Month.  Use case-insensitive string compare for added robustness
     */
    pos = SkipUntil(str, pos, IsMonthStart);
    n = ParseMonthName(&str[pos]);
    if (n >= 0) {
        month = n;
        pos += 3;
    }
    else {
        isValid = 0;
    }
    /*
     * Year -- two or four digits (four preferred)
     */
    pos = SkipUntil(str, pos, IsDateDigit);
    n = ReadNumber(str, &pos, 4, &numDigits);
    if (n == -1) {
        isValid = 0;
    }
    else if (numDigits <= 2 && n < 50) {
        /* Short years 00..49 belong to the 21st century (RFC-2822) */
        year = n + 2000;
    }
    else if (n < 1900) {
        year = n + 1900;
    }
    else {
        year = n;
    }
    /*
     * Hour -- two digits
     */
    pos = SkipUntil(str, pos, IsDateDigit);
    n = ReadNumber(str, &pos, 2, &numDigits);
    if (0 <= n && n <= 23) {
        hour = n;
    }
    else {
        isValid = 0;
    }
    /*
     * Minute -- two digits, after the ':' that follows the hour
     */
    pos = SkipUntil(str, pos, IsTimeSeparator);
    pos = SkipUntil(str, pos, IsDateDigit);
    n = ReadNumber(str, &pos, 2, &numDigits);
    if (0 <= n && n <= 59) {
        minute = n;
    }
    else {
        isValid = 0;
    }
    /*
     * Second (optional) -- two digits.  Present only if a ':' comes before
     * the start of the time zone.
     */
    pos = SkipUntil(str, pos, IsSeparatorOrZoneStart);
    if (str[pos] == ':') {
        ++pos;
        pos = SkipUntil(str, pos, IsDateDigit);
        n = ReadNumber(str, &pos, 2, &numDigits);
        if (0 <= n && n <= 59) {
            second = n;
        }
        else {
            isValid = 0;
        }
        /* -- scan for start of time zone */
        pos = SkipUntil(str, pos, IsZoneStart);
    }
    /*
     * Time zone (required)
     */
    if (!ParseZone(str, pos, &zone)) {
        isValid = 0;
    }
    if (isValid) {
        if (tms) {
            tms->tm_year = year - 1900;
            tms->tm_mon  = month;
            tms->tm_mday = day;
            tms->tm_hour = hour;
            tms->tm_min  = minute;
            tms->tm_sec  = second;
        }
        if (z) {
            *z = zone;
        }
    }
    else {
        /* Leave the caller with a well-defined value: the epoch, in UTC */
        if (tms) {
            tms->tm_year = 70;
            tms->tm_mon  = 0;
            tms->tm_mday = 1;
            tms->tm_hour = 0;
            tms->tm_min  = 0;
            tms->tm_sec  = 0;
        }
        if (z) {
            *z = 0;
        }
    }
    return isValid ? 0 : -1;
}
636
637
638 #ifdef DW_TESTING_DATEPARSER
639
640 #include <stdio.h>
641 #include <stdlib.h>
642 #include <limits.h>
643
/* Fixed test strings; only an empty placeholder entry is defined. */
const char *testStr[] = {
    ""
};

/* Day-of-week abbreviations, indexed by tm_wday (0 = Sunday). */
const char *wdays[] = {
    "Sun",
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat"
};

/* Month abbreviations, indexed by tm_mon (0 = January). */
const char *months[] = {
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec"
};
656
main()657 int main()
658 {
659 struct tm *ptms, tms1, tms2;
660 time_t tt;
661 int i, zone1, zone2;
662 char buf[100], sgn;
663
664 /* try a bunch of random dates */
665 srand(100);
666 for (i=0; i < 1000; ++i) {
667 tt = rand()*((double)0x7fffffff/RAND_MAX);
668 zone1 = (rand()%49 - 24)*30;
669 ptms = gmtime(&tt);
670 tms1 = *ptms;
671 sgn = (zone1 >= 0) ? '+' : '-';
672 sprintf(buf, "%s, %2d %s %d %d%d:%d%d:%d%d %c%d%d%d%d",
673 wdays[tms1.tm_wday], tms1.tm_mday, months[tms1.tm_mon],
674 tms1.tm_year+1900,
675 tms1.tm_hour/10, tms1.tm_hour%10,
676 tms1.tm_min/10, tms1.tm_min%10,
677 tms1.tm_sec/10, tms1.tm_sec%10,
678 sgn, abs(zone1)/60/10, abs(zone1)/60%10,
679 abs(zone1)%60/10, abs(zone1)%60%10);
680 ParseRfc822Date(buf, &tms2, &zone2);
681 if (tms1.tm_year != tms2.tm_year) {
682 fprintf(stderr, "Bad year\n");
683 }
684 if (tms1.tm_mon != tms2.tm_mon) {
685 fprintf(stderr, "Bad month\n");
686 }
687 if (tms1.tm_mday != tms2.tm_mday) {
688 fprintf(stderr, "Bad day\n");
689 }
690 if (tms1.tm_hour != tms2.tm_hour) {
691 fprintf(stderr, "Bad hour\n");
692 }
693 if (tms1.tm_min != tms2.tm_min) {
694 fprintf(stderr, "Bad minute\n");
695 }
696 if (tms1.tm_sec != tms2.tm_sec) {
697 fprintf(stderr, "Bad second\n");
698 }
699 if (zone1 != zone2) {
700 fprintf(stderr, "Bad zone\n");
701 }
702 }
703 return 0;
704 }
705
706 #endif
707