1 //=============================================================================
2 // File:       dw_date.cpp
3 // Contents:   Date parsing function
4 // Maintainer: Doug Sauder <dwsauder@fwb.gulf.net>
5 // WWW:        http://www.fwb.gulf.net/~dwsauder/mimepp.html
6 // $Revision: 1.6 $
7 // $Date: 1997/09/27 11:53:45 $
8 //
9 // Copyright (c) 1996, 1997 Douglas W. Sauder
10 // All rights reserved.
11 //
12 // IN NO EVENT SHALL DOUGLAS W. SAUDER BE LIABLE TO ANY PARTY FOR DIRECT,
13 // INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
14 // THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF DOUGLAS W. SAUDER
15 // HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16 //
17 // DOUGLAS W. SAUDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT
18 // NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
19 // PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
20 // BASIS, AND DOUGLAS W. SAUDER HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
21 // SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
22 //
23 //=============================================================================
24 
25 /*
26  * For maximum code reuse, the functions in this file are written in C.
27  */
28 
29 #include <mimelib/config.h>
30 #include <mimelib/debug.h>
31 #include <ctype.h>
32 #include <time.h>
33 
34 
/*
 * CommentLength() -- Measure a parenthesized RFC-822 comment
 *
 * str is expected to point at the '(' that opens the comment.  Nested
 * comments are followed, and a backslash quotes the character after it
 * (a quoted-pair), so "\(" and "\)" do not affect the nesting level and
 * "\\" is a literal backslash that does not quote anything further.
 *
 * Returns:
 *  - the number of characters up to and including the ')' that brings the
 *    nesting level back to zero, or
 *  - the number of characters before the terminating NUL if the string
 *    ends first (unterminated comment)
 */
static int CommentLength(const char *str)
{
    int pos, level, quoteNext;

    level = 0;
    quoteNext = 0;
    for (pos = 0; str[pos] != 0; ++pos) {
        if (quoteNext) {
            /*
             * Second half of a quoted-pair: it is plain text whatever it
             * is.  In particular a quoted backslash must NOT start a new
             * quoted-pair, otherwise "\\)" would hide the closing paren.
             */
            quoteNext = 0;
        }
        else if (str[pos] == '\\') {
            quoteNext = 1;
        }
        else if (str[pos] == '(') {
            ++level;
        }
        else if (str[pos] == ')') {
            --level;
            if (level == 0) {
                return pos + 1;
            }
        }
    }
    /* -- ran off the end of the string before the comment was closed */
    return pos;
}
81 
82 
/*
 * Private helpers for ParseRfc822Date()
 */

/* Predicate used by DateSkipUntil() to recognize the character to stop at. */
typedef int (*DateStopFn)(int ch);

/* Map an ASCII lower-case letter to upper case, independent of the locale. */
static int DateAsciiUpper(int ch)
{
    return ('a' <= ch && ch <= 'z') ? ch - 'a' + 'A' : ch;
}

/* Nonzero if ch is an ASCII decimal digit. */
static int DateIsDigit(int ch)
{
    return '0' <= ch && ch <= '9';
}

/* Nonzero if ch could be the first letter of a month name (A-S, any case). */
static int DateIsMonthStart(int ch)
{
    return ('A' <= ch && ch <= 'S') || ('a' <= ch && ch <= 's');
}

/* Nonzero if ch is the ':' separating the time fields. */
static int DateIsColon(int ch)
{
    return ch == ':';
}

/* Nonzero if ch could begin a time zone: a sign or a letter. */
static int DateIsZoneStart(int ch)
{
    /* The cast keeps isalpha() well defined for chars with the high bit set */
    return ch == '+' || ch == '-' || isalpha((unsigned char) ch);
}

/* Nonzero if ch begins either the seconds field (':') or the time zone. */
static int DateIsSecondsOrZoneStart(int ch)
{
    return ch == ':' || DateIsZoneStart(ch);
}

/*
 * Advance from pos until isStop() accepts the current character or the
 * string ends, stepping over parenthesized comments as a unit.  Returns
 * the new position.
 */
static int DateSkipUntil(const char *str, int pos, DateStopFn isStop)
{
    int ch;

    ch = str[pos];
    while (ch && !isStop(ch)) {
        if (ch == '(') {
            pos += CommentLength(&str[pos]);
        }
        else {
            ++pos;
        }
        ch = str[pos];
    }
    return pos;
}

/*
 * Convert up to maxDigits decimal digits starting at *pos and advance *pos
 * past them.  Returns the value, or -1 if there was no digit at *pos.  If
 * numDigits is not NULL it receives the number of digits consumed.
 */
static int DateReadDigits(const char *str, int *pos, int maxDigits,
    int *numDigits)
{
    int value, count;

    value = 0;
    count = 0;
    while (count < maxDigits && DateIsDigit(str[*pos])) {
        value = value*10 + (str[*pos] - '0');
        ++*pos;
        ++count;
    }
    if (numDigits) {
        *numDigits = count;
    }
    return count ? value : -1;
}

/*
 * Case-insensitive match of the three characters at s against the English
 * month abbreviations.  Returns 0 (Jan) .. 11 (Dec), or -1 if none match.
 * The && chain stops at the first mismatch, so s is never read beyond its
 * terminating NUL.
 */
static int DateMatchMonth(const char *s)
{
    static const char names[] = "JANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDEC";
    int m;

    for (m = 0; m < 12; ++m) {
        const char *name = &names[3*m];
        if (DateAsciiUpper(s[0]) == name[0]
            && DateAsciiUpper(s[1]) == name[1]
            && DateAsciiUpper(s[2]) == name[2]) {
            return m;
        }
    }
    return -1;
}

/*
 * Nonzero if s starts with the letters c1 c2 (given in upper case),
 * compared without regard to case.  s[1] is only read when s[0] matched,
 * so s is never read beyond its terminating NUL.
 */
static int DateLettersAre(const char *s, char c1, char c2)
{
    return DateAsciiUpper(s[0]) == c1 && DateAsciiUpper(s[1]) == c2;
}

/*
 * Resolve a zone whose first letter has already been seen and whose
 * remainder is at rest: "ST" gives the standard offset, "DT" gives
 * standard + 60 (daylight time), anything else is taken to be the
 * single-letter military zone with the given offset.  Offsets are minutes.
 */
static int DateUsZone(const char *rest, int standard, int military)
{
    if (DateLettersAre(rest, 'S', 'T')) {
        return standard;
    }
    if (DateLettersAre(rest, 'D', 'T')) {
        return standard + 60;
    }
    return military;
}


/*
 * ParseRfc822Date() -- Parse a date in RFC-822 (RFC-1123) format
 *
 * The optional day of the week, and any parenthesized comments between
 * fields, are skipped.  Month and zone names are matched without regard
 * to case.  Years written with one or two digits are read as 2000-2049
 * (00-49) or 1950-1999 (50-99), three-digit years have 1900 added, and
 * four-digit years are taken as written.
 *
 * tms and z may each be NULL, in which case that result is not stored.
 * Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec of tms are
 * written; the other members are left untouched.
 *
 * If the parsing succeeds:
 *  - tms is set to contain the year, month, day, hour, minute, and second
 *  - z is set to contain the time zone in minutes offset from UTC
 *    (positive east of UTC, e.g. "+0100" gives 60 and "EST" gives -300)
 *  - 0 is returned
 * If the parsing fails:
 *  - (-1) is returned
 *  - tms is reset to 1 Jan 1970 00:00:00 and z to 0, except when str is
 *    NULL, in which case neither is touched
 */
#ifdef __cplusplus
extern "C"
#endif
int ParseRfc822Date(const char *str, struct tm *tms, int *z)
{
    int pos, ch, n, sgn, numDigits;
    int day=1, month=0, year=1970, hour=0, minute=0, second=0, zone=0;
    int isValid = 1;

    if (!str) {
        return -1;
    }
    /*
     * Day -- one or two digits.  Skipping to the first digit also steps
     * over the optional day of the week.
     */
    pos = DateSkipUntil(str, 0, DateIsDigit);
    n = DateReadDigits(str, &pos, 2, 0);
    if (1 <= n && n <= 31) {
        day = n;
    }
    else {
        isValid = 0;
    }
    /*
     * Month.  Use case-insensitive string compare for added robustness
     */
    pos = DateSkipUntil(str, pos, DateIsMonthStart);
    n = DateMatchMonth(&str[pos]);
    if (n >= 0) {
        month = n;
        pos += 3;
    }
    else {
        isValid = 0;
    }
    /*
     * Year -- two or four digits (four preferred)
     */
    pos = DateSkipUntil(str, pos, DateIsDigit);
    n = DateReadDigits(str, &pos, 4, &numDigits);
    if (n != -1) {
        if (numDigits == 4) {
            year = n;
        }
        else if (numDigits == 3) {
            year = n + 1900;
        }
        else {
            /* -- short years: 00-49 are 20xx, 50-99 are 19xx */
            year = (n < 50) ? n + 2000 : n + 1900;
        }
    }
    else {
        isValid = 0;
    }
    /*
     * Hour -- one or two digits
     */
    pos = DateSkipUntil(str, pos, DateIsDigit);
    n = DateReadDigits(str, &pos, 2, 0);
    if (0 <= n && n <= 23) {
        hour = n;
    }
    else {
        isValid = 0;
    }
    /*
     * Minute -- one or two digits, following a ':'
     */
    pos = DateSkipUntil(str, pos, DateIsColon);
    pos = DateSkipUntil(str, pos, DateIsDigit);
    n = DateReadDigits(str, &pos, 2, 0);
    if (0 <= n && n <= 59) {
        minute = n;
    }
    else {
        isValid = 0;
    }
    /*
     * Second (optional) -- one or two digits, following a second ':'.
     * When it is absent, second keeps its initial value of 0.
     */
    pos = DateSkipUntil(str, pos, DateIsSecondsOrZoneStart);
    if (str[pos] == ':') {
        ++pos;
        pos = DateSkipUntil(str, pos, DateIsDigit);
        n = DateReadDigits(str, &pos, 2, 0);
        if (0 <= n && n <= 59) {
            second = n;
        }
        else {
            isValid = 0;
        }
        /* -- scan for start of time zone */
        pos = DateSkipUntil(str, pos, DateIsZoneStart);
    }
    /*
     * Time zone
     *
     * Note: According to RFC-1123, the military time zones are specified
     * incorrectly in RFC-822.  RFC-1123 then states that "military time
     * zones in RFC-822 headers carry no information."
     * Here, we follow the specification in RFC-822.  What else could we
     * do?  Military time zones should *never* be used!
     *
     * All offsets below are in minutes.  A missing zone (end of string)
     * falls into the default case and makes the date invalid.
     */
    ch = str[pos];
    sgn = 1;
    switch (ch) {
    case '-':
        sgn = -1;
        /* fall through */
    case '+':
        ++pos;
        /* -- skip non-digits (comments are not recognized here) */
        while (str[pos] && !DateIsDigit(str[pos])) {
            ++pos;
        }
        /* -- exactly four digits are required: hhmm */
        n = DateReadDigits(str, &pos, 4, &numDigits);
        if (numDigits == 4) {
            zone = sgn*((n/100)*60 + n%100);
        }
        else {
            isValid = 0;
        }
        break;
    case 'U':
    case 'u':
        /* "UT", otherwise military zone U */
        zone = (DateAsciiUpper(str[pos+1]) == 'T') ? 0 : 480;
        break;
    case 'G':
    case 'g':
        /* "GMT", otherwise military zone G */
        zone = DateLettersAre(&str[pos+1], 'M', 'T') ? 0 : -420;
        break;
    case 'E':
    case 'e':
        /* "EST" / "EDT", otherwise military zone E */
        zone = DateUsZone(&str[pos+1], -300, -300);
        break;
    case 'C':
    case 'c':
        /* "CST" / "CDT", otherwise military zone C */
        zone = DateUsZone(&str[pos+1], -360, -180);
        break;
    case 'M':
    case 'm':
        /* "MST" / "MDT", otherwise military zone M */
        zone = DateUsZone(&str[pos+1], -420, -720);
        break;
    case 'P':
    case 'p':
        /* "PST" / "PDT", otherwise military zone P */
        zone = DateUsZone(&str[pos+1], -480, 180);
        break;
    case 'Z':
        /* Military time zone */
        zone = 0;
        break;
    default:
        /*
         * Remaining military time zones (upper case only; J is unused).
         * The letter arithmetic yields hours, so scale to minutes to
         * agree with every other branch.
         */
        if ('A' <= ch && ch <= 'I') {
            zone = ('A' - 1 - ch)*60;
        }
        else if ('K' <= ch && ch <= 'M') {
            zone = ('A' - ch)*60;
        }
        else if ('N' <= ch && ch <= 'Y') {
            zone = (ch - 'N' + 1)*60;
        }
        else {
            isValid = 0;
        }
        break;
    }
    if (isValid) {
        if (tms) {
            tms->tm_year = year - 1900;
            tms->tm_mon  = month;
            tms->tm_mday = day;
            tms->tm_hour = hour;
            tms->tm_min  = minute;
            tms->tm_sec  = second;
        }
        if (z) {
            *z = zone;
        }
    }
    else {
        /* -- hand back a well-defined value (the epoch) on failure */
        if (tms) {
            tms->tm_year = 70;
            tms->tm_mon  = 0;
            tms->tm_mday = 1;
            tms->tm_hour = 0;
            tms->tm_min  = 0;
            tms->tm_sec  = 0;
        }
        if (z) {
            *z = 0;
        }
    }
    return isValid ? 0 : -1;
}
636 
637 
638 #ifdef DW_TESTING_DATEPARSER
639 
640 #include <stdio.h>
641 #include <stdlib.h>
642 #include <limits.h>
643 
/* Placeholder for fixed test strings; the driver below does not read it. */
const char* testStr[] = {
    ""
};

/* Day-of-week abbreviations, indexed by tm_wday (0 is Sunday). */
const char* wdays[] = {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};

/* Month abbreviations, indexed by tm_mon (0 is January). */
const char* months[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

/*
 * Self-test driver for ParseRfc822Date().
 *
 * Formats 1000 pseudo-random timestamps (fixed seed, so runs are
 * repeatable), each with a pseudo-random zone that is a multiple of 30
 * minutes, parses the text back and compares every field.  Each mismatch
 * is reported on stderr.  Returns 0 if every date round-tripped and
 * EXIT_FAILURE otherwise.
 */
int main(void)
{
    struct tm *ptms, tms1, tms2;
    time_t tt;
    int i, zone1, zone2, absZone, numErrors;
    char buf[100], sgn;

    numErrors = 0;
    /* try a bunch of random dates */
    srand(100);
    for (i=0; i < 1000; ++i) {
        /* -- scale rand() onto the range 0 .. 0x7fffffff seconds */
        tt = (time_t) (rand()*((double)0x7fffffff/RAND_MAX));
        zone1 = (rand()%49 - 24)*30;
        ptms = gmtime(&tt);
        if (!ptms) {
            fprintf(stderr, "gmtime failed\n");
            ++numErrors;
            continue;
        }
        /* -- copy out of gmtime's result before anything else can reuse it */
        tms1 = *ptms;
        sgn = (zone1 >= 0) ? '+' : '-';
        absZone = abs(zone1);
        snprintf(buf, sizeof(buf),
            "%s, %2d %s %d %d%d:%d%d:%d%d %c%d%d%d%d",
            wdays[tms1.tm_wday], tms1.tm_mday, months[tms1.tm_mon],
            tms1.tm_year+1900,
            tms1.tm_hour/10, tms1.tm_hour%10,
            tms1.tm_min/10, tms1.tm_min%10,
            tms1.tm_sec/10, tms1.tm_sec%10,
            sgn, absZone/60/10, absZone/60%10,
            absZone%60/10, absZone%60%10);
        if (ParseRfc822Date(buf, &tms2, &zone2) != 0) {
            fprintf(stderr, "Parse failed: %s\n", buf);
            ++numErrors;
            continue;
        }
        if (tms1.tm_year != tms2.tm_year) {
            fprintf(stderr, "Bad year\n");
            ++numErrors;
        }
        if (tms1.tm_mon != tms2.tm_mon) {
            fprintf(stderr, "Bad month\n");
            ++numErrors;
        }
        if (tms1.tm_mday != tms2.tm_mday) {
            fprintf(stderr, "Bad day\n");
            ++numErrors;
        }
        if (tms1.tm_hour != tms2.tm_hour) {
            fprintf(stderr, "Bad hour\n");
            ++numErrors;
        }
        if (tms1.tm_min != tms2.tm_min) {
            fprintf(stderr, "Bad minute\n");
            ++numErrors;
        }
        if (tms1.tm_sec != tms2.tm_sec) {
            fprintf(stderr, "Bad second\n");
            ++numErrors;
        }
        if (zone1 != zone2) {
            fprintf(stderr, "Bad zone\n");
            ++numErrors;
        }
    }
    return numErrors ? EXIT_FAILURE : 0;
}
705 
706 #endif
707