1 /* datetime.c -- functions for manipulating RFC 5545 date-time values
2  *
3  * This code is Copyright (c) 2014, by the authors of nmh.
4  * See the COPYRIGHT file in the root directory of the nmh
5  * distribution for complete copyright information.
6  */
7 
8 #include "h/mh.h"
9 #include "h/icalendar.h"
10 #include <h/fmt_scan.h>
11 #include "h/tws.h"
12 #include "h/utils.h"
13 #include "unquote.h"
14 
/*
 * This doesn't try to support all of the myriad date-time formats
 * allowed by RFC 5545.  It is only used for viewing date-times,
 * so that shouldn't be a problem:  if a particular format can't
 * be handled by this code, just present it to the user in its
 * original form.
 *
 * Also, this assumes a valid iCalendar input file.  E.g., it
 * doesn't check that each BEGIN has a matching END and vice
 * versa.  That should be done in the parser, though it currently
 * isn't.
 */
27 
/*
 * Parameters of one timezone observance (a STANDARD or DAYLIGHT
 * sub-component of a VTIMEZONE).
 */
typedef struct tzparams {
    /* The string members all point at values owned by the parse tree;
     * they are borrowed, never modified and never freed through this
     * struct, hence const.
     *
     * TZOFFSETFROM is used to calculate the absolute time at which
     * the transition to a given observance takes place.
     * TZOFFSETTO is the timezone offset from UTC.  Both are in
     * [+-]HHmm format. */
    const char *offsetfrom;
    const char *offsetto;
    const char *dtstart;    /* DTSTART value of the observance */
    const char *rrule;      /* RRULE value, or NULL if there is none */

    /* This is only used to make sure that the timezone applies.  And
       not always, because if the timezone DTSTART is before the epoch,
       we don't try to compare to it (the field then stays 0). */
    time_t start_dt; /* in seconds since epoch */
} tzparams;
43 
44 struct tzdesc {
45     char *tzid;
46 
47     /* The following are translations of the pieces of RRULE and DTSTART
48        into seconds from beginning of year. */
49     tzparams standard_params;
50     tzparams daylight_params;
51 
52     struct tzdesc *next;
53 };
54 
55 /*
56  * Parse a datetime of the form YYYYMMDDThhmmss and a string
57  * representation of the timezone in units of [+-]hhmm and load the
58  * struct tws.
59  */
60 static int
parse_datetime(const char * datetime,const char * zone,int dst,struct tws * tws)61 parse_datetime (const char *datetime, const char *zone, int dst,
62                 struct tws *tws) {
63     char utc_indicator;
64     int form_1 = 0;
65     int items_matched;
66 
67     memset(tws, 0, sizeof *tws);
68     items_matched =
69         sscanf (datetime, "%4d%2d%2dT%2d%2d%2d%c",
70                 &tws->tw_year, &tws->tw_mon, &tws->tw_mday,
71                 &tws->tw_hour, &tws->tw_min, &tws->tw_sec,
72                 &utc_indicator);
73     tws->tw_flags = TW_NULL;
74 
75     if (items_matched == 7) {
76         /* The 'Z' must be capital according to RFC 5545 Sec. 3.3.5. */
77         if (utc_indicator != 'Z') {
78             inform("%s has invalid timezone indicator of 0x%x",
79                     datetime, utc_indicator);
80             return NOTOK;
81         }
82     } else if (zone == NULL) {
83         form_1 = 1;
84     }
85 
86     /* items_matched of 3 is for, e.g., 20151230.  Assume that means
87        the entire day.  The time fields of the tws struct were
88        initialized to 0 by the memset() above. */
89     if (items_matched >= 6  ||  items_matched == 3) {
90         int offset = atoi (zone ? zone : "0");
91 
92         /* struct tws defines tw_mon over [0, 11]. */
93         --tws->tw_mon;
94 
95         /* Fill out rest of tws, i.e., its tw_wday and tw_flags. */
96         set_dotw (tws);
97         /* set_dotw() sets TW_SIMP.  Replace that with TW_SEXP so that
98            dasctime() outputs the dotw before the date instead of after. */
99         tws->tw_flags &= ~TW_SDAY;
100         tws->tw_flags |= TW_SEXP;
101 
102         /* For the call to dmktime():
103            - don't need tw_yday
104            - tw_clock must be 0 on entry, and is set by dmktime()
105            - the only flag in tw_flags used is TW_DST
106          */
107         tws->tw_yday = tws->tw_clock = 0;
108         tws->tw_zone = 60 * (offset / 100)  +  offset % 100;
109         if (dst) {
110             tws->tw_zone -= 60;  /* per dlocaltime() */
111             tws->tw_flags |= TW_DST;
112         }
113         /* dmktime() just sets tws->tw_clock. */
114         (void) dmktime (tws);
115 
116         if (! form_1) {
117             /* Set TW_SZEXP so that dasctime outputs timezone, except
118                with local time (Form #1). */
119             tws->tw_flags |= TW_SZEXP;
120 
121             /* Convert UTC time to time in local timezone.  However,
122                don't try for years before 1970 because dlocatime()
123                doesn't handle them well.  dlocaltime() will succeed if
124                tws->tw_clock is nonzero. */
125             if (tws->tw_year >= 1970  &&  tws->tw_clock > 0) {
126                 const int was_dst = tws->tw_flags & TW_DST;
127 
128                 *tws = *dlocaltime (&tws->tw_clock);
129                 if (was_dst  &&  ! (tws->tw_flags & TW_DST)) {
130                     /* dlocaltime() changed the DST flag from 1 to 0,
131                        which means the time is in the hour (assumed to
132                        be one hour) that is lost in the transition to
133                        DST.  So per RFC 5545 Sec. 3.3.5, "the
134                        DATE-TIME value is interpreted using the UTC
135                        offset before the gap in local times."  In
136                        other words, add an hour to it.
137                        No adjustment is necessary for the transition
138                        from DST to standard time, because dasctime()
139                        shows the first occurrence of the time. */
140                     tws->tw_clock += 3600;
141                     *tws = *dlocaltime (&tws->tw_clock);
142                 }
143             }
144         }
145 
146         return OK;
147     }
148 
149     return NOTOK;
150 }
151 
152 tzdesc_t
load_timezones(const contentline * clines)153 load_timezones (const contentline *clines) {
154     tzdesc_t timezones = NULL, timezone = NULL;
155     int in_vtimezone, in_standard, in_daylight;
156     tzparams *params = NULL;
157     const contentline *node;
158 
159     /* Interpret each VTIMEZONE section. */
160     in_vtimezone = in_standard = in_daylight = 0;
161     for (node = clines; node; node = node->next) {
162         /* node->name will be NULL if the line was "deleted". */
163         if (! node->name) { continue; }
164 
165         if (in_daylight  ||  in_standard) {
166             if (! strcasecmp ("END", node->name)  &&
167                 ((in_standard  &&  ! strcasecmp ("STANDARD", node->value))  ||
168                  (in_daylight  &&  ! strcasecmp ("DAYLIGHT", node->value)))) {
169                 struct tws tws;
170 
171                 if (in_standard) { in_standard = 0; }
172                 else if (in_daylight) { in_daylight = 0; }
173                 if (parse_datetime (params->dtstart, params->offsetfrom,
174                                     in_daylight,
175                                     &tws) == OK) {
176                     if (tws.tw_year >= 1970) {
177                         /* dmktime() falls apart for, e.g., the year 1601. */
178                         params->start_dt = tws.tw_clock;
179                     }
180                 } else {
181                     inform("failed to parse start time %s for %s",
182                             params->dtstart,
183                             in_standard ? "standard" : "daylight");
184                     return NULL;
185                 }
186                 params = NULL;
187             } else if (! strcasecmp ("DTSTART", node->name)) {
188                 /* Save DTSTART for use after getting TZOFFSETFROM. */
189                 params->dtstart = node->value;
190             } else if (! strcasecmp ("TZOFFSETFROM", node->name)) {
191                 params->offsetfrom = node->value;
192             } else if (! strcasecmp ("TZOFFSETTO", node->name)) {
193                 params->offsetto = node->value;
194             } else if (! strcasecmp ("RRULE", node->name)) {
195                 params->rrule = node->value;
196             }
197         } else if (in_vtimezone) {
198             if (! strcasecmp ("END", node->name)  &&
199                 ! strcasecmp ("VTIMEZONE", node->value)) {
200                 in_vtimezone = 0;
201             } else if (! strcasecmp ("BEGIN", node->name)  &&
202                 ! strcasecmp ("STANDARD", node->value)) {
203                 in_standard = 1;
204                 params = &timezone->standard_params;
205             } else if (! strcasecmp ("BEGIN", node->name)  &&
206                 ! strcasecmp ("DAYLIGHT", node->value)) {
207                 in_daylight = 1;
208                 params = &timezone->daylight_params;
209             } else if (! strcasecmp ("TZID", node->name)) {
210                 /* See comment below in format_datetime() about removing any enclosing quotes from a
211                    timezone identifier. */
212                 char *buf = mh_xmalloc(strlen(node->value) + 1);
213                 unquote_string(node->value, buf);
214                 timezone->tzid = buf;
215             }
216         } else {
217             if (! strcasecmp ("BEGIN", node->name)  &&
218                 ! strcasecmp ("VTIMEZONE", node->value)) {
219 
220                 in_vtimezone = 1;
221                 NEW0(timezone);
222                 if (timezones) {
223                     tzdesc_t t;
224 
225                     for (t = timezones; t && t->next; t = t->next) { continue; }
226                     /* The loop terminated at, not after, the last
227                        timezones node. */
228                     t->next = timezone;
229                 } else {
230                     timezones = timezone;
231                 }
232             }
233         }
234     }
235 
236     return timezones;
237 }
238 
239 void
free_timezones(tzdesc_t timezone)240 free_timezones (tzdesc_t timezone) {
241     tzdesc_t next;
242 
243     for ( ; timezone; timezone = next) {
244         free (timezone->tzid);
245         next = timezone->next;
246         free (timezone);
247     }
248 }
249 
250 /*
251  * Convert time to local timezone, accounting for daylight saving time:
252  * - Detect which type of datetime the node contains:
253  *     Form #1: DATE WITH LOCAL TIME
254  *     Form #2: DATE WITH UTC TIME
255  *     Form #3: DATE WITH LOCAL TIME AND TIME ZONE REFERENCE
256  * - Convert value to local time in seconds since epoch.
257  * - If there's a DST in the timezone, convert its start and end
258  *   date-times to local time in seconds, also.  Then determine
259  *   if the value is between them, and therefore DST.  Otherwise, it's
260  *   not.
261  * - Format the time value.
262  */
263 
264 /*
265  * Given a recurrence rule and year, calculate its time in seconds
266  * from 01 January UTC of the year.
267  */
268 time_t
rrule_clock(const char * rrule,const char * starttime,const char * zone,unsigned int year)269 rrule_clock (const char *rrule, const char *starttime, const char *zone,
270              unsigned int year) {
271     time_t clock = 0;
272 
273     if (nmh_strcasestr (rrule, "FREQ=YEARLY;INTERVAL=1")  ||
274         (nmh_strcasestr (rrule, "FREQ=YEARLY")  &&  nmh_strcasestr(rrule, "INTERVAL") == NULL)) {
275         struct tws *tws;
276         const char *cp;
277         int wday = -1, month = -1;
278         int specific_day = 1; /* BYDAY integer (prefix) */
279         char buf[32];
280         int day;
281 
282         if ((cp = nmh_strcasestr (rrule, "BYDAY="))) {
283             cp += 6;
284             /* BYDAY integers must be ASCII. */
285             if (*cp == '+') { ++cp; } /* +n specific day; don't support '-' */
286             else if (*cp == '-') { goto fail; }
287 
288             if (isdigit ((unsigned char) *cp)) { specific_day = *cp++ - 0x30; }
289 
290             if (! strncasecmp (cp, "SU", 2)) { wday = 0; }
291             else if (! strncasecmp (cp, "MO", 2)) { wday = 1; }
292             else if (! strncasecmp (cp, "TU", 2)) { wday = 2; }
293             else if (! strncasecmp (cp, "WE", 2)) { wday = 3; }
294             else if (! strncasecmp (cp, "TH", 2)) { wday = 4; }
295             else if (! strncasecmp (cp, "FR", 2)) { wday = 5; }
296             else if (! strncasecmp (cp, "SA", 2)) { wday = 6; }
297         }
298         if ((cp = nmh_strcasestr (rrule, "BYMONTH="))) {
299             month = atoi (cp + 8);
300         }
301 
302         for (day = 1; day <= 7; ++day) {
303             /* E.g, 11-01-2014 02:00:00-0400 */
304             snprintf (buf, sizeof buf, "%02d-%02d-%04u %.2s:%.2s:%.2s%s",
305                       month, day + 7 * (specific_day-1), year,
306                       starttime, starttime + 2, starttime + 4,
307                       zone ? zone : "0000");
308             if ((tws = dparsetime (buf))) {
309                 if (! (tws->tw_flags & (TW_SEXP|TW_SIMP))) { set_dotw (tws); }
310 
311                 if (tws->tw_wday == wday) {
312                     /* Found the day specified in the RRULE. */
313                     break;
314                 }
315             }
316         }
317 
318         if (day <= 7) {
319             clock = tws->tw_clock;
320         }
321     }
322 
323 fail:
324     if (clock == 0) {
325         inform("Unsupported RRULE format: %s, assume local timezone, continuing...",
326 	    rrule);
327     }
328 
329     return clock;
330 }
331 
332 char *
format_datetime(tzdesc_t timezones,const contentline * node)333 format_datetime (tzdesc_t timezones, const contentline *node) {
334     param_list *p;
335     char *dt_timezone = NULL;
336     int dst = 0;
337     struct tws tws[2]; /* [standard, daylight] */
338     tzdesc_t tz;
339     char *tp_std, *tp_dst, *tp_dt;
340 
341     /* Extract the timezone, if specified (RFC 5545 Sec. 3.3.5 Form #3). */
342     for (p = node->params; p && p->param_name; p = p->next) {
343         if (! strcasecmp (p->param_name, "TZID")  &&  p->values) {
344             /* Remove any enclosing quotes from the timezone identifier.  I don't believe that it's
345                legal for it to be quoted, according to RFC 5545 § 3.2.19:
346                    tzidparam  = "TZID" "=" [tzidprefix] paramtext
347                    tzidprefix = "/"
348                where paramtext includes SAFE-CHAR, which specifically excludes DQUOTE.  But we'll
349                be generous and strip quotes. */
350             char *buf = mh_xmalloc(strlen(p->values->value) + 1);
351             unquote_string(p->values->value, buf);
352             dt_timezone = buf;
353             break;
354         }
355     }
356 
357     if (! dt_timezone) {
358         /* Form #1: DATE WITH LOCAL TIME, i.e., no time zone, or
359            Form #2: DATE WITH UTC TIME */
360         if (parse_datetime (node->value, NULL, 0, &tws[0]) == OK) {
361             return strdup (dasctime (&tws[0], 0));
362         }
363         inform("unable to parse datetime %s", node->value);
364         return NULL;
365     }
366 
367     /*
368      * must be
369      * Form #3: DATE WITH LOCAL TIME AND TIME ZONE REFERENCE
370      */
371 
372     /* Find the corresponding tzdesc. */
373     for (tz = timezones; dt_timezone && tz; tz = tz->next) {
374         /* Property parameter values are case insensitive (RFC 5545
375            Sec. 2) and time zone identifiers are property parameters
376            (RFC 5545 Sec. 3.8.2.4), though it would seem odd to use
377            different case in the same file for identifiers that are
378            supposed to be the same. */
379         if (tz->tzid  &&  ! strcasecmp (dt_timezone, tz->tzid)) { break; }
380     }
381 
382     if (tz) {
383         free(dt_timezone);
384     } else {
385         inform("did not find VTIMEZONE section for %s", dt_timezone);
386         free(dt_timezone);
387         return NULL;
388     }
389 
390     /* Determine if it's Daylight Saving. */
391     tp_std = strchr (tz->standard_params.dtstart, 'T');
392     tp_dt = strchr (node->value, 'T');
393 
394     if (tz->daylight_params.dtstart) {
395         tp_dst = strchr (tz->daylight_params.dtstart, 'T');
396     } else {
397         /* No DAYLIGHT section. */
398         tp_dst = NULL;
399         dst = 0;
400     }
401 
402     if (tp_std  &&  tp_dt) {
403         time_t transition[2] = { 0, 0 }; /* [standard, daylight] */
404         time_t dt[2]; /* [standard, daylight] */
405         unsigned int year;
406         char buf[5];
407 
408         /* Datetime is form YYYYMMDDThhmmss.  Extract year. */
409         memcpy (buf, node->value, sizeof buf - 1);
410         buf[sizeof buf - 1] = '\0';
411         year = atoi (buf);
412 
413         if (tz->standard_params.rrule) {
414             /* +1 to skip the T before the time */
415             transition[0] =
416                 rrule_clock (tz->standard_params.rrule, tp_std + 1,
417                              tz->standard_params.offsetfrom, year);
418         }
419         if (tp_dst  &&  tz->daylight_params.rrule) {
420             /* +1 to skip the T before the time */
421             transition[1] =
422                 rrule_clock (tz->daylight_params.rrule, tp_dst + 1,
423                              tz->daylight_params.offsetfrom, year);
424         }
425 
426         if (transition[0] < transition[1]) {
427             inform("format_datetime() requires that daylight "
428                     "saving time transition precede standard time "
429                     "transition");
430             return NULL;
431         }
432 
433         if (parse_datetime (node->value, tz->standard_params.offsetto,
434                             0, &tws[0]) == OK) {
435             dt[0] = tws[0].tw_clock;
436         } else {
437             inform("unable to parse datetime %s", node->value);
438             return NULL;
439         }
440 
441         if (tp_dst) {
442             if (dt[0] < transition[1]) {
443                 dst = 0;
444             } else {
445                 if (parse_datetime (node->value,
446                                     tz->daylight_params.offsetto, 1,
447                                     &tws[1]) == OK) {
448                     dt[1] = tws[1].tw_clock;
449                 } else {
450                     inform("unable to parse datetime %s",
451                             node->value);
452                     return NULL;
453                 }
454 
455                 dst = dt[1] <= transition[0];
456             }
457         }
458 
459         if (dst) {
460             if (tz->daylight_params.start_dt > 0  &&
461                 dt[dst] < tz->daylight_params.start_dt) {
462                 inform("date-time of %s is before VTIMEZONE start "
463                         "of %s", node->value,
464                         tz->daylight_params.dtstart);
465                 return NULL;
466             }
467         } else {
468             if (tz->standard_params.start_dt > 0  &&
469                 dt[dst] < tz->standard_params.start_dt) {
470                 inform("date-time of %s is before VTIMEZONE start "
471                         "of %s", node->value,
472                         tz->standard_params.dtstart);
473                 return NULL;
474             }
475         }
476     } else {
477         if (! tp_std) {
478             inform("unsupported date-time format: %s",
479                     tz->standard_params.dtstart);
480             return NULL;
481         }
482         if (! tp_dt) {
483             inform("unsupported date-time format: %s", node->value);
484             return NULL;
485         }
486     }
487 
488     return strdup (dasctime (&tws[dst], 0));
489 }
490