/* _PDCLIB_tzload( char const *, struct state *, bool )
2 
3    This file is part of the Public Domain C Library (PDCLib).
4    Permission is granted to use, modify, and / or redistribute at will.
5 */
6 
7 #ifndef REGTEST
8 
9 #include "pdclib/_PDCLIB_tzcode.h"
10 
11 #include <errno.h>
12 #include <stdio.h>
13 #include <string.h>
14 
detzcode(const char * codep)15 static int_fast32_t detzcode( const char * codep )
16 {
17     int_fast32_t result;
18     int          i;
19     int_fast32_t one = 1;
20     int_fast32_t halfmaxval = one << ( 32 - 2 );
21     int_fast32_t maxval = halfmaxval - 1 + halfmaxval;
22     int_fast32_t minval = -1 - maxval;
23 
24     result = codep[ 0 ] & 0x7f;
25 
26     for ( i = 1; i < 4; ++i )
27     {
28         result = ( result << 8 ) | ( codep[ i ] & 0xff );
29     }
30 
31     if ( codep[ 0 ] & 0x80 )
32     {
33         /* Do two's-complement negation even on non-two's-complement machines.
34            If the result would be minval - 1, return minval.
35         */
36         result -= ! _PDCLIB_TWOS_COMPLEMENT && result != 0;
37         result += minval;
38     }
39 
40     return result;
41 }
42 
detzcode64(const char * codep)43 static int_fast64_t detzcode64( const char * codep )
44 {
45     uint_fast64_t result;
46     int           i;
47     int_fast64_t  one = 1;
48     int_fast64_t  halfmaxval = one << ( 64 - 2 );
49     int_fast64_t  maxval = halfmaxval - 1 + halfmaxval;
50     int_fast64_t  minval = - _PDCLIB_TWOS_COMPLEMENT - maxval;
51 
52     result = codep[ 0 ] & 0x7f;
53 
54     for ( i = 1; i < 8; ++i )
55     {
56         result = ( result << 8 ) | ( codep[ i ] & 0xff );
57     }
58 
59     if ( codep[ 0 ] & 0x80 )
60     {
61         /* Do two's-complement negation even on non-two's-complement machines.
62            If the result would be minval - 1, return minval.
63         */
64       result -= ! _PDCLIB_TWOS_COMPLEMENT && result != 0;
65       result += minval;
66     }
67 
68     return result;
69 }
70 
differ_by_repeat(const time_t t1,const time_t t0)71 static bool differ_by_repeat( const time_t t1, const time_t t0 )
72 {
73     if ( ( sizeof( time_t ) * _PDCLIB_CHAR_BIT ) - _PDCLIB_TYPE_SIGNED( time_t ) < SECSPERREPEAT_BITS )
74     {
75         return 0;
76     }
77 
78     return ( t1 - t0 ) == SECSPERREPEAT;
79 }
80 
typesequiv(const struct state * sp,int a,int b)81 static bool typesequiv( const struct state * sp, int a, int b )
82 {
83     bool result;
84 
85     if ( sp == NULL ||
86          a < 0 || a >= sp->typecnt ||
87          b < 0 || b >= sp->typecnt )
88     {
89         result = false;
90     }
91     else
92     {
93         const struct ttinfo *  ap = &sp->ttis[ a ];
94         const struct ttinfo *  bp = &sp->ttis[ b ];
95 
96         result = ( ap->utoff == bp->utoff &&
97                    ap->isdst == bp->isdst &&
98                    ap->ttisstd == bp->ttisstd &&
99                    ap->ttisut == bp->ttisut &&
100                    ( strcmp( &sp->chars[ ap->desigidx ], &sp->chars[ bp->desigidx ] ) == 0 )
101                  );
102     }
103 
104     return result;
105 }
106 
107 #define TZ_MAGIC "TZif"
108 
/* Header at the start of a compiled tz file, as overlaid on the raw
   input buffer.  Every member is a char array; the six count fields
   are 4-byte coded integers that tzloadbody() decodes with detzcode().
*/
struct tzhead
{
    char tzh_magic[ 4 ];       /* TZ_MAGIC */
    char tzh_version[ 1 ];     /* '\0' or '2' or '3' as of 2013 */
    char tzh_reserved[ 15 ];   /* reserved; must be zero */
    char tzh_ttisutcnt[ 4 ];   /* coded number of ttisut flags */
    char tzh_ttisstdcnt[ 4 ];  /* coded number of ttisstd flags */
    char tzh_leapcnt[ 4 ];     /* coded number of leap seconds */
    char tzh_timecnt[ 4 ];     /* coded number of transition times */
    char tzh_typecnt[ 4 ];     /* coded number of local time types */
    char tzh_charcnt[ 4 ];     /* coded number of abbr. chars */
};
121 
/* Input buffer for data read from a compiled tz file.  tzloadbody()
   fills buf with a single fread() and then reads the header fields
   through the tzhead view of the same bytes.
*/
union input_buffer
{
    /* The first part of the buffer, interpreted as a header.  */
    struct tzhead tzhead;

    /* The entire buffer: room for two headers, twice the size of a
       struct state, and 4 bytes per possible transition time.
    */
    char buf[ 2 * sizeof ( struct tzhead ) + 2 * sizeof ( struct state ) + 4 * TZ_MAX_TIMES ];
};
131 
132 /* _PDCLIB_TZDIR with a trailing '/' rather than a trailing '\0'.  */
133 static char const tzdirslash[ sizeof _PDCLIB_TZDIR + 1 ] = _PDCLIB_TZDIR "/";
134 
/* Local storage needed for 'tzloadbody'.  The two members share
   memory: tzloadbody() uses fullname only up to the fopen() call and
   reads the file into u.u afterwards.
*/
union local_storage
{
    /* The results of analyzing the file's contents after it is opened.  */
    struct file_analysis
    {
        /* The input buffer.  */
        union input_buffer u;

        /* A temporary state used for parsing a TZ string in the file.  */
        struct state st;
    } u;

    /* The file name to be opened.  Sized from both the analysis
       struct and the directory prefix plus 1024 bytes (BIGGEST is
       presumably the larger of its two arguments -- defined elsewhere).
    */
    char fullname[ BIGGEST ( sizeof ( struct file_analysis ), sizeof tzdirslash + 1024 ) ];
};
151 
leapcorr(struct state const * sp,time_t t)152 static int_fast64_t leapcorr( struct state const * sp, time_t t )
153 {
154     struct lsinfo const * lp;
155     int i;
156 
157     i = sp->leapcnt;
158 
159     while ( --i >= 0 )
160     {
161         lp = &sp->lsis[ i ];
162 
163         if ( t >= lp->trans )
164         {
165             return lp->corr;
166         }
167     }
168 
169     return 0;
170 }
171 
/* Load tz data from the file named NAME into *SP.  Read extended
   format if DOEXTEND.  Use *LSP for temporary storage.  Return 0 on
   success, an errno value on failure.

   NAME may be NULL, in which case _PDCLIB_TZDEFAULT is used.  A
   leading ':' is skipped.  A name starting with '/' is opened as
   given; any other name is looked up below the tzdirslash prefix.

   Failure values produced here are _PDCLIB_EINVAL (no usable name,
   short or malformed file), _PDCLIB_ENAMETOOLONG (name does not fit
   into lsp->fullname), or whatever errno holds after a failing
   fopen() / fread() / fclose().

   *SP is written to while parsing, so its contents are not meaningful
   after a failure.
*/
static int tzloadbody( char const * name, struct state * sp, bool doextend, union local_storage * lsp )
{
    int    i;
    FILE * fid;
    int    stored;      /* size in bytes of the time values in the current data block: 4, then 8 */
    size_t nread;       /* number of not yet consumed bytes in up->buf */
    bool   doaccess;
    union  input_buffer * up = &lsp->u.u;
    size_t tzheadsize = sizeof ( struct tzhead );

    sp->goback = sp->goahead = false;

    if ( ! name )
    {
        name = _PDCLIB_TZDEFAULT;

        if ( ! name )
        {
            return _PDCLIB_EINVAL;
        }
    }

    if ( name[ 0 ] == ':' )
    {
        ++name;
    }

    /* NOTE(review): doaccess is assigned here and in the ".." scan
       below, but nothing later in this function reads it.
    */
    doaccess = name[ 0 ] == '/';

    if ( ! doaccess )
    {
        char const * dot;
        size_t namelen = strlen( name );

        /* Prefix, name and the terminating '\0' must fit into fullname. */
        if ( sizeof lsp->fullname - sizeof tzdirslash <= namelen )
        {
            return _PDCLIB_ENAMETOOLONG;
        }

        /* Create a string "TZDIR/NAME" by copying the prefix and
           appending the name directly behind it.

           NOTE(review): the append offset is sizeof tzdirslash, which
           is only correct if that array holds the "TZDIR/" prefix
           WITHOUT a terminating '\0'.  If the array does contain a
           terminator, NAME is stored behind it and fopen() below sees
           only the prefix -- confirm against the declaration.
        */
        memcpy( lsp->fullname, tzdirslash, sizeof tzdirslash );
        strcpy( lsp->fullname + sizeof tzdirslash, name );

        /* Set doaccess if NAME contains a ".." file name
           component, as such a name could read a file outside
           the TZDIR virtual subtree.
        */
        for ( dot = name; ( dot = strchr( dot, '.' ) ); ++dot )
        {
            if ( ( dot == name || dot[ -1 ] == '/' ) && dot[ 1 ] == '.' && ( dot[ 2 ] == '/' || ! dot[ 2 ] ) )
            {
                doaccess = true;
                break;
            }
        }

        name = lsp->fullname;
    }

    fid = fopen( name, "rb" );

    if ( fid == NULL )
    {
        /* NOTE(review): assumes fopen() set errno; should errno still
           be 0 here, the caller would take this as success.
        */
        return errno;
    }

    /* Read as much of the file as fits into the buffer in one go.
       lsp->fullname shares storage with up->buf, so NAME must not be
       used past this point when it refers to fullname.
    */
    nread = fread( up->buf, 1, sizeof up->buf, fid );

    if ( nread < tzheadsize )
    {
        /* Not even a complete header: report the read error if there
           was one, otherwise the file is simply too short.
        */
        int err = errno;

        if ( ! ferror( fid ) )
        {
            err = _PDCLIB_EINVAL;
        }

        fclose( fid );
        return err;
    }

    if ( fclose( fid ) == EOF )
    {
        return errno;
    }

    /* Parse one data block per iteration.  The first pass treats time
       values as 4 bytes wide.  Unless the header's version byte is
       '\0', the unconsumed rest of the buffer is then moved to the
       front and parsed again as a second header + data block with
       8-byte time values, overwriting what the first pass stored.
    */
    for ( stored = 4; stored <= 8; stored *= 2 )
    {
        int_fast32_t ttisstdcnt = detzcode( up->tzhead.tzh_ttisstdcnt );
        int_fast32_t ttisutcnt = detzcode( up->tzhead.tzh_ttisutcnt );
        int_fast64_t prevtr = 0;
        int_fast32_t prevcorr = 0;
        int_fast32_t leapcnt = detzcode( up->tzhead.tzh_leapcnt );
        int_fast32_t timecnt = detzcode( up->tzhead.tzh_timecnt );
        int_fast32_t typecnt = detzcode( up->tzhead.tzh_typecnt );
        int_fast32_t charcnt = detzcode( up->tzhead.tzh_charcnt );
        char const *p = up->buf + tzheadsize;
        /* Although tzfile(5) currently requires typecnt to be nonzero,
           support future formats that may allow zero typecnt
           in files that have a TZ string and no transitions.
        */
        if ( ! ( 0 <= leapcnt && leapcnt < TZ_MAX_LEAPS
               && 0 <= typecnt && typecnt < TZ_MAX_TYPES
               && 0 <= timecnt && timecnt < TZ_MAX_TIMES
               && 0 <= charcnt && charcnt < TZ_MAX_CHARS
               && ( ttisstdcnt == typecnt || ttisstdcnt == 0 )
               && ( ttisutcnt == typecnt || ttisutcnt == 0 ) ) )
        {
            return _PDCLIB_EINVAL;
        }

        /* The buffer must hold everything the header announces. */
        if ( nread
            < ( tzheadsize       /* struct tzhead */
              + timecnt * stored   /* ats */
              + timecnt        /* types */
              + typecnt * 6        /* ttinfos */
              + charcnt        /* chars */
              + leapcnt * ( stored + 4 ) /* lsinfos */
              + ttisstdcnt     /* ttisstds */
              + ttisutcnt ) )        /* ttisuts */
        {
            return _PDCLIB_EINVAL;
        }

        sp->leapcnt = leapcnt;
        sp->timecnt = timecnt;
        sp->typecnt = typecnt;
        sp->charcnt = charcnt;

        /* Read transitions, discarding those out of time_t range.
           But pretend the last transition before _PDCLIB_TIME_MIN
           occurred at _PDCLIB_TIME_MIN.

           During this loop sp->types[ i ] is used as a keep / discard
           flag for file entry i; the real type indices are filled in
           by the next loop.  timecnt counts the entries kept in
           sp->ats.
        */
        timecnt = 0;

        for ( i = 0; i < sp->timecnt; ++i )
        {
            int_fast64_t at = stored == 4 ? detzcode( p ) : detzcode64( p );
            sp->types[ i ] = at <= _PDCLIB_TIME_MAX;

            if ( sp->types[ i ] )
            {
                time_t attime = ( ( _PDCLIB_TYPE_SIGNED( time_t ) ? at < _PDCLIB_TIME_MIN : at < 0 ) ? _PDCLIB_TIME_MIN : at );

                if ( timecnt && attime <= sp->ats[ timecnt - 1 ] )
                {
                    /* Transition times must not decrease. */
                    if ( attime < sp->ats[ timecnt - 1 ] )
                    {
                        return _PDCLIB_EINVAL;
                    }

                    /* Same time as the entry kept before (possible
                       after clamping): drop the earlier one.
                    */
                    sp->types[ i - 1 ] = 0;
                    timecnt--;
                }

                sp->ats[ timecnt++ ] = attime;
            }

            p += stored;
        }

        /* Read the type index of every transition, validating all of
           them but storing only those whose transition was kept.
        */
        timecnt = 0;

        for ( i = 0; i < sp->timecnt; ++i )
        {
            unsigned char typ = *p++;

            if ( sp->typecnt <= typ )
            {
                return _PDCLIB_EINVAL;
            }

            if ( sp->types[ i ] )
            {
                sp->types[ timecnt++ ] = typ;
            }
        }

        sp->timecnt = timecnt;

        /* Read the local time type records: 4 bytes coded UT offset,
           1 byte DST flag, 1 byte abbreviation index.
        */
        for ( i = 0; i < sp->typecnt; ++i )
        {
            struct ttinfo * ttisp;
            unsigned char isdst, desigidx;

            ttisp = &sp->ttis[ i ];
            ttisp->utoff = detzcode( p );
            p += 4;
            isdst = *p++;

            if ( ! ( isdst < 2 ) )
            {
                return _PDCLIB_EINVAL;
            }

            ttisp->isdst = isdst;
            desigidx = *p++;

            if ( ! ( desigidx < sp->charcnt ) )
            {
                return _PDCLIB_EINVAL;
            }

            ttisp->desigidx = desigidx;
        }

        /* Copy the abbreviation characters. */
        for ( i = 0; i < sp->charcnt; ++i )
        {
            sp->chars[ i ] = *p++;
        }

        sp->chars[ i ] = '\0';    /* ensure '\0' at end */

        /* Read leap seconds, discarding those out of time_t range.  */
        leapcnt = 0;

        for ( i = 0; i < sp->leapcnt; ++i )
        {
            int_fast64_t tr = stored == 4 ? detzcode( p ) : detzcode64( p );
            int_fast32_t corr = detzcode( p + stored );
            p += stored + 4;

            /* Leap seconds cannot occur before the Epoch.  */
            if ( tr < 0 )
            {
                return _PDCLIB_EINVAL;
            }

            if ( tr <= _PDCLIB_TIME_MAX )
            {
                /* Leap seconds cannot occur more than once per UTC month,
                   and UTC months are at least 28 days long (minus 1
                   second for a negative leap second).  Each leap second's
                   correction must differ from the previous one's by 1
                   second.
                */
                if ( tr - prevtr < 28 * SECSPERDAY - 1 || ( corr != prevcorr - 1 && corr != prevcorr + 1 ) )
                {
                    return _PDCLIB_EINVAL;
                }

                sp->lsis[ leapcnt ].trans = prevtr = tr;
                sp->lsis[ leapcnt ].corr = prevcorr = corr;
                ++leapcnt;
            }
        }

        sp->leapcnt = leapcnt;

        /* Read the ttisstd indicators, one byte per type; all false
           if the file has none.
        */
        for ( i = 0; i < sp->typecnt; ++i )
        {
            struct ttinfo * ttisp;

            ttisp = &sp->ttis[ i ];

            if ( ttisstdcnt == 0 )
            {
                ttisp->ttisstd = false;
            }
            else
            {
                if ( *p != true && *p != false )
                {
                    return _PDCLIB_EINVAL;
                }

                ttisp->ttisstd = *p++;
            }
        }

        /* Read the ttisut indicators, one byte per type; all false
           if the file has none.
        */
        for ( i = 0; i < sp->typecnt; ++i )
        {
            struct ttinfo * ttisp;

            ttisp = &sp->ttis[ i ];

            if ( ttisutcnt == 0 )
            {
                ttisp->ttisut = false;
            }
            else
            {
                if ( *p != true && *p != false )
                {
                    return _PDCLIB_EINVAL;
                }

                ttisp->ttisut = *p++;
            }
        }

        /* If this is an old file, we're done. */
        if ( up->tzhead.tzh_version[ 0 ] == '\0' )
        {
            break;
        }

        /* Drop the consumed block so that whatever follows it starts
           at the beginning of the buffer.
        */
        nread -= p - up->buf;
        memmove( up->buf, p, nread );
    }

    /* What is left in the buffer now may be a newline-enclosed TZ
       string.  If extension was requested and there is room for at
       least two more types, parse that string into the scratch state
       and merge its transitions and types into *SP.
    */
    if ( doextend && nread > 2 && up->buf[ 0 ] == '\n' && up->buf[ nread - 1 ] == '\n' && sp->typecnt + 2 <= TZ_MAX_TYPES )
    {
        struct state    *ts = &lsp->u.st;

        /* Turn the trailing newline into the string terminator. */
        up->buf[ nread - 1 ] = '\0';

        if ( _PDCLIB_tzparse( &up->buf[ 1 ], ts, false ) )
        {
            /* Attempt to reuse existing abbreviations.
               Without this, America/Anchorage would be right on
               the edge after 2037 when TZ_MAX_CHARS is 50, as
               sp->charcnt equals 40 (for LMT AST AWT APT AHST
               AHDT YST AKDT AKST) and ts->charcnt equals 10
               (for AKST AKDT).  Reusing means sp->charcnt can
               stay 40 in this example.  */
            int gotabbr = 0;
            int charcnt = sp->charcnt;

            for ( i = 0; i < ts->typecnt; ++i )
            {
                char * tsabbr = ts->chars + ts->ttis[ i ].desigidx;
                int j;

                /* Look for the abbreviation starting at any offset of
                   the existing character table.
                */
                for ( j = 0; j < charcnt; ++j )
                {
                    if ( strcmp( sp->chars + j, tsabbr ) == 0 )
                    {
                        ts->ttis[ i ].desigidx = j;
                        ++gotabbr;
                        break;
                    }
                }

                /* Not found (j == charcnt): append it if it fits. */
                if ( ! ( j < charcnt ) )
                {
                    int tsabbrlen = strlen( tsabbr );

                    if ( j + tsabbrlen < TZ_MAX_CHARS )
                    {
                        strcpy( sp->chars + j, tsabbr );
                        charcnt = j + tsabbrlen + 1;
                        ts->ttis[ i ].desigidx = j;
                        ++gotabbr;
                    }
                }
            }

            /* Merge only if every type of the TZ string got an
               abbreviation index into sp->chars.
            */
            if ( gotabbr == ts->typecnt )
            {
                sp->charcnt = charcnt;

                /* Ignore any trailing, no-op transitions generated
                   by zic as they don't help here and can run afoul
                   of bugs in zic 2016j or earlier.  */
                while ( 1 < sp->timecnt && ( sp->types[ sp->timecnt - 1 ] == sp->types[ sp->timecnt - 2 ] ) )
                {
                    sp->timecnt--;
                }

                /* Skip the TZ string transitions that are not later
                   than the last transition already present.
                */
                for ( i = 0; i < ts->timecnt; ++i )
                {
                    if ( sp->timecnt == 0 || ( sp->ats[ sp->timecnt - 1 ] < ts->ats[ i ] + leapcorr( sp, ts->ats[ i ] ) ) )
                    {
                        break;
                    }
                }

                /* Append the remaining ones, leap-corrected, as long
                   as there is room.  Their type indices are shifted
                   by sp->typecnt because the TZ string's types are
                   appended behind the existing ones below.
                */
                while ( i < ts->timecnt && sp->timecnt < TZ_MAX_TIMES )
                {
                    sp->ats[ sp->timecnt ] = ts->ats[ i ] + leapcorr( sp, ts->ats[ i ] );
                    sp->types[ sp->timecnt ] = ( sp->typecnt + ts->types[ i ] );
                    sp->timecnt++;
                    ++i;
                }

                for ( i = 0; i < ts->typecnt; ++i )
                {
                    sp->ttis[ sp->typecnt++ ] = ts->ttis[ i ];
                }
            }
        }
    }

    if ( sp->typecnt == 0 )
    {
        return _PDCLIB_EINVAL;
    }

    /* Set goback if some later transition is equivalent to the first
       one and exactly SECSPERREPEAT after it; set goahead if some
       earlier transition is equivalent to the last one and exactly
       SECSPERREPEAT before it.
    */
    if ( sp->timecnt > 1 )
    {
        for ( i = 1; i < sp->timecnt; ++i )
        {
            if ( typesequiv( sp, sp->types[ i ], sp->types[ 0 ] ) && differ_by_repeat( sp->ats[ i ], sp->ats[ 0 ] ) )
            {
                sp->goback = true;
                break;
            }
        }

        for ( i = sp->timecnt - 2; i >= 0; --i )
        {
            if ( typesequiv( sp, sp->types[ sp->timecnt - 1 ], sp->types[ i ] ) && differ_by_repeat( sp->ats[ sp->timecnt - 1 ], sp->ats[ i ] ) )
            {
                sp->goahead = true;
                break;
            }
        }
    }

    /* Infer sp->defaulttype from the data.  Although this default
       type is always zero for data from recent tzdb releases,
       things are trickier for data from tzdb 2018e or earlier.

       The first set of heuristics work around bugs in 32-bit data
       generated by tzdb 2013c or earlier.  The workaround is for
       zones like Australia/Macquarie where timestamps before the
       first transition have a time type that is not the earliest
       standard-time type.  See:
       https://mm.icann.org/pipermail/tz/2013-May/019368.html
    */
    /* If type 0 is unused in transitions, it's the type to use for early times. */
    for ( i = 0; i < sp->timecnt; ++i )
    {
        if ( sp->types[ i ] == 0 )
        {
            break;
        }
    }

    /* From here on, i is the candidate default type; -1 means "not
       decided yet".
    */
    i = i < sp->timecnt ? -1 : 0;

    /* Absent the above,
       if there are transition times
       and the first transition is to a daylight time
       find the standard type less than and closest to
       the type of the first transition.
    */
    if ( i < 0 && sp->timecnt > 0 && sp->ttis[ sp->types[ 0 ] ].isdst )
    {
        i = sp->types[ 0 ];

        /* Leaves i at -1 if no lower-numbered standard type exists. */
        while ( --i >= 0 )
        {
            if ( ! sp->ttis[ i ].isdst )
            {
                break;
            }
        }
    }

    /* The next heuristics are for data generated by tzdb 2018e or
       earlier, for zones like EST5EDT where the first transition
       is to DST.
    */
    /* If no result yet, find the first standard type.
       If there is none, punt to type zero.
    */
    if ( i < 0 )
    {
        i = 0;

        while ( sp->ttis[ i ].isdst )
        {
            if ( ++i >= sp->typecnt )
            {
                i = 0;
                break;
            }
        }
    }

    /* A simple 'sp->defaulttype = 0;' would suffice here if we
       didn't have to worry about 2018e-or-earlier data.  Even
       simpler would be to remove the defaulttype member and just
       use 0 in its place.
    */
    sp->defaulttype = i;

    return 0;
}
659 
660 /* Load tz data from the file named NAME into *SP.  Read extended
661    format if DOEXTEND.  Return 0 on success, an errno value on failure.
662 */
_PDCLIB_tzload(char const * name,struct state * sp,bool doextend)663 int _PDCLIB_tzload( char const * name, struct state * sp, bool doextend )
664 {
665     union local_storage ls;
666     return tzloadbody( name, sp, doextend, &ls );
667 }
668 
669 #endif
670 
671 #ifdef TEST
672 
673 #include "_PDCLIB_test.h"
674 
/* Test driver.  The REGTEST guard below is empty, i.e. this file
   defines no test cases; main() only returns TEST_RESULTS.
*/
int main( void )
{
#ifndef REGTEST
#endif

    return TEST_RESULTS;
}
682 
683 #endif
684