1 #include "Python.h"
2 #include "structmember.h"
3 
4 #include <ctype.h>
5 #include <stddef.h>
6 #include <stdint.h>
7 
8 #include "datetime.h"
9 
10 // Imports
11 static PyObject *io_open = NULL;
12 static PyObject *_tzpath_find_tzfile = NULL;
13 static PyObject *_common_mod = NULL;
14 
15 typedef struct TransitionRuleType TransitionRuleType;
16 typedef struct StrongCacheNode StrongCacheNode;
17 
18 typedef struct {
19     PyObject *utcoff;
20     PyObject *dstoff;
21     PyObject *tzname;
22     long utcoff_seconds;
23 } _ttinfo;
24 
25 typedef struct {
26     _ttinfo std;
27     _ttinfo dst;
28     int dst_diff;
29     TransitionRuleType *start;
30     TransitionRuleType *end;
31     unsigned char std_only;
32 } _tzrule;
33 
34 typedef struct {
35     PyDateTime_TZInfo base;
36     PyObject *key;
37     PyObject *file_repr;
38     PyObject *weakreflist;
39     size_t num_transitions;
40     size_t num_ttinfos;
41     int64_t *trans_list_utc;
42     int64_t *trans_list_wall[2];
43     _ttinfo **trans_ttinfos;  // References to the ttinfo for each transition
44     _ttinfo *ttinfo_before;
45     _tzrule tzrule_after;
46     _ttinfo *_ttinfos;  // Unique array of ttinfos for ease of deallocation
47     unsigned char fixed_offset;
48     unsigned char source;
49 } PyZoneInfo_ZoneInfo;
50 
51 struct TransitionRuleType {
52     int64_t (*year_to_timestamp)(TransitionRuleType *, int);
53 };
54 
55 typedef struct {
56     TransitionRuleType base;
57     uint8_t month;
58     uint8_t week;
59     uint8_t day;
60     int8_t hour;
61     int8_t minute;
62     int8_t second;
63 } CalendarRule;
64 
65 typedef struct {
66     TransitionRuleType base;
67     uint8_t julian;
68     unsigned int day;
69     int8_t hour;
70     int8_t minute;
71     int8_t second;
72 } DayRule;
73 
74 struct StrongCacheNode {
75     StrongCacheNode *next;
76     StrongCacheNode *prev;
77     PyObject *key;
78     PyObject *zone;
79 };
80 
81 static PyTypeObject PyZoneInfo_ZoneInfoType;
82 
83 // Globals
84 static PyObject *TIMEDELTA_CACHE = NULL;
85 static PyObject *ZONEINFO_WEAK_CACHE = NULL;
86 static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
87 static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
88 
89 static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
90 
91 // Constants
92 static const int EPOCHORDINAL = 719163;
93 static int DAYS_IN_MONTH[] = {
94     -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
95 };
96 
97 static int DAYS_BEFORE_MONTH[] = {
98     -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
99 };
100 
101 static const int SOURCE_NOCACHE = 0;
102 static const int SOURCE_CACHE = 1;
103 static const int SOURCE_FILE = 2;
104 
105 // Forward declarations
106 static int
107 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
108 static void
109 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
110                  unsigned char *isdsts, size_t num_transitions,
111                  size_t num_ttinfos);
112 static int
113 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
114             int64_t *trans_local[2], size_t num_ttinfos,
115             size_t num_transitions);
116 
117 static int
118 parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
119 
120 static Py_ssize_t
121 parse_abbr(const char *const p, PyObject **abbr);
122 static Py_ssize_t
123 parse_tz_delta(const char *const p, long *total_seconds);
124 static Py_ssize_t
125 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
126                       int8_t *second);
127 static Py_ssize_t
128 parse_transition_rule(const char *const p, TransitionRuleType **out);
129 
130 static _ttinfo *
131 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
132 static _ttinfo *
133 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
134                            unsigned char *fold);
135 
136 static int
137 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
138 static void
139 xdecref_ttinfo(_ttinfo *ttinfo);
140 static int
141 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
142 
143 static int
144 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
145              long dst_offset, TransitionRuleType *start,
146              TransitionRuleType *end, _tzrule *out);
147 static void
148 free_tzrule(_tzrule *tzrule);
149 
150 static PyObject *
151 load_timedelta(long seconds);
152 
153 static int
154 get_local_timestamp(PyObject *dt, int64_t *local_ts);
155 static _ttinfo *
156 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
157 
158 static int
159 ymd_to_ord(int y, int m, int d);
160 static int
161 is_leap_year(int year);
162 
163 static size_t
164 _bisect(const int64_t value, const int64_t *arr, size_t size);
165 
166 static int
167 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
168 static void
169 clear_strong_cache(const PyTypeObject *const type);
170 static void
171 update_strong_cache(const PyTypeObject *const type, PyObject *key,
172                     PyObject *zone);
173 static PyObject *
174 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key);
175 
176 static PyObject *
zoneinfo_new_instance(PyTypeObject * type,PyObject * key)177 zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
178 {
179     PyObject *file_obj = NULL;
180     PyObject *file_path = NULL;
181 
182     file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
183     if (file_path == NULL) {
184         return NULL;
185     }
186     else if (file_path == Py_None) {
187         file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
188         if (file_obj == NULL) {
189             Py_DECREF(file_path);
190             return NULL;
191         }
192     }
193 
194     PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
195     if (self == NULL) {
196         goto error;
197     }
198 
199     if (file_obj == NULL) {
200         file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
201         if (file_obj == NULL) {
202             goto error;
203         }
204     }
205 
206     if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
207         goto error;
208     }
209 
210     PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
211     Py_DECREF(file_obj);
212     file_obj = NULL;
213     if (rv == NULL) {
214         goto error;
215     }
216     Py_DECREF(rv);
217 
218     ((PyZoneInfo_ZoneInfo *)self)->key = key;
219     Py_INCREF(key);
220 
221     goto cleanup;
222 error:
223     Py_XDECREF(self);
224     self = NULL;
225 cleanup:
226     if (file_obj != NULL) {
227         PyObject *exc, *val, *tb;
228         PyErr_Fetch(&exc, &val, &tb);
229         PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
230         _PyErr_ChainExceptions(exc, val, tb);
231         if (tmp == NULL) {
232             Py_CLEAR(self);
233         }
234         Py_XDECREF(tmp);
235         Py_DECREF(file_obj);
236     }
237     Py_DECREF(file_path);
238     return self;
239 }
240 
241 static PyObject *
get_weak_cache(PyTypeObject * type)242 get_weak_cache(PyTypeObject *type)
243 {
244     if (type == &PyZoneInfo_ZoneInfoType) {
245         return ZONEINFO_WEAK_CACHE;
246     }
247     else {
248         PyObject *cache =
249             PyObject_GetAttrString((PyObject *)type, "_weak_cache");
250         // We are assuming that the type lives at least as long as the function
251         // that calls get_weak_cache, and that it holds a reference to the
252         // cache, so we'll return a "borrowed reference".
253         Py_XDECREF(cache);
254         return cache;
255     }
256 }
257 
258 static PyObject *
zoneinfo_new(PyTypeObject * type,PyObject * args,PyObject * kw)259 zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
260 {
261     PyObject *key = NULL;
262     static char *kwlist[] = {"key", NULL};
263     if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
264         return NULL;
265     }
266 
267     PyObject *instance = zone_from_strong_cache(type, key);
268     if (instance != NULL || PyErr_Occurred()) {
269         return instance;
270     }
271 
272     PyObject *weak_cache = get_weak_cache(type);
273     instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
274     if (instance == NULL) {
275         return NULL;
276     }
277 
278     if (instance == Py_None) {
279         Py_DECREF(instance);
280         PyObject *tmp = zoneinfo_new_instance(type, key);
281         if (tmp == NULL) {
282             return NULL;
283         }
284 
285         instance =
286             PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
287         Py_DECREF(tmp);
288         if (instance == NULL) {
289             return NULL;
290         }
291         ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
292     }
293 
294     update_strong_cache(type, key, instance);
295     return instance;
296 }
297 
298 static void
zoneinfo_dealloc(PyObject * obj_self)299 zoneinfo_dealloc(PyObject *obj_self)
300 {
301     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
302 
303     if (self->weakreflist != NULL) {
304         PyObject_ClearWeakRefs(obj_self);
305     }
306 
307     if (self->trans_list_utc != NULL) {
308         PyMem_Free(self->trans_list_utc);
309     }
310 
311     for (size_t i = 0; i < 2; i++) {
312         if (self->trans_list_wall[i] != NULL) {
313             PyMem_Free(self->trans_list_wall[i]);
314         }
315     }
316 
317     if (self->_ttinfos != NULL) {
318         for (size_t i = 0; i < self->num_ttinfos; ++i) {
319             xdecref_ttinfo(&(self->_ttinfos[i]));
320         }
321         PyMem_Free(self->_ttinfos);
322     }
323 
324     if (self->trans_ttinfos != NULL) {
325         PyMem_Free(self->trans_ttinfos);
326     }
327 
328     free_tzrule(&(self->tzrule_after));
329 
330     Py_XDECREF(self->key);
331     Py_XDECREF(self->file_repr);
332 
333     Py_TYPE(self)->tp_free((PyObject *)self);
334 }
335 
336 static PyObject *
zoneinfo_from_file(PyTypeObject * type,PyObject * args,PyObject * kwargs)337 zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
338 {
339     PyObject *file_obj = NULL;
340     PyObject *file_repr = NULL;
341     PyObject *key = Py_None;
342     PyZoneInfo_ZoneInfo *self = NULL;
343 
344     static char *kwlist[] = {"", "key", NULL};
345     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
346                                      &key)) {
347         return NULL;
348     }
349 
350     PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
351     self = (PyZoneInfo_ZoneInfo *)obj_self;
352     if (self == NULL) {
353         return NULL;
354     }
355 
356     file_repr = PyUnicode_FromFormat("%R", file_obj);
357     if (file_repr == NULL) {
358         goto error;
359     }
360 
361     if (load_data(self, file_obj)) {
362         goto error;
363     }
364 
365     self->source = SOURCE_FILE;
366     self->file_repr = file_repr;
367     self->key = key;
368     Py_INCREF(key);
369 
370     return obj_self;
371 error:
372     Py_XDECREF(file_repr);
373     Py_XDECREF(self);
374     return NULL;
375 }
376 
377 static PyObject *
zoneinfo_no_cache(PyTypeObject * cls,PyObject * args,PyObject * kwargs)378 zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
379 {
380     static char *kwlist[] = {"key", NULL};
381     PyObject *key = NULL;
382     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
383         return NULL;
384     }
385 
386     PyObject *out = zoneinfo_new_instance(cls, key);
387     if (out != NULL) {
388         ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
389     }
390 
391     return out;
392 }
393 
394 static PyObject *
zoneinfo_clear_cache(PyObject * cls,PyObject * args,PyObject * kwargs)395 zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
396 {
397     PyObject *only_keys = NULL;
398     static char *kwlist[] = {"only_keys", NULL};
399 
400     if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
401                                       &only_keys))) {
402         return NULL;
403     }
404 
405     PyTypeObject *type = (PyTypeObject *)cls;
406     PyObject *weak_cache = get_weak_cache(type);
407 
408     if (only_keys == NULL || only_keys == Py_None) {
409         PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
410         if (rv != NULL) {
411             Py_DECREF(rv);
412         }
413 
414         clear_strong_cache(type);
415     }
416     else {
417         PyObject *item = NULL;
418         PyObject *pop = PyUnicode_FromString("pop");
419         if (pop == NULL) {
420             return NULL;
421         }
422 
423         PyObject *iter = PyObject_GetIter(only_keys);
424         if (iter == NULL) {
425             Py_DECREF(pop);
426             return NULL;
427         }
428 
429         while ((item = PyIter_Next(iter))) {
430             // Remove from strong cache
431             if (eject_from_strong_cache(type, item) < 0) {
432                 Py_DECREF(item);
433                 break;
434             }
435 
436             // Remove from weak cache
437             PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
438                                                        Py_None, NULL);
439 
440             Py_DECREF(item);
441             if (tmp == NULL) {
442                 break;
443             }
444             Py_DECREF(tmp);
445         }
446         Py_DECREF(iter);
447         Py_DECREF(pop);
448     }
449 
450     if (PyErr_Occurred()) {
451         return NULL;
452     }
453 
454     Py_RETURN_NONE;
455 }
456 
457 static PyObject *
zoneinfo_utcoffset(PyObject * self,PyObject * dt)458 zoneinfo_utcoffset(PyObject *self, PyObject *dt)
459 {
460     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
461     if (tti == NULL) {
462         return NULL;
463     }
464     Py_INCREF(tti->utcoff);
465     return tti->utcoff;
466 }
467 
468 static PyObject *
zoneinfo_dst(PyObject * self,PyObject * dt)469 zoneinfo_dst(PyObject *self, PyObject *dt)
470 {
471     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
472     if (tti == NULL) {
473         return NULL;
474     }
475     Py_INCREF(tti->dstoff);
476     return tti->dstoff;
477 }
478 
479 static PyObject *
zoneinfo_tzname(PyObject * self,PyObject * dt)480 zoneinfo_tzname(PyObject *self, PyObject *dt)
481 {
482     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
483     if (tti == NULL) {
484         return NULL;
485     }
486     Py_INCREF(tti->tzname);
487     return tti->tzname;
488 }
489 
490 #define HASTZINFO(p) (((_PyDateTime_BaseTZInfo *)(p))->hastzinfo)
491 #define GET_DT_TZINFO(p) \
492     (HASTZINFO(p) ? ((PyDateTime_DateTime *)(p))->tzinfo : Py_None)
493 
494 static PyObject *
zoneinfo_fromutc(PyObject * obj_self,PyObject * dt)495 zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
496 {
497     if (!PyDateTime_Check(dt)) {
498         PyErr_SetString(PyExc_TypeError,
499                         "fromutc: argument must be a datetime");
500         return NULL;
501     }
502     if (GET_DT_TZINFO(dt) != obj_self) {
503         PyErr_SetString(PyExc_ValueError,
504                         "fromutc: dt.tzinfo "
505                         "is not self");
506         return NULL;
507     }
508 
509     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
510 
511     int64_t timestamp;
512     if (get_local_timestamp(dt, &timestamp)) {
513         return NULL;
514     }
515     size_t num_trans = self->num_transitions;
516 
517     _ttinfo *tti = NULL;
518     unsigned char fold = 0;
519 
520     if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
521         tti = self->ttinfo_before;
522     }
523     else if (num_trans == 0 ||
524              timestamp > self->trans_list_utc[num_trans - 1]) {
525         tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
526                                          PyDateTime_GET_YEAR(dt), &fold);
527 
528         // Immediately after the last manual transition, the fold/gap is
529         // between self->trans_ttinfos[num_transitions - 1] and whatever
530         // ttinfo applies immediately after the last transition, not between
531         // the STD and DST rules in the tzrule_after, so we may need to
532         // adjust the fold value.
533         if (num_trans) {
534             _ttinfo *tti_prev = NULL;
535             if (num_trans == 1) {
536                 tti_prev = self->ttinfo_before;
537             }
538             else {
539                 tti_prev = self->trans_ttinfos[num_trans - 2];
540             }
541             int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
542             if (diff > 0 &&
543                 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
544                 fold = 1;
545             }
546         }
547     }
548     else {
549         size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
550         _ttinfo *tti_prev = NULL;
551 
552         if (idx >= 2) {
553             tti_prev = self->trans_ttinfos[idx - 2];
554             tti = self->trans_ttinfos[idx - 1];
555         }
556         else {
557             tti_prev = self->ttinfo_before;
558             tti = self->trans_ttinfos[0];
559         }
560 
561         // Detect fold
562         int64_t shift =
563             (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
564         if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
565             fold = 1;
566         }
567     }
568 
569     PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
570     if (tmp == NULL) {
571         return NULL;
572     }
573 
574     if (fold) {
575         if (PyDateTime_CheckExact(tmp)) {
576             ((PyDateTime_DateTime *)tmp)->fold = 1;
577             dt = tmp;
578         }
579         else {
580             PyObject *replace = PyObject_GetAttrString(tmp, "replace");
581             PyObject *args = PyTuple_New(0);
582             PyObject *kwargs = PyDict_New();
583 
584             Py_DECREF(tmp);
585             if (args == NULL || kwargs == NULL || replace == NULL) {
586                 Py_XDECREF(args);
587                 Py_XDECREF(kwargs);
588                 Py_XDECREF(replace);
589                 return NULL;
590             }
591 
592             dt = NULL;
593             if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) {
594                 dt = PyObject_Call(replace, args, kwargs);
595             }
596 
597             Py_DECREF(args);
598             Py_DECREF(kwargs);
599             Py_DECREF(replace);
600 
601             if (dt == NULL) {
602                 return NULL;
603             }
604         }
605     }
606     else {
607         dt = tmp;
608     }
609     return dt;
610 }
611 
612 static PyObject *
zoneinfo_repr(PyZoneInfo_ZoneInfo * self)613 zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
614 {
615     PyObject *rv = NULL;
616     const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
617     if (!(self->key == Py_None)) {
618         rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
619     }
620     else {
621         assert(PyUnicode_Check(self->file_repr));
622         rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
623                                   self->file_repr);
624     }
625 
626     return rv;
627 }
628 
629 static PyObject *
zoneinfo_str(PyZoneInfo_ZoneInfo * self)630 zoneinfo_str(PyZoneInfo_ZoneInfo *self)
631 {
632     if (!(self->key == Py_None)) {
633         Py_INCREF(self->key);
634         return self->key;
635     }
636     else {
637         return zoneinfo_repr(self);
638     }
639 }
640 
641 /* Pickles the ZoneInfo object by key and source.
642  *
643  * ZoneInfo objects are pickled by reference to the TZif file that they came
644  * from, which means that the exact transitions may be different or the file
645  * may not un-pickle if the data has changed on disk in the interim.
646  *
647  * It is necessary to include a bit indicating whether or not the object
648  * was constructed from the cache, because from-cache objects will hit the
649  * unpickling process's cache, whereas no-cache objects will bypass it.
650  *
651  * Objects constructed from ZoneInfo.from_file cannot be pickled.
652  */
653 static PyObject *
zoneinfo_reduce(PyObject * obj_self,PyObject * unused)654 zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
655 {
656     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
657     if (self->source == SOURCE_FILE) {
658         // Objects constructed from files cannot be pickled.
659         PyObject *pickle = PyImport_ImportModule("pickle");
660         if (pickle == NULL) {
661             return NULL;
662         }
663 
664         PyObject *pickle_error =
665             PyObject_GetAttrString(pickle, "PicklingError");
666         Py_DECREF(pickle);
667         if (pickle_error == NULL) {
668             return NULL;
669         }
670 
671         PyErr_Format(pickle_error,
672                      "Cannot pickle a ZoneInfo file from a file stream.");
673         Py_DECREF(pickle_error);
674         return NULL;
675     }
676 
677     unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
678     PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
679 
680     if (constructor == NULL) {
681         return NULL;
682     }
683 
684     PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
685     Py_DECREF(constructor);
686     return rv;
687 }
688 
689 static PyObject *
zoneinfo__unpickle(PyTypeObject * cls,PyObject * args)690 zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
691 {
692     PyObject *key;
693     unsigned char from_cache;
694     if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
695         return NULL;
696     }
697 
698     if (from_cache) {
699         PyObject *val_args = Py_BuildValue("(O)", key);
700         if (val_args == NULL) {
701             return NULL;
702         }
703 
704         PyObject *rv = zoneinfo_new(cls, val_args, NULL);
705 
706         Py_DECREF(val_args);
707         return rv;
708     }
709     else {
710         return zoneinfo_new_instance(cls, key);
711     }
712 }
713 
714 /* It is relatively expensive to construct new timedelta objects, and in most
715  * cases we're looking at a relatively small number of timedeltas, such as
716  * integer number of hours, etc. We will keep a cache so that we construct
717  * a minimal number of these.
718  *
719  * Possibly this should be replaced with an LRU cache so that it's not possible
720  * for the memory usage to explode from this, but in order for this to be a
721  * serious problem, one would need to deliberately craft a malicious time zone
722  * file with many distinct offsets. As of tzdb 2019c, loading every single zone
723  * fills the cache with ~450 timedeltas for a total size of ~12kB.
724  *
725  * This returns a new reference to the timedelta.
726  */
727 static PyObject *
load_timedelta(long seconds)728 load_timedelta(long seconds)
729 {
730     PyObject *rv = NULL;
731     PyObject *pyoffset = PyLong_FromLong(seconds);
732     if (pyoffset == NULL) {
733         return NULL;
734     }
735     int contains = PyDict_Contains(TIMEDELTA_CACHE, pyoffset);
736     if (contains == -1) {
737         goto error;
738     }
739 
740     if (!contains) {
741         PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
742             0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
743 
744         if (tmp == NULL) {
745             goto error;
746         }
747 
748         rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
749         Py_DECREF(tmp);
750     }
751     else {
752         rv = PyDict_GetItem(TIMEDELTA_CACHE, pyoffset);
753     }
754 
755     Py_DECREF(pyoffset);
756     Py_INCREF(rv);
757     return rv;
758 error:
759     Py_DECREF(pyoffset);
760     return NULL;
761 }
762 
763 /* Constructor for _ttinfo object - this starts by initializing the _ttinfo
764  * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
765  * initialized _ttinfo objects.
766  */
767 static int
build_ttinfo(long utcoffset,long dstoffset,PyObject * tzname,_ttinfo * out)768 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
769 {
770     out->utcoff = NULL;
771     out->dstoff = NULL;
772     out->tzname = NULL;
773 
774     out->utcoff_seconds = utcoffset;
775     out->utcoff = load_timedelta(utcoffset);
776     if (out->utcoff == NULL) {
777         return -1;
778     }
779 
780     out->dstoff = load_timedelta(dstoffset);
781     if (out->dstoff == NULL) {
782         return -1;
783     }
784 
785     out->tzname = tzname;
786     Py_INCREF(tzname);
787 
788     return 0;
789 }
790 
791 /* Decrease reference count on any non-NULL members of a _ttinfo  */
792 static void
xdecref_ttinfo(_ttinfo * ttinfo)793 xdecref_ttinfo(_ttinfo *ttinfo)
794 {
795     if (ttinfo != NULL) {
796         Py_XDECREF(ttinfo->utcoff);
797         Py_XDECREF(ttinfo->dstoff);
798         Py_XDECREF(ttinfo->tzname);
799     }
800 }
801 
802 /* Equality function for _ttinfo. */
803 static int
ttinfo_eq(const _ttinfo * const tti0,const _ttinfo * const tti1)804 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
805 {
806     int rv;
807     if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
808         1) {
809         goto end;
810     }
811 
812     if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
813         1) {
814         goto end;
815     }
816 
817     if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
818         1) {
819         goto end;
820     }
821 end:
822     return rv;
823 }
824 
825 /* Given a file-like object, this populates a ZoneInfo object
826  *
827  * The current version calls into a Python function to read the data from
828  * file into Python objects, and this translates those Python objects into
829  * C values and calculates derived values (e.g. dstoff) in C.
830  *
831  * This returns 0 on success and -1 on failure.
832  *
833  * The function will never return while `self` is partially initialized —
834  * the object only needs to be freed / deallocated if this succeeds.
835  */
836 static int
load_data(PyZoneInfo_ZoneInfo * self,PyObject * file_obj)837 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
838 {
839     PyObject *data_tuple = NULL;
840 
841     long *utcoff = NULL;
842     long *dstoff = NULL;
843     size_t *trans_idx = NULL;
844     unsigned char *isdst = NULL;
845 
846     self->trans_list_utc = NULL;
847     self->trans_list_wall[0] = NULL;
848     self->trans_list_wall[1] = NULL;
849     self->trans_ttinfos = NULL;
850     self->_ttinfos = NULL;
851     self->file_repr = NULL;
852 
853     size_t ttinfos_allocated = 0;
854 
855     data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
856 
857     if (data_tuple == NULL) {
858         goto error;
859     }
860 
861     if (!PyTuple_CheckExact(data_tuple)) {
862         PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
863                      data_tuple);
864         goto error;
865     }
866 
867     // Unpack the data tuple
868     PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
869     if (trans_idx_list == NULL) {
870         goto error;
871     }
872 
873     PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
874     if (trans_utc == NULL) {
875         goto error;
876     }
877 
878     PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
879     if (utcoff_list == NULL) {
880         goto error;
881     }
882 
883     PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
884     if (isdst_list == NULL) {
885         goto error;
886     }
887 
888     PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
889     if (abbr == NULL) {
890         goto error;
891     }
892 
893     PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
894     if (tz_str == NULL) {
895         goto error;
896     }
897 
898     // Load the relevant sizes
899     Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
900     if (num_transitions < 0) {
901         goto error;
902     }
903 
904     Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
905     if (num_ttinfos < 0) {
906         goto error;
907     }
908 
909     self->num_transitions = (size_t)num_transitions;
910     self->num_ttinfos = (size_t)num_ttinfos;
911 
912     // Load the transition indices and list
913     self->trans_list_utc =
914         PyMem_Malloc(self->num_transitions * sizeof(int64_t));
915     if (self->trans_list_utc == NULL) {
916         goto error;
917     }
918     trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
919     if (trans_idx == NULL) {
920         goto error;
921     }
922 
923     for (size_t i = 0; i < self->num_transitions; ++i) {
924         PyObject *num = PyTuple_GetItem(trans_utc, i);
925         if (num == NULL) {
926             goto error;
927         }
928         self->trans_list_utc[i] = PyLong_AsLongLong(num);
929         if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
930             goto error;
931         }
932 
933         num = PyTuple_GetItem(trans_idx_list, i);
934         if (num == NULL) {
935             goto error;
936         }
937 
938         Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
939         if (cur_trans_idx == -1) {
940             goto error;
941         }
942 
943         trans_idx[i] = (size_t)cur_trans_idx;
944         if (trans_idx[i] > self->num_ttinfos) {
945             PyErr_Format(
946                 PyExc_ValueError,
947                 "Invalid transition index found while reading TZif: %zd",
948                 cur_trans_idx);
949 
950             goto error;
951         }
952     }
953 
954     // Load UTC offsets and isdst (size num_ttinfos)
955     utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
956     isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
957 
958     if (utcoff == NULL || isdst == NULL) {
959         goto error;
960     }
961     for (size_t i = 0; i < self->num_ttinfos; ++i) {
962         PyObject *num = PyTuple_GetItem(utcoff_list, i);
963         if (num == NULL) {
964             goto error;
965         }
966 
967         utcoff[i] = PyLong_AsLong(num);
968         if (utcoff[i] == -1 && PyErr_Occurred()) {
969             goto error;
970         }
971 
972         num = PyTuple_GetItem(isdst_list, i);
973         if (num == NULL) {
974             goto error;
975         }
976 
977         int isdst_with_error = PyObject_IsTrue(num);
978         if (isdst_with_error == -1) {
979             goto error;
980         }
981         else {
982             isdst[i] = (unsigned char)isdst_with_error;
983         }
984     }
985 
986     dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
987     if (dstoff == NULL) {
988         goto error;
989     }
990 
991     // Derive dstoff and trans_list_wall from the information we've loaded
992     utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
993                      self->num_ttinfos);
994 
995     if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
996                     self->trans_list_wall, self->num_ttinfos,
997                     self->num_transitions)) {
998         goto error;
999     }
1000 
1001     // Build _ttinfo objects from utcoff, dstoff and abbr
1002     self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
1003     if (self->_ttinfos == NULL) {
1004         goto error;
1005     }
1006     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1007         PyObject *tzname = PyTuple_GetItem(abbr, i);
1008         if (tzname == NULL) {
1009             goto error;
1010         }
1011 
1012         ttinfos_allocated++;
1013         if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
1014             goto error;
1015         }
1016     }
1017 
1018     // Build our mapping from transition to the ttinfo that applies
1019     self->trans_ttinfos =
1020         PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1021     if (self->trans_ttinfos == NULL) {
1022         goto error;
1023     }
1024     for (size_t i = 0; i < self->num_transitions; ++i) {
1025         size_t ttinfo_idx = trans_idx[i];
1026         assert(ttinfo_idx < self->num_ttinfos);
1027         self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1028     }
1029 
1030     // Set ttinfo_before to the first non-DST transition
1031     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1032         if (!isdst[i]) {
1033             self->ttinfo_before = &(self->_ttinfos[i]);
1034             break;
1035         }
1036     }
1037 
1038     // If there are only DST ttinfos, pick the first one, if there are no
1039     // ttinfos at all, set ttinfo_before to NULL
1040     if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1041         self->ttinfo_before = &(self->_ttinfos[0]);
1042     }
1043 
1044     if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1045         if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1046             goto error;
1047         }
1048     }
1049     else {
1050         if (!self->num_ttinfos) {
1051             PyErr_Format(PyExc_ValueError, "No time zone information found.");
1052             goto error;
1053         }
1054 
1055         size_t idx;
1056         if (!self->num_transitions) {
1057             idx = self->num_ttinfos - 1;
1058         }
1059         else {
1060             idx = trans_idx[self->num_transitions - 1];
1061         }
1062 
1063         _ttinfo *tti = &(self->_ttinfos[idx]);
1064         build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1065                      &(self->tzrule_after));
1066 
1067         // We've abused the build_tzrule constructor to construct an STD-only
1068         // rule mimicking whatever ttinfo we've picked up, but it's possible
1069         // that the one we've picked up is a DST zone, so we need to make sure
1070         // that the dstoff is set correctly in that case.
1071         if (PyObject_IsTrue(tti->dstoff)) {
1072             _ttinfo *tti_after = &(self->tzrule_after.std);
1073             Py_DECREF(tti_after->dstoff);
1074             tti_after->dstoff = tti->dstoff;
1075             Py_INCREF(tti_after->dstoff);
1076         }
1077     }
1078 
1079     // Determine if this is a "fixed offset" zone, meaning that the output of
1080     // the utcoffset, dst and tzname functions does not depend on the specific
1081     // datetime passed.
1082     //
1083     // We make three simplifying assumptions here:
1084     //
1085     // 1. If tzrule_after is not std_only, it has transitions that might occur
1086     //    (it is possible to construct TZ strings that specify STD and DST but
1087     //    no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1088     // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1089     //    represent different offsets.
1090     // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1091     //    fixed-offset zone with extra _ttinfos defined may appear to *not* be
1092     //    a fixed offset zone).
1093     //
1094     // Violations to these assumptions would be fairly exotic, and exotic
1095     // zones should almost certainly not be used with datetime.time (the
1096     // only thing that would be affected by this).
1097     if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1098         self->fixed_offset = 0;
1099     }
1100     else if (self->num_ttinfos == 0) {
1101         self->fixed_offset = 1;
1102     }
1103     else {
1104         int constant_offset =
1105             ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1106         if (constant_offset < 0) {
1107             goto error;
1108         }
1109         else {
1110             self->fixed_offset = constant_offset;
1111         }
1112     }
1113 
1114     int rv = 0;
1115     goto cleanup;
1116 error:
1117     // These resources only need to be freed if we have failed, if we succeed
1118     // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1119     // method to free the relevant resources.
1120     if (self->trans_list_utc != NULL) {
1121         PyMem_Free(self->trans_list_utc);
1122         self->trans_list_utc = NULL;
1123     }
1124 
1125     for (size_t i = 0; i < 2; ++i) {
1126         if (self->trans_list_wall[i] != NULL) {
1127             PyMem_Free(self->trans_list_wall[i]);
1128             self->trans_list_wall[i] = NULL;
1129         }
1130     }
1131 
1132     if (self->_ttinfos != NULL) {
1133         for (size_t i = 0; i < ttinfos_allocated; ++i) {
1134             xdecref_ttinfo(&(self->_ttinfos[i]));
1135         }
1136         PyMem_Free(self->_ttinfos);
1137         self->_ttinfos = NULL;
1138     }
1139 
1140     if (self->trans_ttinfos != NULL) {
1141         PyMem_Free(self->trans_ttinfos);
1142         self->trans_ttinfos = NULL;
1143     }
1144 
1145     rv = -1;
1146 cleanup:
1147     Py_XDECREF(data_tuple);
1148 
1149     if (utcoff != NULL) {
1150         PyMem_Free(utcoff);
1151     }
1152 
1153     if (dstoff != NULL) {
1154         PyMem_Free(dstoff);
1155     }
1156 
1157     if (isdst != NULL) {
1158         PyMem_Free(isdst);
1159     }
1160 
1161     if (trans_idx != NULL) {
1162         PyMem_Free(trans_idx);
1163     }
1164 
1165     return rv;
1166 }
1167 
1168 /* Function to calculate the local timestamp of a transition from the year. */
1169 int64_t
calendarrule_year_to_timestamp(TransitionRuleType * base_self,int year)1170 calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1171 {
1172     CalendarRule *self = (CalendarRule *)base_self;
1173 
1174     // We want (year, month, day of month); we have year and month, but we
1175     // need to turn (week, day-of-week) into day-of-month
1176     //
1177     // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1178     // Week 5 represents the last occurrence of day `day`, so we need to know
1179     // the first weekday of the month and the number of days in the month.
1180     int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1181     uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1182     if (self->month == 2 && is_leap_year(year)) {
1183         days_in_month += 1;
1184     }
1185 
1186     // This equation seems magical, so I'll break it down:
1187     // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1188     //    + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1189     //    because this math is mod 7
1190     // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1191     //    numbers so that -1 % 7 = 6).
1192     // 3. Add 1 because month days are a 1-based index.
1193     int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1194     if (month_day < 0) {
1195         month_day += 7;
1196     }
1197     month_day += 1;
1198 
1199     // Now use a 0-based index version of `week` to calculate the w-th
1200     // occurrence of `day`
1201     month_day += ((int8_t)(self->week) - 1) * 7;
1202 
1203     // month_day will only be > days_in_month if w was 5, and `w` means "last
1204     // occurrence of `d`", so now we just check if we over-shot the end of the
1205     // month and if so knock off 1 week.
1206     if (month_day > days_in_month) {
1207         month_day -= 7;
1208     }
1209 
1210     int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1211     return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1212             (int64_t)(self->minute * 60) + (int64_t)(self->second));
1213 }
1214 
1215 /* Constructor for CalendarRule. */
1216 int
calendarrule_new(uint8_t month,uint8_t week,uint8_t day,int8_t hour,int8_t minute,int8_t second,CalendarRule * out)1217 calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1218                  int8_t minute, int8_t second, CalendarRule *out)
1219 {
1220     // These bounds come from the POSIX standard, which describes an Mm.n.d
1221     // rule as:
1222     //
1223     //   The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1224     //   5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1225     //   may occur in either the fourth or the fifth week). Week 1 is the first
1226     //   week in which the d'th day occurs. Day zero is Sunday.
1227     if (month <= 0 || month > 12) {
1228         PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1229         return -1;
1230     }
1231 
1232     if (week <= 0 || week > 5) {
1233         PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1234         return -1;
1235     }
1236 
1237     // If the 'day' parameter type is changed to a signed type,
1238     // "day < 0" check must be added.
1239     if (/* day < 0 || */ day > 6) {
1240         PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1241         return -1;
1242     }
1243 
1244     TransitionRuleType base = {&calendarrule_year_to_timestamp};
1245 
1246     CalendarRule new_offset = {
1247         .base = base,
1248         .month = month,
1249         .week = week,
1250         .day = day,
1251         .hour = hour,
1252         .minute = minute,
1253         .second = second,
1254     };
1255 
1256     *out = new_offset;
1257     return 0;
1258 }
1259 
1260 /* Function to calculate the local timestamp of a transition from the year.
1261  *
1262  * This translates the day of the year into a local timestamp — either a
1263  * 1-based Julian day, not including leap days, or the 0-based year-day,
1264  * including leap days.
1265  * */
1266 int64_t
dayrule_year_to_timestamp(TransitionRuleType * base_self,int year)1267 dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1268 {
1269     // The function signature requires a TransitionRuleType pointer, but this
1270     // function is only applicable to DayRule* objects.
1271     DayRule *self = (DayRule *)base_self;
1272 
1273     // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1274     // to know the number of days since 1970-01-01, so we must subtract off
1275     // the equivalent of ymd_to_ord(1970, 1, 1).
1276     //
1277     // We subtract off an additional 1 day to account for January 1st (we want
1278     // the number of full days *before* the date of the transition - partial
1279     // days are accounted for in the hour, minute and second portions.
1280     int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1281 
1282     // The Julian day specification skips over February 29th in leap years,
1283     // from the POSIX standard:
1284     //
1285     //   Leap days shall not be counted. That is, in all years-including leap
1286     //   years-February 28 is day 59 and March 1 is day 60. It is impossible to
1287     //   refer explicitly to the occasional February 29.
1288     //
1289     // This is actually more useful than you'd think — if you want a rule that
1290     // always transitions on a given calendar day (other than February 29th),
1291     // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1292     // always refers to December 31st.
1293     unsigned int day = self->day;
1294     if (self->julian && day >= 59 && is_leap_year(year)) {
1295         day += 1;
1296     }
1297 
1298     return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1299            (self->minute * 60) + self->second;
1300 }
1301 
1302 /* Constructor for DayRule. */
1303 static int
dayrule_new(uint8_t julian,unsigned int day,int8_t hour,int8_t minute,int8_t second,DayRule * out)1304 dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1305             int8_t second, DayRule *out)
1306 {
1307     // The POSIX standard specifies that Julian days must be in the range (1 <=
1308     // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1309     // be in the range (0 <= n <= 365).
1310     if (day < julian || day > 365) {
1311         PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1312                      julian, day);
1313         return -1;
1314     }
1315 
1316     TransitionRuleType base = {
1317         &dayrule_year_to_timestamp,
1318     };
1319 
1320     DayRule tmp = {
1321         .base = base,
1322         .julian = julian,
1323         .day = day,
1324         .hour = hour,
1325         .minute = minute,
1326         .second = second,
1327     };
1328 
1329     *out = tmp;
1330 
1331     return 0;
1332 }
1333 
1334 /* Calculate the start and end rules for a _tzrule in the given year. */
1335 static void
tzrule_transitions(_tzrule * rule,int year,int64_t * start,int64_t * end)1336 tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1337 {
1338     assert(rule->start != NULL);
1339     assert(rule->end != NULL);
1340     *start = rule->start->year_to_timestamp(rule->start, year);
1341     *end = rule->end->year_to_timestamp(rule->end, year);
1342 }
1343 
1344 /* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1345  *
1346  * This takes a local timestamp and fold for disambiguation purposes; the year
1347  * could technically be calculated from the timestamp, but given that the
1348  * callers of this function already have the year information accessible from
1349  * the datetime struct, it is taken as an additional parameter to reduce
1350  * unnecessary calculation.
1351  * */
1352 static _ttinfo *
find_tzrule_ttinfo(_tzrule * rule,int64_t ts,unsigned char fold,int year)1353 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1354 {
1355     if (rule->std_only) {
1356         return &(rule->std);
1357     }
1358 
1359     int64_t start, end;
1360     uint8_t isdst;
1361 
1362     tzrule_transitions(rule, year, &start, &end);
1363 
1364     // With fold = 0, the period (denominated in local time) with the smaller
1365     // offset starts at the end of the gap and ends at the end of the fold;
1366     // with fold = 1, it runs from the start of the gap to the beginning of the
1367     // fold.
1368     //
1369     // So in order to determine the DST boundaries we need to know both the
1370     // fold and whether DST is positive or negative (rare), and it turns out
1371     // that this boils down to fold XOR is_positive.
1372     if (fold == (rule->dst_diff >= 0)) {
1373         end -= rule->dst_diff;
1374     }
1375     else {
1376         start += rule->dst_diff;
1377     }
1378 
1379     if (start < end) {
1380         isdst = (ts >= start) && (ts < end);
1381     }
1382     else {
1383         isdst = (ts < end) || (ts >= start);
1384     }
1385 
1386     if (isdst) {
1387         return &(rule->dst);
1388     }
1389     else {
1390         return &(rule->std);
1391     }
1392 }
1393 
1394 /* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1395  *
1396  * This function can determine the _ttinfo that applies at a given epoch time,
1397  * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1398  * This is to be used in the .fromutc() function.
1399  *
1400  * The year is technically a redundant parameter, because it can be calculated
1401  * from the timestamp, but all callers of this function should have the year
1402  * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1403  * calculation.
1404  **/
1405 static _ttinfo *
find_tzrule_ttinfo_fromutc(_tzrule * rule,int64_t ts,int year,unsigned char * fold)1406 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1407                            unsigned char *fold)
1408 {
1409     if (rule->std_only) {
1410         *fold = 0;
1411         return &(rule->std);
1412     }
1413 
1414     int64_t start, end;
1415     uint8_t isdst;
1416     tzrule_transitions(rule, year, &start, &end);
1417     start -= rule->std.utcoff_seconds;
1418     end -= rule->dst.utcoff_seconds;
1419 
1420     if (start < end) {
1421         isdst = (ts >= start) && (ts < end);
1422     }
1423     else {
1424         isdst = (ts < end) || (ts >= start);
1425     }
1426 
1427     // For positive DST, the ambiguous period is one dst_diff after the end of
1428     // DST; for negative DST, the ambiguous period is one dst_diff before the
1429     // start of DST.
1430     int64_t ambig_start, ambig_end;
1431     if (rule->dst_diff > 0) {
1432         ambig_start = end;
1433         ambig_end = end + rule->dst_diff;
1434     }
1435     else {
1436         ambig_start = start;
1437         ambig_end = start - rule->dst_diff;
1438     }
1439 
1440     *fold = (ts >= ambig_start) && (ts < ambig_end);
1441 
1442     if (isdst) {
1443         return &(rule->dst);
1444     }
1445     else {
1446         return &(rule->std);
1447     }
1448 }
1449 
1450 /* Parse a TZ string in the format specified by the POSIX standard:
1451  *
1452  *  std offset[dst[offset],start[/time],end[/time]]
1453  *
1454  *  std and dst must be 3 or more characters long and must not contain a
1455  *  leading colon, embedded digits, commas, nor a plus or minus signs; The
1456  *  spaces between "std" and "offset" are only for display and are not actually
1457  *  present in the string.
1458  *
1459  *  The format of the offset is ``[+|-]hh[:mm[:ss]]``
1460  *
1461  * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1462  *
1463  * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1464  */
1465 static int
parse_tz_str(PyObject * tz_str_obj,_tzrule * out)1466 parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1467 {
1468     PyObject *std_abbr = NULL;
1469     PyObject *dst_abbr = NULL;
1470     TransitionRuleType *start = NULL;
1471     TransitionRuleType *end = NULL;
1472     // Initialize offsets to invalid value (> 24 hours)
1473     long std_offset = 1 << 20;
1474     long dst_offset = 1 << 20;
1475 
1476     char *tz_str = PyBytes_AsString(tz_str_obj);
1477     if (tz_str == NULL) {
1478         return -1;
1479     }
1480     char *p = tz_str;
1481 
1482     // Read the `std` abbreviation, which must be at least 3 characters long.
1483     Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
1484     if (num_chars < 1) {
1485         PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1486         goto error;
1487     }
1488 
1489     p += num_chars;
1490 
1491     // Now read the STD offset, which is required
1492     num_chars = parse_tz_delta(p, &std_offset);
1493     if (num_chars < 0) {
1494         PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1495         goto error;
1496     }
1497     p += num_chars;
1498 
1499     // If the string ends here, there is no DST, otherwise we must parse the
1500     // DST abbreviation and start and end dates and times.
1501     if (*p == '\0') {
1502         goto complete;
1503     }
1504 
1505     num_chars = parse_abbr(p, &dst_abbr);
1506     if (num_chars < 1) {
1507         PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1508         goto error;
1509     }
1510     p += num_chars;
1511 
1512     if (*p == ',') {
1513         // From the POSIX standard:
1514         //
1515         // If no offset follows dst, the alternative time is assumed to be one
1516         // hour ahead of standard time.
1517         dst_offset = std_offset + 3600;
1518     }
1519     else {
1520         num_chars = parse_tz_delta(p, &dst_offset);
1521         if (num_chars < 0) {
1522             PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1523                          tz_str_obj);
1524             goto error;
1525         }
1526 
1527         p += num_chars;
1528     }
1529 
1530     TransitionRuleType **transitions[2] = {&start, &end};
1531     for (size_t i = 0; i < 2; ++i) {
1532         if (*p != ',') {
1533             PyErr_Format(PyExc_ValueError,
1534                          "Missing transition rules in TZ string: %R",
1535                          tz_str_obj);
1536             goto error;
1537         }
1538         p++;
1539 
1540         num_chars = parse_transition_rule(p, transitions[i]);
1541         if (num_chars < 0) {
1542             PyErr_Format(PyExc_ValueError,
1543                          "Malformed transition rule in TZ string: %R",
1544                          tz_str_obj);
1545             goto error;
1546         }
1547         p += num_chars;
1548     }
1549 
1550     if (*p != '\0') {
1551         PyErr_Format(PyExc_ValueError,
1552                      "Extraneous characters at end of TZ string: %R",
1553                      tz_str_obj);
1554         goto error;
1555     }
1556 
1557 complete:
1558     build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1559     Py_DECREF(std_abbr);
1560     Py_XDECREF(dst_abbr);
1561 
1562     return 0;
1563 error:
1564     Py_XDECREF(std_abbr);
1565     if (dst_abbr != NULL && dst_abbr != Py_None) {
1566         Py_DECREF(dst_abbr);
1567     }
1568 
1569     if (start != NULL) {
1570         PyMem_Free(start);
1571     }
1572 
1573     if (end != NULL) {
1574         PyMem_Free(end);
1575     }
1576 
1577     return -1;
1578 }
1579 
1580 static int
parse_uint(const char * const p,uint8_t * value)1581 parse_uint(const char *const p, uint8_t *value)
1582 {
1583     if (!isdigit(*p)) {
1584         return -1;
1585     }
1586 
1587     *value = (*p) - '0';
1588     return 0;
1589 }
1590 
1591 /* Parse the STD and DST abbreviations from a TZ string. */
1592 static Py_ssize_t
parse_abbr(const char * const p,PyObject ** abbr)1593 parse_abbr(const char *const p, PyObject **abbr)
1594 {
1595     const char *ptr = p;
1596     char buff = *ptr;
1597     const char *str_start;
1598     const char *str_end;
1599 
1600     if (*ptr == '<') {
1601         ptr++;
1602         str_start = ptr;
1603         while ((buff = *ptr) != '>') {
1604             // From the POSIX standard:
1605             //
1606             //   In the quoted form, the first character shall be the less-than
1607             //   ( '<' ) character and the last character shall be the
1608             //   greater-than ( '>' ) character. All characters between these
1609             //   quoting characters shall be alphanumeric characters from the
1610             //   portable character set in the current locale, the plus-sign (
1611             //   '+' ) character, or the minus-sign ( '-' ) character. The std
1612             //   and dst fields in this case shall not include the quoting
1613             //   characters.
1614             if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1615                 buff != '-') {
1616                 return -1;
1617             }
1618             ptr++;
1619         }
1620         str_end = ptr;
1621         ptr++;
1622     }
1623     else {
1624         str_start = p;
1625         // From the POSIX standard:
1626         //
1627         //   In the unquoted form, all characters in these fields shall be
1628         //   alphabetic characters from the portable character set in the
1629         //   current locale.
1630         while (isalpha(*ptr)) {
1631             ptr++;
1632         }
1633         str_end = ptr;
1634     }
1635 
1636     *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1637     if (*abbr == NULL) {
1638         return -1;
1639     }
1640 
1641     return ptr - p;
1642 }
1643 
1644 /* Parse a UTC offset from a TZ str. */
1645 static Py_ssize_t
parse_tz_delta(const char * const p,long * total_seconds)1646 parse_tz_delta(const char *const p, long *total_seconds)
1647 {
1648     // From the POSIX spec:
1649     //
1650     //   Indicates the value added to the local time to arrive at Coordinated
1651     //   Universal Time. The offset has the form:
1652     //
1653     //   hh[:mm[:ss]]
1654     //
1655     //   One or more digits may be used; the value is always interpreted as a
1656     //   decimal number.
1657     //
1658     // The POSIX spec says that the values for `hour` must be between 0 and 24
1659     // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1660     // transition times may be signed and range from -167 to 167.
1661     long sign = -1;
1662     long hours = 0;
1663     long minutes = 0;
1664     long seconds = 0;
1665 
1666     const char *ptr = p;
1667     char buff = *ptr;
1668     if (buff == '-' || buff == '+') {
1669         // Negative numbers correspond to *positive* offsets, from the spec:
1670         //
1671         //   If preceded by a '-', the timezone shall be east of the Prime
1672         //   Meridian; otherwise, it shall be west (which may be indicated by
1673         //   an optional preceding '+' ).
1674         if (buff == '-') {
1675             sign = 1;
1676         }
1677 
1678         ptr++;
1679     }
1680 
1681     // The hour can be 1 or 2 numeric characters
1682     for (size_t i = 0; i < 2; ++i) {
1683         buff = *ptr;
1684         if (!isdigit(buff)) {
1685             if (i == 0) {
1686                 return -1;
1687             }
1688             else {
1689                 break;
1690             }
1691         }
1692 
1693         hours *= 10;
1694         hours += buff - '0';
1695         ptr++;
1696     }
1697 
1698     if (hours > 24 || hours < 0) {
1699         return -1;
1700     }
1701 
1702     // Minutes and seconds always of the format ":dd"
1703     long *outputs[2] = {&minutes, &seconds};
1704     for (size_t i = 0; i < 2; ++i) {
1705         if (*ptr != ':') {
1706             goto complete;
1707         }
1708         ptr++;
1709 
1710         for (size_t j = 0; j < 2; ++j) {
1711             buff = *ptr;
1712             if (!isdigit(buff)) {
1713                 return -1;
1714             }
1715             *(outputs[i]) *= 10;
1716             *(outputs[i]) += buff - '0';
1717             ptr++;
1718         }
1719     }
1720 
1721 complete:
1722     *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1723 
1724     return ptr - p;
1725 }
1726 
1727 /* Parse the date portion of a transition rule. */
1728 static Py_ssize_t
parse_transition_rule(const char * const p,TransitionRuleType ** out)1729 parse_transition_rule(const char *const p, TransitionRuleType **out)
1730 {
1731     // The full transition rule indicates when to change back and forth between
1732     // STD and DST, and has the form:
1733     //
1734     //   date[/time],date[/time]
1735     //
1736     // This function parses an individual date[/time] section, and returns
1737     // the number of characters that contributed to the transition rule. This
1738     // does not include the ',' at the end of the first rule.
1739     //
1740     // The POSIX spec states that if *time* is not given, the default is 02:00.
1741     const char *ptr = p;
1742     int8_t hour = 2;
1743     int8_t minute = 0;
1744     int8_t second = 0;
1745 
1746     // Rules come in one of three flavors:
1747     //
1748     //   1. Jn: Julian day n, with no leap days.
1749     //   2. n: Day of year (0-based, with leap days)
1750     //   3. Mm.n.d: Specifying by month, week and day-of-week.
1751 
1752     if (*ptr == 'M') {
1753         uint8_t month, week, day;
1754         ptr++;
1755         if (parse_uint(ptr, &month)) {
1756             return -1;
1757         }
1758         ptr++;
1759         if (*ptr != '.') {
1760             uint8_t tmp;
1761             if (parse_uint(ptr, &tmp)) {
1762                 return -1;
1763             }
1764 
1765             month *= 10;
1766             month += tmp;
1767             ptr++;
1768         }
1769 
1770         uint8_t *values[2] = {&week, &day};
1771         for (size_t i = 0; i < 2; ++i) {
1772             if (*ptr != '.') {
1773                 return -1;
1774             }
1775             ptr++;
1776 
1777             if (parse_uint(ptr, values[i])) {
1778                 return -1;
1779             }
1780             ptr++;
1781         }
1782 
1783         if (*ptr == '/') {
1784             ptr++;
1785             Py_ssize_t num_chars =
1786                 parse_transition_time(ptr, &hour, &minute, &second);
1787             if (num_chars < 0) {
1788                 return -1;
1789             }
1790             ptr += num_chars;
1791         }
1792 
1793         CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1794         if (rv == NULL) {
1795             return -1;
1796         }
1797 
1798         if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1799             PyMem_Free(rv);
1800             return -1;
1801         }
1802 
1803         *out = (TransitionRuleType *)rv;
1804     }
1805     else {
1806         uint8_t julian = 0;
1807         unsigned int day = 0;
1808         if (*ptr == 'J') {
1809             julian = 1;
1810             ptr++;
1811         }
1812 
1813         for (size_t i = 0; i < 3; ++i) {
1814             if (!isdigit(*ptr)) {
1815                 if (i == 0) {
1816                     return -1;
1817                 }
1818                 break;
1819             }
1820             day *= 10;
1821             day += (*ptr) - '0';
1822             ptr++;
1823         }
1824 
1825         if (*ptr == '/') {
1826             ptr++;
1827             Py_ssize_t num_chars =
1828                 parse_transition_time(ptr, &hour, &minute, &second);
1829             if (num_chars < 0) {
1830                 return -1;
1831             }
1832             ptr += num_chars;
1833         }
1834 
1835         DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1836         if (rv == NULL) {
1837             return -1;
1838         }
1839 
1840         if (dayrule_new(julian, day, hour, minute, second, rv)) {
1841             PyMem_Free(rv);
1842             return -1;
1843         }
1844         *out = (TransitionRuleType *)rv;
1845     }
1846 
1847     return ptr - p;
1848 }
1849 
1850 /* Parse the time portion of a transition rule (e.g. following an /) */
1851 static Py_ssize_t
parse_transition_time(const char * const p,int8_t * hour,int8_t * minute,int8_t * second)1852 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1853                       int8_t *second)
1854 {
1855     // From the spec:
1856     //
1857     //   The time has the same format as offset except that no leading sign
1858     //   ( '-' or '+' ) is allowed.
1859     //
1860     // The format for the offset is:
1861     //
1862     //   h[h][:mm[:ss]]
1863     //
1864     // RFC 8536 also allows transition times to be signed and to range from
1865     // -167 to +167, but the current version only supports [0, 99].
1866     //
1867     // TODO: Support the full range of transition hours.
1868     int8_t *components[3] = {hour, minute, second};
1869     const char *ptr = p;
1870     int8_t sign = 1;
1871 
1872     if (*ptr == '-' || *ptr == '+') {
1873         if (*ptr == '-') {
1874             sign = -1;
1875         }
1876         ptr++;
1877     }
1878 
1879     for (size_t i = 0; i < 3; ++i) {
1880         if (i > 0) {
1881             if (*ptr != ':') {
1882                 break;
1883             }
1884             ptr++;
1885         }
1886 
1887         uint8_t buff = 0;
1888         for (size_t j = 0; j < 2; j++) {
1889             if (!isdigit(*ptr)) {
1890                 if (i == 0 && j > 0) {
1891                     break;
1892                 }
1893                 return -1;
1894             }
1895 
1896             buff *= 10;
1897             buff += (*ptr) - '0';
1898             ptr++;
1899         }
1900 
1901         *(components[i]) = sign * buff;
1902     }
1903 
1904     return ptr - p;
1905 }
1906 
1907 /* Constructor for a _tzrule.
1908  *
1909  * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1910  * case `dst_offset` will be ignored and `start` and `end` are expected to be
1911  * NULL as well.
1912  *
1913  * Returns 0 on success.
1914  */
1915 static int
build_tzrule(PyObject * std_abbr,PyObject * dst_abbr,long std_offset,long dst_offset,TransitionRuleType * start,TransitionRuleType * end,_tzrule * out)1916 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1917              long dst_offset, TransitionRuleType *start,
1918              TransitionRuleType *end, _tzrule *out)
1919 {
1920     _tzrule rv = {{0}};
1921 
1922     rv.start = start;
1923     rv.end = end;
1924 
1925     if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1926         goto error;
1927     }
1928 
1929     if (dst_abbr != NULL) {
1930         rv.dst_diff = dst_offset - std_offset;
1931         if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1932             goto error;
1933         }
1934     }
1935     else {
1936         rv.std_only = 1;
1937     }
1938 
1939     *out = rv;
1940 
1941     return 0;
1942 error:
1943     xdecref_ttinfo(&rv.std);
1944     xdecref_ttinfo(&rv.dst);
1945     return -1;
1946 }
1947 
1948 /* Destructor for _tzrule. */
1949 static void
free_tzrule(_tzrule * tzrule)1950 free_tzrule(_tzrule *tzrule)
1951 {
1952     xdecref_ttinfo(&(tzrule->std));
1953     if (!tzrule->std_only) {
1954         xdecref_ttinfo(&(tzrule->dst));
1955     }
1956 
1957     if (tzrule->start != NULL) {
1958         PyMem_Free(tzrule->start);
1959     }
1960 
1961     if (tzrule->end != NULL) {
1962         PyMem_Free(tzrule->end);
1963     }
1964 }
1965 
1966 /* Calculate DST offsets from transitions and UTC offsets
1967  *
1968  * This is necessary because each C `ttinfo` only contains the UTC offset,
1969  * time zone abbreviation and an isdst boolean - it does not include the
1970  * amount of the DST offset, but we need the amount for the dst() function.
1971  *
1972  * Thus function uses heuristics to infer what the offset should be, so it
1973  * is not guaranteed that this will work for all zones. If we cannot assign
1974  * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1975  * bool(dt.dst()) will always match ttinfo.isdst.
1976  */
1977 static void
utcoff_to_dstoff(size_t * trans_idx,long * utcoffs,long * dstoffs,unsigned char * isdsts,size_t num_transitions,size_t num_ttinfos)1978 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1979                  unsigned char *isdsts, size_t num_transitions,
1980                  size_t num_ttinfos)
1981 {
1982     size_t dst_count = 0;
1983     size_t dst_found = 0;
1984     for (size_t i = 0; i < num_ttinfos; ++i) {
1985         dst_count++;
1986     }
1987 
1988     for (size_t i = 1; i < num_transitions; ++i) {
1989         if (dst_count == dst_found) {
1990             break;
1991         }
1992 
1993         size_t idx = trans_idx[i];
1994         size_t comp_idx = trans_idx[i - 1];
1995 
1996         // Only look at DST offsets that have nto been assigned already
1997         if (!isdsts[idx] || dstoffs[idx] != 0) {
1998             continue;
1999         }
2000 
2001         long dstoff = 0;
2002         long utcoff = utcoffs[idx];
2003 
2004         if (!isdsts[comp_idx]) {
2005             dstoff = utcoff - utcoffs[comp_idx];
2006         }
2007 
2008         if (!dstoff && idx < (num_ttinfos - 1)) {
2009             comp_idx = trans_idx[i + 1];
2010 
2011             // If the following transition is also DST and we couldn't find
2012             // the DST offset by this point, we're going to have to skip it
2013             // and hope this transition gets assigned later
2014             if (isdsts[comp_idx]) {
2015                 continue;
2016             }
2017 
2018             dstoff = utcoff - utcoffs[comp_idx];
2019         }
2020 
2021         if (dstoff) {
2022             dst_found++;
2023             dstoffs[idx] = dstoff;
2024         }
2025     }
2026 
2027     if (dst_found < dst_count) {
2028         // If there are time zones we didn't find a value for, we'll end up
2029         // with dstoff = 0 for something where isdst=1. This is obviously
2030         // wrong — one hour will be a much better guess than 0.
2031         for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2032             if (isdsts[idx] && !dstoffs[idx]) {
2033                 dstoffs[idx] = 3600;
2034             }
2035         }
2036     }
2037 }
2038 
2039 #define _swap(x, y, buffer) \
2040     buffer = x;             \
2041     x = y;                  \
2042     y = buffer;
2043 
2044 /* Calculate transitions in local time from UTC time and offsets.
2045  *
2046  * We want to know when each transition occurs, denominated in the number of
2047  * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2048  * *local time* (note: this is *not* equivalent to the output of
2049  * datetime.timestamp, which is the total number of seconds actual elapsed
2050  * since 1970-01-01T00:00:00Z in UTC).
2051  *
2052  * This is an ambiguous question because "local time" can be ambiguous — but it
2053  * is disambiguated by the `fold` parameter, so we allocate two arrays:
2054  *
2055  *  trans_local[0]: The wall-time transitions for fold=0
2056  *  trans_local[1]: The wall-time transitions for fold=1
2057  *
2058  * This returns 0 on success and a negative number of failure. The trans_local
2059  * arrays must be freed if they are not NULL.
2060  */
2061 static int
ts_to_local(size_t * trans_idx,int64_t * trans_utc,long * utcoff,int64_t * trans_local[2],size_t num_ttinfos,size_t num_transitions)2062 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2063             int64_t *trans_local[2], size_t num_ttinfos,
2064             size_t num_transitions)
2065 {
2066     if (num_transitions == 0) {
2067         return 0;
2068     }
2069 
2070     // Copy the UTC transitions into each array to be modified in place later
2071     for (size_t i = 0; i < 2; ++i) {
2072         trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2073         if (trans_local[i] == NULL) {
2074             return -1;
2075         }
2076 
2077         memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2078     }
2079 
2080     int64_t offset_0, offset_1, buff;
2081     if (num_ttinfos > 1) {
2082         offset_0 = utcoff[0];
2083         offset_1 = utcoff[trans_idx[0]];
2084 
2085         if (offset_1 > offset_0) {
2086             _swap(offset_0, offset_1, buff);
2087         }
2088     }
2089     else {
2090         offset_0 = utcoff[0];
2091         offset_1 = utcoff[0];
2092     }
2093 
2094     trans_local[0][0] += offset_0;
2095     trans_local[1][0] += offset_1;
2096 
2097     for (size_t i = 1; i < num_transitions; ++i) {
2098         offset_0 = utcoff[trans_idx[i - 1]];
2099         offset_1 = utcoff[trans_idx[i]];
2100 
2101         if (offset_1 > offset_0) {
2102             _swap(offset_1, offset_0, buff);
2103         }
2104 
2105         trans_local[0][i] += offset_0;
2106         trans_local[1][i] += offset_1;
2107     }
2108 
2109     return 0;
2110 }
2111 
2112 /* Simple bisect_right binary search implementation */
2113 static size_t
_bisect(const int64_t value,const int64_t * arr,size_t size)2114 _bisect(const int64_t value, const int64_t *arr, size_t size)
2115 {
2116     size_t lo = 0;
2117     size_t hi = size;
2118     size_t m;
2119 
2120     while (lo < hi) {
2121         m = (lo + hi) / 2;
2122         if (arr[m] > value) {
2123             hi = m;
2124         }
2125         else {
2126             lo = m + 1;
2127         }
2128     }
2129 
2130     return hi;
2131 }
2132 
2133 /* Find the ttinfo rules that apply at a given local datetime. */
2134 static _ttinfo *
find_ttinfo(PyZoneInfo_ZoneInfo * self,PyObject * dt)2135 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2136 {
2137     // datetime.time has a .tzinfo attribute that passes None as the dt
2138     // argument; it only really has meaning for fixed-offset zones.
2139     if (dt == Py_None) {
2140         if (self->fixed_offset) {
2141             return &(self->tzrule_after.std);
2142         }
2143         else {
2144             return &NO_TTINFO;
2145         }
2146     }
2147 
2148     int64_t ts;
2149     if (get_local_timestamp(dt, &ts)) {
2150         return NULL;
2151     }
2152 
2153     unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2154     assert(fold < 2);
2155     int64_t *local_transitions = self->trans_list_wall[fold];
2156     size_t num_trans = self->num_transitions;
2157 
2158     if (num_trans && ts < local_transitions[0]) {
2159         return self->ttinfo_before;
2160     }
2161     else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2162         return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2163                                   PyDateTime_GET_YEAR(dt));
2164     }
2165     else {
2166         size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2167         assert(idx < self->num_transitions);
2168         return self->trans_ttinfos[idx];
2169     }
2170 }
2171 
2172 static int
is_leap_year(int year)2173 is_leap_year(int year)
2174 {
2175     const unsigned int ayear = (unsigned int)year;
2176     return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2177 }
2178 
2179 /* Calculates ordinal datetime from year, month and day. */
2180 static int
ymd_to_ord(int y,int m,int d)2181 ymd_to_ord(int y, int m, int d)
2182 {
2183     y -= 1;
2184     int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2185     int yearday = DAYS_BEFORE_MONTH[m];
2186     if (m > 2 && is_leap_year(y + 1)) {
2187         yearday += 1;
2188     }
2189 
2190     return days_before_year + yearday + d;
2191 }
2192 
2193 /* Calculate the number of seconds since 1970-01-01 in local time.
2194  *
2195  * This gets a datetime in the same "units" as self->trans_list_wall so that we
2196  * can easily determine which transitions a datetime falls between. See the
2197  * comment above ts_to_local for more information.
2198  * */
2199 static int
get_local_timestamp(PyObject * dt,int64_t * local_ts)2200 get_local_timestamp(PyObject *dt, int64_t *local_ts)
2201 {
2202     assert(local_ts != NULL);
2203 
2204     int hour, minute, second;
2205     int ord;
2206     if (PyDateTime_CheckExact(dt)) {
2207         int y = PyDateTime_GET_YEAR(dt);
2208         int m = PyDateTime_GET_MONTH(dt);
2209         int d = PyDateTime_GET_DAY(dt);
2210         hour = PyDateTime_DATE_GET_HOUR(dt);
2211         minute = PyDateTime_DATE_GET_MINUTE(dt);
2212         second = PyDateTime_DATE_GET_SECOND(dt);
2213 
2214         ord = ymd_to_ord(y, m, d);
2215     }
2216     else {
2217         PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2218         if (num == NULL) {
2219             return -1;
2220         }
2221 
2222         ord = PyLong_AsLong(num);
2223         Py_DECREF(num);
2224         if (ord == -1 && PyErr_Occurred()) {
2225             return -1;
2226         }
2227 
2228         num = PyObject_GetAttrString(dt, "hour");
2229         if (num == NULL) {
2230             return -1;
2231         }
2232         hour = PyLong_AsLong(num);
2233         Py_DECREF(num);
2234         if (hour == -1) {
2235             return -1;
2236         }
2237 
2238         num = PyObject_GetAttrString(dt, "minute");
2239         if (num == NULL) {
2240             return -1;
2241         }
2242         minute = PyLong_AsLong(num);
2243         Py_DECREF(num);
2244         if (minute == -1) {
2245             return -1;
2246         }
2247 
2248         num = PyObject_GetAttrString(dt, "second");
2249         if (num == NULL) {
2250             return -1;
2251         }
2252         second = PyLong_AsLong(num);
2253         Py_DECREF(num);
2254         if (second == -1) {
2255             return -1;
2256         }
2257     }
2258 
2259     *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2260                 (int64_t)(hour * 3600 + minute * 60 + second);
2261 
2262     return 0;
2263 }
2264 
2265 /////
2266 // Functions for cache handling
2267 
2268 /* Constructor for StrongCacheNode */
2269 static StrongCacheNode *
strong_cache_node_new(PyObject * key,PyObject * zone)2270 strong_cache_node_new(PyObject *key, PyObject *zone)
2271 {
2272     StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2273     if (node == NULL) {
2274         return NULL;
2275     }
2276 
2277     Py_INCREF(key);
2278     Py_INCREF(zone);
2279 
2280     node->next = NULL;
2281     node->prev = NULL;
2282     node->key = key;
2283     node->zone = zone;
2284 
2285     return node;
2286 }
2287 
2288 /* Destructor for StrongCacheNode */
2289 void
strong_cache_node_free(StrongCacheNode * node)2290 strong_cache_node_free(StrongCacheNode *node)
2291 {
2292     Py_XDECREF(node->key);
2293     Py_XDECREF(node->zone);
2294 
2295     PyMem_Free(node);
2296 }
2297 
2298 /* Frees all nodes at or after a specified root in the strong cache.
2299  *
2300  * This can be used on the root node to free the entire cache or it can be used
2301  * to clear all nodes that have been expired (which, if everything is going
2302  * right, will actually only be 1 node at a time).
2303  */
2304 void
strong_cache_free(StrongCacheNode * root)2305 strong_cache_free(StrongCacheNode *root)
2306 {
2307     StrongCacheNode *node = root;
2308     StrongCacheNode *next_node;
2309     while (node != NULL) {
2310         next_node = node->next;
2311         strong_cache_node_free(node);
2312 
2313         node = next_node;
2314     }
2315 }
2316 
2317 /* Removes a node from the cache and update its neighbors.
2318  *
2319  * This is used both when ejecting a node from the cache and when moving it to
2320  * the front of the cache.
2321  */
2322 static void
remove_from_strong_cache(StrongCacheNode * node)2323 remove_from_strong_cache(StrongCacheNode *node)
2324 {
2325     if (ZONEINFO_STRONG_CACHE == node) {
2326         ZONEINFO_STRONG_CACHE = node->next;
2327     }
2328 
2329     if (node->prev != NULL) {
2330         node->prev->next = node->next;
2331     }
2332 
2333     if (node->next != NULL) {
2334         node->next->prev = node->prev;
2335     }
2336 
2337     node->next = NULL;
2338     node->prev = NULL;
2339 }
2340 
2341 /* Retrieves the node associated with a key, if it exists.
2342  *
2343  * This traverses the strong cache until it finds a matching key and returns a
2344  * pointer to the relevant node if found. Returns NULL if no node is found.
2345  *
2346  * root may be NULL, indicating an empty cache.
2347  */
2348 static StrongCacheNode *
find_in_strong_cache(const StrongCacheNode * const root,PyObject * const key)2349 find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2350 {
2351     const StrongCacheNode *node = root;
2352     while (node != NULL) {
2353         int rv = PyObject_RichCompareBool(key, node->key, Py_EQ);
2354         if (rv < 0) {
2355             return NULL;
2356         }
2357         if (rv) {
2358             return (StrongCacheNode *)node;
2359         }
2360 
2361         node = node->next;
2362     }
2363 
2364     return NULL;
2365 }
2366 
2367 /* Ejects a given key from the class's strong cache, if applicable.
2368  *
2369  * This function is used to enable the per-key functionality in clear_cache.
2370  */
2371 static int
eject_from_strong_cache(const PyTypeObject * const type,PyObject * key)2372 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2373 {
2374     if (type != &PyZoneInfo_ZoneInfoType) {
2375         return 0;
2376     }
2377 
2378     StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2379     if (node != NULL) {
2380         remove_from_strong_cache(node);
2381 
2382         strong_cache_node_free(node);
2383     }
2384     else if (PyErr_Occurred()) {
2385         return -1;
2386     }
2387     return 0;
2388 }
2389 
2390 /* Moves a node to the front of the LRU cache.
2391  *
2392  * The strong cache is an LRU cache, so whenever a given node is accessed, if
2393  * it is not at the front of the cache, it needs to be moved there.
2394  */
2395 static void
move_strong_cache_node_to_front(StrongCacheNode ** root,StrongCacheNode * node)2396 move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2397 {
2398     StrongCacheNode *root_p = *root;
2399     if (root_p == node) {
2400         return;
2401     }
2402 
2403     remove_from_strong_cache(node);
2404 
2405     node->prev = NULL;
2406     node->next = root_p;
2407 
2408     if (root_p != NULL) {
2409         root_p->prev = node;
2410     }
2411 
2412     *root = node;
2413 }
2414 
2415 /* Retrieves a ZoneInfo from the strong cache if it's present.
2416  *
2417  * This function finds the ZoneInfo by key and if found will move the node to
2418  * the front of the LRU cache and return a new reference to it. It returns NULL
2419  * if the key is not in the cache.
2420  *
2421  * The strong cache is currently only implemented for the base class, so this
2422  * always returns a cache miss for subclasses.
2423  */
2424 static PyObject *
zone_from_strong_cache(const PyTypeObject * const type,PyObject * const key)2425 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2426 {
2427     if (type != &PyZoneInfo_ZoneInfoType) {
2428         return NULL;  // Strong cache currently only implemented for base class
2429     }
2430 
2431     StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2432 
2433     if (node != NULL) {
2434         move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2435         Py_INCREF(node->zone);
2436         return node->zone;
2437     }
2438 
2439     return NULL;  // Cache miss
2440 }
2441 
2442 /* Inserts a new key into the strong LRU cache.
2443  *
2444  * This function is only to be used after a cache miss — it creates a new node
2445  * at the front of the cache and ejects any stale entries (keeping the size of
2446  * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2447  */
2448 static void
update_strong_cache(const PyTypeObject * const type,PyObject * key,PyObject * zone)2449 update_strong_cache(const PyTypeObject *const type, PyObject *key,
2450                     PyObject *zone)
2451 {
2452     if (type != &PyZoneInfo_ZoneInfoType) {
2453         return;
2454     }
2455 
2456     StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2457 
2458     move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2459 
2460     StrongCacheNode *node = new_node->next;
2461     for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2462         if (node == NULL) {
2463             return;
2464         }
2465         node = node->next;
2466     }
2467 
2468     // Everything beyond this point needs to be freed
2469     if (node != NULL) {
2470         if (node->prev != NULL) {
2471             node->prev->next = NULL;
2472         }
2473         strong_cache_free(node);
2474     }
2475 }
2476 
2477 /* Clears all entries into a type's strong cache.
2478  *
2479  * Because the strong cache is not implemented for subclasses, this is a no-op
2480  * for everything except the base class.
2481  */
2482 void
clear_strong_cache(const PyTypeObject * const type)2483 clear_strong_cache(const PyTypeObject *const type)
2484 {
2485     if (type != &PyZoneInfo_ZoneInfoType) {
2486         return;
2487     }
2488 
2489     strong_cache_free(ZONEINFO_STRONG_CACHE);
2490     ZONEINFO_STRONG_CACHE = NULL;
2491 }
2492 
2493 static PyObject *
new_weak_cache(void)2494 new_weak_cache(void)
2495 {
2496     PyObject *weakref_module = PyImport_ImportModule("weakref");
2497     if (weakref_module == NULL) {
2498         return NULL;
2499     }
2500 
2501     PyObject *weak_cache =
2502         PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2503     Py_DECREF(weakref_module);
2504     return weak_cache;
2505 }
2506 
2507 static int
initialize_caches(void)2508 initialize_caches(void)
2509 {
2510     // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
2511     if (TIMEDELTA_CACHE == NULL) {
2512         TIMEDELTA_CACHE = PyDict_New();
2513     }
2514     else {
2515         Py_INCREF(TIMEDELTA_CACHE);
2516     }
2517 
2518     if (TIMEDELTA_CACHE == NULL) {
2519         return -1;
2520     }
2521 
2522     if (ZONEINFO_WEAK_CACHE == NULL) {
2523         ZONEINFO_WEAK_CACHE = new_weak_cache();
2524     }
2525     else {
2526         Py_INCREF(ZONEINFO_WEAK_CACHE);
2527     }
2528 
2529     if (ZONEINFO_WEAK_CACHE == NULL) {
2530         return -1;
2531     }
2532 
2533     return 0;
2534 }
2535 
2536 static PyObject *
zoneinfo_init_subclass(PyTypeObject * cls,PyObject * args,PyObject ** kwargs)2537 zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2538 {
2539     PyObject *weak_cache = new_weak_cache();
2540     if (weak_cache == NULL) {
2541         return NULL;
2542     }
2543 
2544     if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache",
2545                                weak_cache) < 0) {
2546         Py_DECREF(weak_cache);
2547         return NULL;
2548     }
2549     Py_DECREF(weak_cache);
2550     Py_RETURN_NONE;
2551 }
2552 
2553 /////
2554 // Specify the ZoneInfo type
2555 static PyMethodDef zoneinfo_methods[] = {
2556     {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2557      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2558      PyDoc_STR("Clear the ZoneInfo cache.")},
2559     {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2560      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2561      PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2562     {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2563      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2564      PyDoc_STR("Create a ZoneInfo file from a file object.")},
2565     {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2566      PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2567                "the given datetime.")},
2568     {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2569      PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2570                "in a zone at the given datetime.")},
2571     {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2572      PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2573                "zone that applies in a zone at a given datetime.")},
2574     {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2575      PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2576                "datetime in local time.")},
2577     {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2578      PyDoc_STR("Function for serialization with the pickle protocol.")},
2579     {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2580      PyDoc_STR("Private method used in unpickling.")},
2581     {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2582      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2583      PyDoc_STR("Function to initialize subclasses.")},
2584     {NULL} /* Sentinel */
2585 };
2586 
2587 static PyMemberDef zoneinfo_members[] = {
2588     {.name = "key",
2589      .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2590      .type = T_OBJECT_EX,
2591      .flags = READONLY,
2592      .doc = NULL},
2593     {NULL}, /* Sentinel */
2594 };
2595 
2596 static PyTypeObject PyZoneInfo_ZoneInfoType = {
2597     PyVarObject_HEAD_INIT(NULL, 0)  //
2598         .tp_name = "zoneinfo.ZoneInfo",
2599     .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2600     .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2601     .tp_repr = (reprfunc)zoneinfo_repr,
2602     .tp_str = (reprfunc)zoneinfo_str,
2603     .tp_getattro = PyObject_GenericGetAttr,
2604     .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2605     /* .tp_doc = zoneinfo_doc, */
2606     .tp_methods = zoneinfo_methods,
2607     .tp_members = zoneinfo_members,
2608     .tp_new = zoneinfo_new,
2609     .tp_dealloc = zoneinfo_dealloc,
2610 };
2611 
2612 /////
2613 // Specify the _zoneinfo module
2614 static PyMethodDef module_methods[] = {{NULL, NULL}};
2615 static void
module_free()2616 module_free()
2617 {
2618     Py_XDECREF(_tzpath_find_tzfile);
2619     _tzpath_find_tzfile = NULL;
2620 
2621     Py_XDECREF(_common_mod);
2622     _common_mod = NULL;
2623 
2624     Py_XDECREF(io_open);
2625     io_open = NULL;
2626 
2627     xdecref_ttinfo(&NO_TTINFO);
2628 
2629     if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2630         Py_DECREF(TIMEDELTA_CACHE);
2631     } else {
2632         Py_CLEAR(TIMEDELTA_CACHE);
2633     }
2634 
2635     if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2636         Py_DECREF(ZONEINFO_WEAK_CACHE);
2637     } else {
2638         Py_CLEAR(ZONEINFO_WEAK_CACHE);
2639     }
2640 
2641     clear_strong_cache(&PyZoneInfo_ZoneInfoType);
2642 }
2643 
2644 static int
zoneinfomodule_exec(PyObject * m)2645 zoneinfomodule_exec(PyObject *m)
2646 {
2647     PyDateTime_IMPORT;
2648     if (PyDateTimeAPI == NULL) {
2649         goto error;
2650     }
2651     PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2652     if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2653         goto error;
2654     }
2655 
2656     Py_INCREF(&PyZoneInfo_ZoneInfoType);
2657     PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2658 
2659     /* Populate imports */
2660     PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2661     if (_tzpath_module == NULL) {
2662         goto error;
2663     }
2664 
2665     _tzpath_find_tzfile =
2666         PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2667     Py_DECREF(_tzpath_module);
2668     if (_tzpath_find_tzfile == NULL) {
2669         goto error;
2670     }
2671 
2672     PyObject *io_module = PyImport_ImportModule("io");
2673     if (io_module == NULL) {
2674         goto error;
2675     }
2676 
2677     io_open = PyObject_GetAttrString(io_module, "open");
2678     Py_DECREF(io_module);
2679     if (io_open == NULL) {
2680         goto error;
2681     }
2682 
2683     _common_mod = PyImport_ImportModule("zoneinfo._common");
2684     if (_common_mod == NULL) {
2685         goto error;
2686     }
2687 
2688     if (NO_TTINFO.utcoff == NULL) {
2689         NO_TTINFO.utcoff = Py_None;
2690         NO_TTINFO.dstoff = Py_None;
2691         NO_TTINFO.tzname = Py_None;
2692 
2693         for (size_t i = 0; i < 3; ++i) {
2694             Py_INCREF(Py_None);
2695         }
2696     }
2697 
2698     if (initialize_caches()) {
2699         goto error;
2700     }
2701 
2702     return 0;
2703 
2704 error:
2705     return -1;
2706 }
2707 
2708 static PyModuleDef_Slot zoneinfomodule_slots[] = {
2709     {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2710 
2711 static struct PyModuleDef zoneinfomodule = {
2712     PyModuleDef_HEAD_INIT,
2713     .m_name = "_zoneinfo",
2714     .m_doc = "C implementation of the zoneinfo module",
2715     .m_size = 0,
2716     .m_methods = module_methods,
2717     .m_slots = zoneinfomodule_slots,
2718     .m_free = (freefunc)module_free};
2719 
2720 PyMODINIT_FUNC
PyInit__zoneinfo(void)2721 PyInit__zoneinfo(void)
2722 {
2723     return PyModuleDef_Init(&zoneinfomodule);
2724 }
2725