1 #ifndef Py_BUILD_CORE_BUILTIN
2 #  define Py_BUILD_CORE_MODULE 1
3 #endif
4 
5 #include "Python.h"
6 #include "pycore_long.h"          // _PyLong_GetOne()
7 #include "structmember.h"
8 
9 #include <ctype.h>
10 #include <stddef.h>
11 #include <stdint.h>
12 
13 #include "datetime.h"
14 
15 // Imports
16 static PyObject *io_open = NULL;
17 static PyObject *_tzpath_find_tzfile = NULL;
18 static PyObject *_common_mod = NULL;
19 
20 typedef struct TransitionRuleType TransitionRuleType;
21 typedef struct StrongCacheNode StrongCacheNode;
22 
23 typedef struct {
24     PyObject *utcoff;
25     PyObject *dstoff;
26     PyObject *tzname;
27     long utcoff_seconds;
28 } _ttinfo;
29 
30 typedef struct {
31     _ttinfo std;
32     _ttinfo dst;
33     int dst_diff;
34     TransitionRuleType *start;
35     TransitionRuleType *end;
36     unsigned char std_only;
37 } _tzrule;
38 
39 typedef struct {
40     PyDateTime_TZInfo base;
41     PyObject *key;
42     PyObject *file_repr;
43     PyObject *weakreflist;
44     size_t num_transitions;
45     size_t num_ttinfos;
46     int64_t *trans_list_utc;
47     int64_t *trans_list_wall[2];
48     _ttinfo **trans_ttinfos;  // References to the ttinfo for each transition
49     _ttinfo *ttinfo_before;
50     _tzrule tzrule_after;
51     _ttinfo *_ttinfos;  // Unique array of ttinfos for ease of deallocation
52     unsigned char fixed_offset;
53     unsigned char source;
54 } PyZoneInfo_ZoneInfo;
55 
56 struct TransitionRuleType {
57     int64_t (*year_to_timestamp)(TransitionRuleType *, int);
58 };
59 
60 typedef struct {
61     TransitionRuleType base;
62     uint8_t month;
63     uint8_t week;
64     uint8_t day;
65     int8_t hour;
66     int8_t minute;
67     int8_t second;
68 } CalendarRule;
69 
70 typedef struct {
71     TransitionRuleType base;
72     uint8_t julian;
73     unsigned int day;
74     int8_t hour;
75     int8_t minute;
76     int8_t second;
77 } DayRule;
78 
79 struct StrongCacheNode {
80     StrongCacheNode *next;
81     StrongCacheNode *prev;
82     PyObject *key;
83     PyObject *zone;
84 };
85 
86 static PyTypeObject PyZoneInfo_ZoneInfoType;
87 
88 // Globals
89 static PyObject *TIMEDELTA_CACHE = NULL;
90 static PyObject *ZONEINFO_WEAK_CACHE = NULL;
91 static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
92 static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
93 
94 static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
95 
96 // Constants
97 static const int EPOCHORDINAL = 719163;
98 static int DAYS_IN_MONTH[] = {
99     -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
100 };
101 
102 static int DAYS_BEFORE_MONTH[] = {
103     -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
104 };
105 
106 static const int SOURCE_NOCACHE = 0;
107 static const int SOURCE_CACHE = 1;
108 static const int SOURCE_FILE = 2;
109 
110 // Forward declarations
111 static int
112 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
113 static void
114 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
115                  unsigned char *isdsts, size_t num_transitions,
116                  size_t num_ttinfos);
117 static int
118 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
119             int64_t *trans_local[2], size_t num_ttinfos,
120             size_t num_transitions);
121 
122 static int
123 parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
124 
125 static Py_ssize_t
126 parse_abbr(const char *const p, PyObject **abbr);
127 static Py_ssize_t
128 parse_tz_delta(const char *const p, long *total_seconds);
129 static Py_ssize_t
130 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
131                       int8_t *second);
132 static Py_ssize_t
133 parse_transition_rule(const char *const p, TransitionRuleType **out);
134 
135 static _ttinfo *
136 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
137 static _ttinfo *
138 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
139                            unsigned char *fold);
140 
141 static int
142 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
143 static void
144 xdecref_ttinfo(_ttinfo *ttinfo);
145 static int
146 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
147 
148 static int
149 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
150              long dst_offset, TransitionRuleType *start,
151              TransitionRuleType *end, _tzrule *out);
152 static void
153 free_tzrule(_tzrule *tzrule);
154 
155 static PyObject *
156 load_timedelta(long seconds);
157 
158 static int
159 get_local_timestamp(PyObject *dt, int64_t *local_ts);
160 static _ttinfo *
161 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
162 
163 static int
164 ymd_to_ord(int y, int m, int d);
165 static int
166 is_leap_year(int year);
167 
168 static size_t
169 _bisect(const int64_t value, const int64_t *arr, size_t size);
170 
171 static int
172 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
173 static void
174 clear_strong_cache(const PyTypeObject *const type);
175 static void
176 update_strong_cache(const PyTypeObject *const type, PyObject *key,
177                     PyObject *zone);
178 static PyObject *
179 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key);
180 
181 static PyObject *
zoneinfo_new_instance(PyTypeObject * type,PyObject * key)182 zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
183 {
184     PyObject *file_obj = NULL;
185     PyObject *file_path = NULL;
186 
187     file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
188     if (file_path == NULL) {
189         return NULL;
190     }
191     else if (file_path == Py_None) {
192         file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
193         if (file_obj == NULL) {
194             Py_DECREF(file_path);
195             return NULL;
196         }
197     }
198 
199     PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
200     if (self == NULL) {
201         goto error;
202     }
203 
204     if (file_obj == NULL) {
205         file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
206         if (file_obj == NULL) {
207             goto error;
208         }
209     }
210 
211     if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
212         goto error;
213     }
214 
215     PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
216     Py_DECREF(file_obj);
217     file_obj = NULL;
218     if (rv == NULL) {
219         goto error;
220     }
221     Py_DECREF(rv);
222 
223     ((PyZoneInfo_ZoneInfo *)self)->key = key;
224     Py_INCREF(key);
225 
226     goto cleanup;
227 error:
228     Py_XDECREF(self);
229     self = NULL;
230 cleanup:
231     if (file_obj != NULL) {
232         PyObject *exc, *val, *tb;
233         PyErr_Fetch(&exc, &val, &tb);
234         PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
235         _PyErr_ChainExceptions(exc, val, tb);
236         if (tmp == NULL) {
237             Py_CLEAR(self);
238         }
239         Py_XDECREF(tmp);
240         Py_DECREF(file_obj);
241     }
242     Py_DECREF(file_path);
243     return self;
244 }
245 
246 static PyObject *
get_weak_cache(PyTypeObject * type)247 get_weak_cache(PyTypeObject *type)
248 {
249     if (type == &PyZoneInfo_ZoneInfoType) {
250         return ZONEINFO_WEAK_CACHE;
251     }
252     else {
253         PyObject *cache =
254             PyObject_GetAttrString((PyObject *)type, "_weak_cache");
255         // We are assuming that the type lives at least as long as the function
256         // that calls get_weak_cache, and that it holds a reference to the
257         // cache, so we'll return a "borrowed reference".
258         Py_XDECREF(cache);
259         return cache;
260     }
261 }
262 
263 static PyObject *
zoneinfo_new(PyTypeObject * type,PyObject * args,PyObject * kw)264 zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
265 {
266     PyObject *key = NULL;
267     static char *kwlist[] = {"key", NULL};
268     if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
269         return NULL;
270     }
271 
272     PyObject *instance = zone_from_strong_cache(type, key);
273     if (instance != NULL || PyErr_Occurred()) {
274         return instance;
275     }
276 
277     PyObject *weak_cache = get_weak_cache(type);
278     instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
279     if (instance == NULL) {
280         return NULL;
281     }
282 
283     if (instance == Py_None) {
284         Py_DECREF(instance);
285         PyObject *tmp = zoneinfo_new_instance(type, key);
286         if (tmp == NULL) {
287             return NULL;
288         }
289 
290         instance =
291             PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
292         Py_DECREF(tmp);
293         if (instance == NULL) {
294             return NULL;
295         }
296         ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
297     }
298 
299     update_strong_cache(type, key, instance);
300     return instance;
301 }
302 
303 static void
zoneinfo_dealloc(PyObject * obj_self)304 zoneinfo_dealloc(PyObject *obj_self)
305 {
306     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
307 
308     if (self->weakreflist != NULL) {
309         PyObject_ClearWeakRefs(obj_self);
310     }
311 
312     if (self->trans_list_utc != NULL) {
313         PyMem_Free(self->trans_list_utc);
314     }
315 
316     for (size_t i = 0; i < 2; i++) {
317         if (self->trans_list_wall[i] != NULL) {
318             PyMem_Free(self->trans_list_wall[i]);
319         }
320     }
321 
322     if (self->_ttinfos != NULL) {
323         for (size_t i = 0; i < self->num_ttinfos; ++i) {
324             xdecref_ttinfo(&(self->_ttinfos[i]));
325         }
326         PyMem_Free(self->_ttinfos);
327     }
328 
329     if (self->trans_ttinfos != NULL) {
330         PyMem_Free(self->trans_ttinfos);
331     }
332 
333     free_tzrule(&(self->tzrule_after));
334 
335     Py_XDECREF(self->key);
336     Py_XDECREF(self->file_repr);
337 
338     Py_TYPE(self)->tp_free((PyObject *)self);
339 }
340 
341 static PyObject *
zoneinfo_from_file(PyTypeObject * type,PyObject * args,PyObject * kwargs)342 zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
343 {
344     PyObject *file_obj = NULL;
345     PyObject *file_repr = NULL;
346     PyObject *key = Py_None;
347     PyZoneInfo_ZoneInfo *self = NULL;
348 
349     static char *kwlist[] = {"", "key", NULL};
350     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
351                                      &key)) {
352         return NULL;
353     }
354 
355     PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
356     self = (PyZoneInfo_ZoneInfo *)obj_self;
357     if (self == NULL) {
358         return NULL;
359     }
360 
361     file_repr = PyUnicode_FromFormat("%R", file_obj);
362     if (file_repr == NULL) {
363         goto error;
364     }
365 
366     if (load_data(self, file_obj)) {
367         goto error;
368     }
369 
370     self->source = SOURCE_FILE;
371     self->file_repr = file_repr;
372     self->key = key;
373     Py_INCREF(key);
374 
375     return obj_self;
376 error:
377     Py_XDECREF(file_repr);
378     Py_XDECREF(self);
379     return NULL;
380 }
381 
382 static PyObject *
zoneinfo_no_cache(PyTypeObject * cls,PyObject * args,PyObject * kwargs)383 zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
384 {
385     static char *kwlist[] = {"key", NULL};
386     PyObject *key = NULL;
387     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
388         return NULL;
389     }
390 
391     PyObject *out = zoneinfo_new_instance(cls, key);
392     if (out != NULL) {
393         ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
394     }
395 
396     return out;
397 }
398 
399 static PyObject *
zoneinfo_clear_cache(PyObject * cls,PyObject * args,PyObject * kwargs)400 zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
401 {
402     PyObject *only_keys = NULL;
403     static char *kwlist[] = {"only_keys", NULL};
404 
405     if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
406                                       &only_keys))) {
407         return NULL;
408     }
409 
410     PyTypeObject *type = (PyTypeObject *)cls;
411     PyObject *weak_cache = get_weak_cache(type);
412 
413     if (only_keys == NULL || only_keys == Py_None) {
414         PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
415         if (rv != NULL) {
416             Py_DECREF(rv);
417         }
418 
419         clear_strong_cache(type);
420     }
421     else {
422         PyObject *item = NULL;
423         PyObject *pop = PyUnicode_FromString("pop");
424         if (pop == NULL) {
425             return NULL;
426         }
427 
428         PyObject *iter = PyObject_GetIter(only_keys);
429         if (iter == NULL) {
430             Py_DECREF(pop);
431             return NULL;
432         }
433 
434         while ((item = PyIter_Next(iter))) {
435             // Remove from strong cache
436             if (eject_from_strong_cache(type, item) < 0) {
437                 Py_DECREF(item);
438                 break;
439             }
440 
441             // Remove from weak cache
442             PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
443                                                        Py_None, NULL);
444 
445             Py_DECREF(item);
446             if (tmp == NULL) {
447                 break;
448             }
449             Py_DECREF(tmp);
450         }
451         Py_DECREF(iter);
452         Py_DECREF(pop);
453     }
454 
455     if (PyErr_Occurred()) {
456         return NULL;
457     }
458 
459     Py_RETURN_NONE;
460 }
461 
462 static PyObject *
zoneinfo_utcoffset(PyObject * self,PyObject * dt)463 zoneinfo_utcoffset(PyObject *self, PyObject *dt)
464 {
465     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
466     if (tti == NULL) {
467         return NULL;
468     }
469     Py_INCREF(tti->utcoff);
470     return tti->utcoff;
471 }
472 
473 static PyObject *
zoneinfo_dst(PyObject * self,PyObject * dt)474 zoneinfo_dst(PyObject *self, PyObject *dt)
475 {
476     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
477     if (tti == NULL) {
478         return NULL;
479     }
480     Py_INCREF(tti->dstoff);
481     return tti->dstoff;
482 }
483 
484 static PyObject *
zoneinfo_tzname(PyObject * self,PyObject * dt)485 zoneinfo_tzname(PyObject *self, PyObject *dt)
486 {
487     _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
488     if (tti == NULL) {
489         return NULL;
490     }
491     Py_INCREF(tti->tzname);
492     return tti->tzname;
493 }
494 
495 #define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO
496 
497 static PyObject *
zoneinfo_fromutc(PyObject * obj_self,PyObject * dt)498 zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
499 {
500     if (!PyDateTime_Check(dt)) {
501         PyErr_SetString(PyExc_TypeError,
502                         "fromutc: argument must be a datetime");
503         return NULL;
504     }
505     if (GET_DT_TZINFO(dt) != obj_self) {
506         PyErr_SetString(PyExc_ValueError,
507                         "fromutc: dt.tzinfo "
508                         "is not self");
509         return NULL;
510     }
511 
512     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
513 
514     int64_t timestamp;
515     if (get_local_timestamp(dt, &timestamp)) {
516         return NULL;
517     }
518     size_t num_trans = self->num_transitions;
519 
520     _ttinfo *tti = NULL;
521     unsigned char fold = 0;
522 
523     if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
524         tti = self->ttinfo_before;
525     }
526     else if (num_trans == 0 ||
527              timestamp > self->trans_list_utc[num_trans - 1]) {
528         tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
529                                          PyDateTime_GET_YEAR(dt), &fold);
530 
531         // Immediately after the last manual transition, the fold/gap is
532         // between self->trans_ttinfos[num_transitions - 1] and whatever
533         // ttinfo applies immediately after the last transition, not between
534         // the STD and DST rules in the tzrule_after, so we may need to
535         // adjust the fold value.
536         if (num_trans) {
537             _ttinfo *tti_prev = NULL;
538             if (num_trans == 1) {
539                 tti_prev = self->ttinfo_before;
540             }
541             else {
542                 tti_prev = self->trans_ttinfos[num_trans - 2];
543             }
544             int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
545             if (diff > 0 &&
546                 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
547                 fold = 1;
548             }
549         }
550     }
551     else {
552         size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
553         _ttinfo *tti_prev = NULL;
554 
555         if (idx >= 2) {
556             tti_prev = self->trans_ttinfos[idx - 2];
557             tti = self->trans_ttinfos[idx - 1];
558         }
559         else {
560             tti_prev = self->ttinfo_before;
561             tti = self->trans_ttinfos[0];
562         }
563 
564         // Detect fold
565         int64_t shift =
566             (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
567         if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
568             fold = 1;
569         }
570     }
571 
572     PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
573     if (tmp == NULL) {
574         return NULL;
575     }
576 
577     if (fold) {
578         if (PyDateTime_CheckExact(tmp)) {
579             ((PyDateTime_DateTime *)tmp)->fold = 1;
580             dt = tmp;
581         }
582         else {
583             PyObject *replace = PyObject_GetAttrString(tmp, "replace");
584             PyObject *args = PyTuple_New(0);
585             PyObject *kwargs = PyDict_New();
586 
587             Py_DECREF(tmp);
588             if (args == NULL || kwargs == NULL || replace == NULL) {
589                 Py_XDECREF(args);
590                 Py_XDECREF(kwargs);
591                 Py_XDECREF(replace);
592                 return NULL;
593             }
594 
595             dt = NULL;
596             if (!PyDict_SetItemString(kwargs, "fold", _PyLong_GetOne())) {
597                 dt = PyObject_Call(replace, args, kwargs);
598             }
599 
600             Py_DECREF(args);
601             Py_DECREF(kwargs);
602             Py_DECREF(replace);
603 
604             if (dt == NULL) {
605                 return NULL;
606             }
607         }
608     }
609     else {
610         dt = tmp;
611     }
612     return dt;
613 }
614 
615 static PyObject *
zoneinfo_repr(PyZoneInfo_ZoneInfo * self)616 zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
617 {
618     PyObject *rv = NULL;
619     const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
620     if (!(self->key == Py_None)) {
621         rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
622     }
623     else {
624         assert(PyUnicode_Check(self->file_repr));
625         rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
626                                   self->file_repr);
627     }
628 
629     return rv;
630 }
631 
632 static PyObject *
zoneinfo_str(PyZoneInfo_ZoneInfo * self)633 zoneinfo_str(PyZoneInfo_ZoneInfo *self)
634 {
635     if (!(self->key == Py_None)) {
636         Py_INCREF(self->key);
637         return self->key;
638     }
639     else {
640         return zoneinfo_repr(self);
641     }
642 }
643 
644 /* Pickles the ZoneInfo object by key and source.
645  *
646  * ZoneInfo objects are pickled by reference to the TZif file that they came
647  * from, which means that the exact transitions may be different or the file
648  * may not un-pickle if the data has changed on disk in the interim.
649  *
650  * It is necessary to include a bit indicating whether or not the object
651  * was constructed from the cache, because from-cache objects will hit the
652  * unpickling process's cache, whereas no-cache objects will bypass it.
653  *
654  * Objects constructed from ZoneInfo.from_file cannot be pickled.
655  */
656 static PyObject *
zoneinfo_reduce(PyObject * obj_self,PyObject * unused)657 zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
658 {
659     PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
660     if (self->source == SOURCE_FILE) {
661         // Objects constructed from files cannot be pickled.
662         PyObject *pickle = PyImport_ImportModule("pickle");
663         if (pickle == NULL) {
664             return NULL;
665         }
666 
667         PyObject *pickle_error =
668             PyObject_GetAttrString(pickle, "PicklingError");
669         Py_DECREF(pickle);
670         if (pickle_error == NULL) {
671             return NULL;
672         }
673 
674         PyErr_Format(pickle_error,
675                      "Cannot pickle a ZoneInfo file from a file stream.");
676         Py_DECREF(pickle_error);
677         return NULL;
678     }
679 
680     unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
681     PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
682 
683     if (constructor == NULL) {
684         return NULL;
685     }
686 
687     PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
688     Py_DECREF(constructor);
689     return rv;
690 }
691 
692 static PyObject *
zoneinfo__unpickle(PyTypeObject * cls,PyObject * args)693 zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
694 {
695     PyObject *key;
696     unsigned char from_cache;
697     if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
698         return NULL;
699     }
700 
701     if (from_cache) {
702         PyObject *val_args = Py_BuildValue("(O)", key);
703         if (val_args == NULL) {
704             return NULL;
705         }
706 
707         PyObject *rv = zoneinfo_new(cls, val_args, NULL);
708 
709         Py_DECREF(val_args);
710         return rv;
711     }
712     else {
713         return zoneinfo_new_instance(cls, key);
714     }
715 }
716 
717 /* It is relatively expensive to construct new timedelta objects, and in most
718  * cases we're looking at a relatively small number of timedeltas, such as
719  * integer number of hours, etc. We will keep a cache so that we construct
720  * a minimal number of these.
721  *
722  * Possibly this should be replaced with an LRU cache so that it's not possible
723  * for the memory usage to explode from this, but in order for this to be a
724  * serious problem, one would need to deliberately craft a malicious time zone
725  * file with many distinct offsets. As of tzdb 2019c, loading every single zone
726  * fills the cache with ~450 timedeltas for a total size of ~12kB.
727  *
728  * This returns a new reference to the timedelta.
729  */
730 static PyObject *
load_timedelta(long seconds)731 load_timedelta(long seconds)
732 {
733     PyObject *rv;
734     PyObject *pyoffset = PyLong_FromLong(seconds);
735     if (pyoffset == NULL) {
736         return NULL;
737     }
738     rv = PyDict_GetItemWithError(TIMEDELTA_CACHE, pyoffset);
739     if (rv == NULL) {
740         if (PyErr_Occurred()) {
741             goto error;
742         }
743         PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
744             0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
745 
746         if (tmp == NULL) {
747             goto error;
748         }
749 
750         rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
751         Py_DECREF(tmp);
752     }
753 
754     Py_XINCREF(rv);
755     Py_DECREF(pyoffset);
756     return rv;
757 error:
758     Py_DECREF(pyoffset);
759     return NULL;
760 }
761 
762 /* Constructor for _ttinfo object - this starts by initializing the _ttinfo
763  * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
764  * initialized _ttinfo objects.
765  */
766 static int
build_ttinfo(long utcoffset,long dstoffset,PyObject * tzname,_ttinfo * out)767 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
768 {
769     out->utcoff = NULL;
770     out->dstoff = NULL;
771     out->tzname = NULL;
772 
773     out->utcoff_seconds = utcoffset;
774     out->utcoff = load_timedelta(utcoffset);
775     if (out->utcoff == NULL) {
776         return -1;
777     }
778 
779     out->dstoff = load_timedelta(dstoffset);
780     if (out->dstoff == NULL) {
781         return -1;
782     }
783 
784     out->tzname = tzname;
785     Py_INCREF(tzname);
786 
787     return 0;
788 }
789 
790 /* Decrease reference count on any non-NULL members of a _ttinfo  */
791 static void
xdecref_ttinfo(_ttinfo * ttinfo)792 xdecref_ttinfo(_ttinfo *ttinfo)
793 {
794     if (ttinfo != NULL) {
795         Py_XDECREF(ttinfo->utcoff);
796         Py_XDECREF(ttinfo->dstoff);
797         Py_XDECREF(ttinfo->tzname);
798     }
799 }
800 
801 /* Equality function for _ttinfo. */
802 static int
ttinfo_eq(const _ttinfo * const tti0,const _ttinfo * const tti1)803 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
804 {
805     int rv;
806     if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
807         1) {
808         goto end;
809     }
810 
811     if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
812         1) {
813         goto end;
814     }
815 
816     if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
817         1) {
818         goto end;
819     }
820 end:
821     return rv;
822 }
823 
824 /* Given a file-like object, this populates a ZoneInfo object
825  *
826  * The current version calls into a Python function to read the data from
827  * file into Python objects, and this translates those Python objects into
828  * C values and calculates derived values (e.g. dstoff) in C.
829  *
830  * This returns 0 on success and -1 on failure.
831  *
832  * The function will never return while `self` is partially initialized —
833  * the object only needs to be freed / deallocated if this succeeds.
834  */
835 static int
load_data(PyZoneInfo_ZoneInfo * self,PyObject * file_obj)836 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
837 {
838     PyObject *data_tuple = NULL;
839 
840     long *utcoff = NULL;
841     long *dstoff = NULL;
842     size_t *trans_idx = NULL;
843     unsigned char *isdst = NULL;
844 
845     self->trans_list_utc = NULL;
846     self->trans_list_wall[0] = NULL;
847     self->trans_list_wall[1] = NULL;
848     self->trans_ttinfos = NULL;
849     self->_ttinfos = NULL;
850     self->file_repr = NULL;
851 
852     size_t ttinfos_allocated = 0;
853 
854     data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
855 
856     if (data_tuple == NULL) {
857         goto error;
858     }
859 
860     if (!PyTuple_CheckExact(data_tuple)) {
861         PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
862                      data_tuple);
863         goto error;
864     }
865 
866     // Unpack the data tuple
867     PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
868     if (trans_idx_list == NULL) {
869         goto error;
870     }
871 
872     PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
873     if (trans_utc == NULL) {
874         goto error;
875     }
876 
877     PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
878     if (utcoff_list == NULL) {
879         goto error;
880     }
881 
882     PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
883     if (isdst_list == NULL) {
884         goto error;
885     }
886 
887     PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
888     if (abbr == NULL) {
889         goto error;
890     }
891 
892     PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
893     if (tz_str == NULL) {
894         goto error;
895     }
896 
897     // Load the relevant sizes
898     Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
899     if (num_transitions < 0) {
900         goto error;
901     }
902 
903     Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
904     if (num_ttinfos < 0) {
905         goto error;
906     }
907 
908     self->num_transitions = (size_t)num_transitions;
909     self->num_ttinfos = (size_t)num_ttinfos;
910 
911     // Load the transition indices and list
912     self->trans_list_utc =
913         PyMem_Malloc(self->num_transitions * sizeof(int64_t));
914     if (self->trans_list_utc == NULL) {
915         goto error;
916     }
917     trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
918     if (trans_idx == NULL) {
919         goto error;
920     }
921 
922     for (size_t i = 0; i < self->num_transitions; ++i) {
923         PyObject *num = PyTuple_GetItem(trans_utc, i);
924         if (num == NULL) {
925             goto error;
926         }
927         self->trans_list_utc[i] = PyLong_AsLongLong(num);
928         if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
929             goto error;
930         }
931 
932         num = PyTuple_GetItem(trans_idx_list, i);
933         if (num == NULL) {
934             goto error;
935         }
936 
937         Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
938         if (cur_trans_idx == -1) {
939             goto error;
940         }
941 
942         trans_idx[i] = (size_t)cur_trans_idx;
943         if (trans_idx[i] > self->num_ttinfos) {
944             PyErr_Format(
945                 PyExc_ValueError,
946                 "Invalid transition index found while reading TZif: %zd",
947                 cur_trans_idx);
948 
949             goto error;
950         }
951     }
952 
953     // Load UTC offsets and isdst (size num_ttinfos)
954     utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
955     isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
956 
957     if (utcoff == NULL || isdst == NULL) {
958         goto error;
959     }
960     for (size_t i = 0; i < self->num_ttinfos; ++i) {
961         PyObject *num = PyTuple_GetItem(utcoff_list, i);
962         if (num == NULL) {
963             goto error;
964         }
965 
966         utcoff[i] = PyLong_AsLong(num);
967         if (utcoff[i] == -1 && PyErr_Occurred()) {
968             goto error;
969         }
970 
971         num = PyTuple_GetItem(isdst_list, i);
972         if (num == NULL) {
973             goto error;
974         }
975 
976         int isdst_with_error = PyObject_IsTrue(num);
977         if (isdst_with_error == -1) {
978             goto error;
979         }
980         else {
981             isdst[i] = (unsigned char)isdst_with_error;
982         }
983     }
984 
985     dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
986     if (dstoff == NULL) {
987         goto error;
988     }
989 
990     // Derive dstoff and trans_list_wall from the information we've loaded
991     utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
992                      self->num_ttinfos);
993 
994     if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
995                     self->trans_list_wall, self->num_ttinfos,
996                     self->num_transitions)) {
997         goto error;
998     }
999 
1000     // Build _ttinfo objects from utcoff, dstoff and abbr
1001     self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
1002     if (self->_ttinfos == NULL) {
1003         goto error;
1004     }
1005     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1006         PyObject *tzname = PyTuple_GetItem(abbr, i);
1007         if (tzname == NULL) {
1008             goto error;
1009         }
1010 
1011         ttinfos_allocated++;
1012         if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
1013             goto error;
1014         }
1015     }
1016 
1017     // Build our mapping from transition to the ttinfo that applies
1018     self->trans_ttinfos =
1019         PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1020     if (self->trans_ttinfos == NULL) {
1021         goto error;
1022     }
1023     for (size_t i = 0; i < self->num_transitions; ++i) {
1024         size_t ttinfo_idx = trans_idx[i];
1025         assert(ttinfo_idx < self->num_ttinfos);
1026         self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1027     }
1028 
1029     // Set ttinfo_before to the first non-DST transition
1030     for (size_t i = 0; i < self->num_ttinfos; ++i) {
1031         if (!isdst[i]) {
1032             self->ttinfo_before = &(self->_ttinfos[i]);
1033             break;
1034         }
1035     }
1036 
1037     // If there are only DST ttinfos, pick the first one, if there are no
1038     // ttinfos at all, set ttinfo_before to NULL
1039     if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1040         self->ttinfo_before = &(self->_ttinfos[0]);
1041     }
1042 
1043     if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1044         if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1045             goto error;
1046         }
1047     }
1048     else {
1049         if (!self->num_ttinfos) {
1050             PyErr_Format(PyExc_ValueError, "No time zone information found.");
1051             goto error;
1052         }
1053 
1054         size_t idx;
1055         if (!self->num_transitions) {
1056             idx = self->num_ttinfos - 1;
1057         }
1058         else {
1059             idx = trans_idx[self->num_transitions - 1];
1060         }
1061 
1062         _ttinfo *tti = &(self->_ttinfos[idx]);
1063         build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1064                      &(self->tzrule_after));
1065 
1066         // We've abused the build_tzrule constructor to construct an STD-only
1067         // rule mimicking whatever ttinfo we've picked up, but it's possible
1068         // that the one we've picked up is a DST zone, so we need to make sure
1069         // that the dstoff is set correctly in that case.
1070         if (PyObject_IsTrue(tti->dstoff)) {
1071             _ttinfo *tti_after = &(self->tzrule_after.std);
1072             Py_DECREF(tti_after->dstoff);
1073             tti_after->dstoff = tti->dstoff;
1074             Py_INCREF(tti_after->dstoff);
1075         }
1076     }
1077 
1078     // Determine if this is a "fixed offset" zone, meaning that the output of
1079     // the utcoffset, dst and tzname functions does not depend on the specific
1080     // datetime passed.
1081     //
1082     // We make three simplifying assumptions here:
1083     //
1084     // 1. If tzrule_after is not std_only, it has transitions that might occur
1085     //    (it is possible to construct TZ strings that specify STD and DST but
1086     //    no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1087     // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1088     //    represent different offsets.
1089     // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1090     //    fixed-offset zone with extra _ttinfos defined may appear to *not* be
1091     //    a fixed offset zone).
1092     //
1093     // Violations to these assumptions would be fairly exotic, and exotic
1094     // zones should almost certainly not be used with datetime.time (the
1095     // only thing that would be affected by this).
1096     if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1097         self->fixed_offset = 0;
1098     }
1099     else if (self->num_ttinfos == 0) {
1100         self->fixed_offset = 1;
1101     }
1102     else {
1103         int constant_offset =
1104             ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1105         if (constant_offset < 0) {
1106             goto error;
1107         }
1108         else {
1109             self->fixed_offset = constant_offset;
1110         }
1111     }
1112 
1113     int rv = 0;
1114     goto cleanup;
1115 error:
1116     // These resources only need to be freed if we have failed, if we succeed
1117     // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1118     // method to free the relevant resources.
1119     if (self->trans_list_utc != NULL) {
1120         PyMem_Free(self->trans_list_utc);
1121         self->trans_list_utc = NULL;
1122     }
1123 
1124     for (size_t i = 0; i < 2; ++i) {
1125         if (self->trans_list_wall[i] != NULL) {
1126             PyMem_Free(self->trans_list_wall[i]);
1127             self->trans_list_wall[i] = NULL;
1128         }
1129     }
1130 
1131     if (self->_ttinfos != NULL) {
1132         for (size_t i = 0; i < ttinfos_allocated; ++i) {
1133             xdecref_ttinfo(&(self->_ttinfos[i]));
1134         }
1135         PyMem_Free(self->_ttinfos);
1136         self->_ttinfos = NULL;
1137     }
1138 
1139     if (self->trans_ttinfos != NULL) {
1140         PyMem_Free(self->trans_ttinfos);
1141         self->trans_ttinfos = NULL;
1142     }
1143 
1144     rv = -1;
1145 cleanup:
1146     Py_XDECREF(data_tuple);
1147 
1148     if (utcoff != NULL) {
1149         PyMem_Free(utcoff);
1150     }
1151 
1152     if (dstoff != NULL) {
1153         PyMem_Free(dstoff);
1154     }
1155 
1156     if (isdst != NULL) {
1157         PyMem_Free(isdst);
1158     }
1159 
1160     if (trans_idx != NULL) {
1161         PyMem_Free(trans_idx);
1162     }
1163 
1164     return rv;
1165 }
1166 
1167 /* Function to calculate the local timestamp of a transition from the year. */
1168 int64_t
calendarrule_year_to_timestamp(TransitionRuleType * base_self,int year)1169 calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1170 {
1171     CalendarRule *self = (CalendarRule *)base_self;
1172 
1173     // We want (year, month, day of month); we have year and month, but we
1174     // need to turn (week, day-of-week) into day-of-month
1175     //
1176     // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1177     // Week 5 represents the last occurrence of day `day`, so we need to know
1178     // the first weekday of the month and the number of days in the month.
1179     int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1180     uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1181     if (self->month == 2 && is_leap_year(year)) {
1182         days_in_month += 1;
1183     }
1184 
1185     // This equation seems magical, so I'll break it down:
1186     // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1187     //    + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1188     //    because this math is mod 7
1189     // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1190     //    numbers so that -1 % 7 = 6).
1191     // 3. Add 1 because month days are a 1-based index.
1192     int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1193     if (month_day < 0) {
1194         month_day += 7;
1195     }
1196     month_day += 1;
1197 
1198     // Now use a 0-based index version of `week` to calculate the w-th
1199     // occurrence of `day`
1200     month_day += ((int8_t)(self->week) - 1) * 7;
1201 
1202     // month_day will only be > days_in_month if w was 5, and `w` means "last
1203     // occurrence of `d`", so now we just check if we over-shot the end of the
1204     // month and if so knock off 1 week.
1205     if (month_day > days_in_month) {
1206         month_day -= 7;
1207     }
1208 
1209     int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1210     return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1211             (int64_t)(self->minute * 60) + (int64_t)(self->second));
1212 }
1213 
1214 /* Constructor for CalendarRule. */
1215 int
calendarrule_new(uint8_t month,uint8_t week,uint8_t day,int8_t hour,int8_t minute,int8_t second,CalendarRule * out)1216 calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1217                  int8_t minute, int8_t second, CalendarRule *out)
1218 {
1219     // These bounds come from the POSIX standard, which describes an Mm.n.d
1220     // rule as:
1221     //
1222     //   The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1223     //   5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1224     //   may occur in either the fourth or the fifth week). Week 1 is the first
1225     //   week in which the d'th day occurs. Day zero is Sunday.
1226     if (month <= 0 || month > 12) {
1227         PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1228         return -1;
1229     }
1230 
1231     if (week <= 0 || week > 5) {
1232         PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1233         return -1;
1234     }
1235 
1236     // If the 'day' parameter type is changed to a signed type,
1237     // "day < 0" check must be added.
1238     if (/* day < 0 || */ day > 6) {
1239         PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1240         return -1;
1241     }
1242 
1243     TransitionRuleType base = {&calendarrule_year_to_timestamp};
1244 
1245     CalendarRule new_offset = {
1246         .base = base,
1247         .month = month,
1248         .week = week,
1249         .day = day,
1250         .hour = hour,
1251         .minute = minute,
1252         .second = second,
1253     };
1254 
1255     *out = new_offset;
1256     return 0;
1257 }
1258 
1259 /* Function to calculate the local timestamp of a transition from the year.
1260  *
1261  * This translates the day of the year into a local timestamp — either a
1262  * 1-based Julian day, not including leap days, or the 0-based year-day,
1263  * including leap days.
1264  * */
1265 int64_t
dayrule_year_to_timestamp(TransitionRuleType * base_self,int year)1266 dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1267 {
1268     // The function signature requires a TransitionRuleType pointer, but this
1269     // function is only applicable to DayRule* objects.
1270     DayRule *self = (DayRule *)base_self;
1271 
1272     // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1273     // to know the number of days since 1970-01-01, so we must subtract off
1274     // the equivalent of ymd_to_ord(1970, 1, 1).
1275     //
1276     // We subtract off an additional 1 day to account for January 1st (we want
1277     // the number of full days *before* the date of the transition - partial
1278     // days are accounted for in the hour, minute and second portions.
1279     int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1280 
1281     // The Julian day specification skips over February 29th in leap years,
1282     // from the POSIX standard:
1283     //
1284     //   Leap days shall not be counted. That is, in all years-including leap
1285     //   years-February 28 is day 59 and March 1 is day 60. It is impossible to
1286     //   refer explicitly to the occasional February 29.
1287     //
1288     // This is actually more useful than you'd think — if you want a rule that
1289     // always transitions on a given calendar day (other than February 29th),
1290     // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1291     // always refers to December 31st.
1292     unsigned int day = self->day;
1293     if (self->julian && day >= 59 && is_leap_year(year)) {
1294         day += 1;
1295     }
1296 
1297     return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1298            (self->minute * 60) + self->second;
1299 }
1300 
1301 /* Constructor for DayRule. */
1302 static int
dayrule_new(uint8_t julian,unsigned int day,int8_t hour,int8_t minute,int8_t second,DayRule * out)1303 dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1304             int8_t second, DayRule *out)
1305 {
1306     // The POSIX standard specifies that Julian days must be in the range (1 <=
1307     // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1308     // be in the range (0 <= n <= 365).
1309     if (day < julian || day > 365) {
1310         PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1311                      julian, day);
1312         return -1;
1313     }
1314 
1315     TransitionRuleType base = {
1316         &dayrule_year_to_timestamp,
1317     };
1318 
1319     DayRule tmp = {
1320         .base = base,
1321         .julian = julian,
1322         .day = day,
1323         .hour = hour,
1324         .minute = minute,
1325         .second = second,
1326     };
1327 
1328     *out = tmp;
1329 
1330     return 0;
1331 }
1332 
1333 /* Calculate the start and end rules for a _tzrule in the given year. */
1334 static void
tzrule_transitions(_tzrule * rule,int year,int64_t * start,int64_t * end)1335 tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1336 {
1337     assert(rule->start != NULL);
1338     assert(rule->end != NULL);
1339     *start = rule->start->year_to_timestamp(rule->start, year);
1340     *end = rule->end->year_to_timestamp(rule->end, year);
1341 }
1342 
1343 /* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1344  *
1345  * This takes a local timestamp and fold for disambiguation purposes; the year
1346  * could technically be calculated from the timestamp, but given that the
1347  * callers of this function already have the year information accessible from
1348  * the datetime struct, it is taken as an additional parameter to reduce
1349  * unnecessary calculation.
1350  * */
1351 static _ttinfo *
find_tzrule_ttinfo(_tzrule * rule,int64_t ts,unsigned char fold,int year)1352 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1353 {
1354     if (rule->std_only) {
1355         return &(rule->std);
1356     }
1357 
1358     int64_t start, end;
1359     uint8_t isdst;
1360 
1361     tzrule_transitions(rule, year, &start, &end);
1362 
1363     // With fold = 0, the period (denominated in local time) with the smaller
1364     // offset starts at the end of the gap and ends at the end of the fold;
1365     // with fold = 1, it runs from the start of the gap to the beginning of the
1366     // fold.
1367     //
1368     // So in order to determine the DST boundaries we need to know both the
1369     // fold and whether DST is positive or negative (rare), and it turns out
1370     // that this boils down to fold XOR is_positive.
1371     if (fold == (rule->dst_diff >= 0)) {
1372         end -= rule->dst_diff;
1373     }
1374     else {
1375         start += rule->dst_diff;
1376     }
1377 
1378     if (start < end) {
1379         isdst = (ts >= start) && (ts < end);
1380     }
1381     else {
1382         isdst = (ts < end) || (ts >= start);
1383     }
1384 
1385     if (isdst) {
1386         return &(rule->dst);
1387     }
1388     else {
1389         return &(rule->std);
1390     }
1391 }
1392 
1393 /* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1394  *
1395  * This function can determine the _ttinfo that applies at a given epoch time,
1396  * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1397  * This is to be used in the .fromutc() function.
1398  *
1399  * The year is technically a redundant parameter, because it can be calculated
1400  * from the timestamp, but all callers of this function should have the year
1401  * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1402  * calculation.
1403  **/
1404 static _ttinfo *
find_tzrule_ttinfo_fromutc(_tzrule * rule,int64_t ts,int year,unsigned char * fold)1405 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1406                            unsigned char *fold)
1407 {
1408     if (rule->std_only) {
1409         *fold = 0;
1410         return &(rule->std);
1411     }
1412 
1413     int64_t start, end;
1414     uint8_t isdst;
1415     tzrule_transitions(rule, year, &start, &end);
1416     start -= rule->std.utcoff_seconds;
1417     end -= rule->dst.utcoff_seconds;
1418 
1419     if (start < end) {
1420         isdst = (ts >= start) && (ts < end);
1421     }
1422     else {
1423         isdst = (ts < end) || (ts >= start);
1424     }
1425 
1426     // For positive DST, the ambiguous period is one dst_diff after the end of
1427     // DST; for negative DST, the ambiguous period is one dst_diff before the
1428     // start of DST.
1429     int64_t ambig_start, ambig_end;
1430     if (rule->dst_diff > 0) {
1431         ambig_start = end;
1432         ambig_end = end + rule->dst_diff;
1433     }
1434     else {
1435         ambig_start = start;
1436         ambig_end = start - rule->dst_diff;
1437     }
1438 
1439     *fold = (ts >= ambig_start) && (ts < ambig_end);
1440 
1441     if (isdst) {
1442         return &(rule->dst);
1443     }
1444     else {
1445         return &(rule->std);
1446     }
1447 }
1448 
1449 /* Parse a TZ string in the format specified by the POSIX standard:
1450  *
1451  *  std offset[dst[offset],start[/time],end[/time]]
1452  *
1453  *  std and dst must be 3 or more characters long and must not contain a
1454  *  leading colon, embedded digits, commas, nor a plus or minus signs; The
1455  *  spaces between "std" and "offset" are only for display and are not actually
1456  *  present in the string.
1457  *
1458  *  The format of the offset is ``[+|-]hh[:mm[:ss]]``
1459  *
1460  * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1461  *
1462  * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1463  */
1464 static int
parse_tz_str(PyObject * tz_str_obj,_tzrule * out)1465 parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1466 {
1467     PyObject *std_abbr = NULL;
1468     PyObject *dst_abbr = NULL;
1469     TransitionRuleType *start = NULL;
1470     TransitionRuleType *end = NULL;
1471     // Initialize offsets to invalid value (> 24 hours)
1472     long std_offset = 1 << 20;
1473     long dst_offset = 1 << 20;
1474 
1475     const char *tz_str = PyBytes_AsString(tz_str_obj);
1476     if (tz_str == NULL) {
1477         return -1;
1478     }
1479     const char *p = tz_str;
1480 
1481     // Read the `std` abbreviation, which must be at least 3 characters long.
1482     Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
1483     if (num_chars < 1) {
1484         PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1485         goto error;
1486     }
1487 
1488     p += num_chars;
1489 
1490     // Now read the STD offset, which is required
1491     num_chars = parse_tz_delta(p, &std_offset);
1492     if (num_chars < 0) {
1493         PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1494         goto error;
1495     }
1496     p += num_chars;
1497 
1498     // If the string ends here, there is no DST, otherwise we must parse the
1499     // DST abbreviation and start and end dates and times.
1500     if (*p == '\0') {
1501         goto complete;
1502     }
1503 
1504     num_chars = parse_abbr(p, &dst_abbr);
1505     if (num_chars < 1) {
1506         PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1507         goto error;
1508     }
1509     p += num_chars;
1510 
1511     if (*p == ',') {
1512         // From the POSIX standard:
1513         //
1514         // If no offset follows dst, the alternative time is assumed to be one
1515         // hour ahead of standard time.
1516         dst_offset = std_offset + 3600;
1517     }
1518     else {
1519         num_chars = parse_tz_delta(p, &dst_offset);
1520         if (num_chars < 0) {
1521             PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1522                          tz_str_obj);
1523             goto error;
1524         }
1525 
1526         p += num_chars;
1527     }
1528 
1529     TransitionRuleType **transitions[2] = {&start, &end};
1530     for (size_t i = 0; i < 2; ++i) {
1531         if (*p != ',') {
1532             PyErr_Format(PyExc_ValueError,
1533                          "Missing transition rules in TZ string: %R",
1534                          tz_str_obj);
1535             goto error;
1536         }
1537         p++;
1538 
1539         num_chars = parse_transition_rule(p, transitions[i]);
1540         if (num_chars < 0) {
1541             PyErr_Format(PyExc_ValueError,
1542                          "Malformed transition rule in TZ string: %R",
1543                          tz_str_obj);
1544             goto error;
1545         }
1546         p += num_chars;
1547     }
1548 
1549     if (*p != '\0') {
1550         PyErr_Format(PyExc_ValueError,
1551                      "Extraneous characters at end of TZ string: %R",
1552                      tz_str_obj);
1553         goto error;
1554     }
1555 
1556 complete:
1557     build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1558     Py_DECREF(std_abbr);
1559     Py_XDECREF(dst_abbr);
1560 
1561     return 0;
1562 error:
1563     Py_XDECREF(std_abbr);
1564     if (dst_abbr != NULL && dst_abbr != Py_None) {
1565         Py_DECREF(dst_abbr);
1566     }
1567 
1568     if (start != NULL) {
1569         PyMem_Free(start);
1570     }
1571 
1572     if (end != NULL) {
1573         PyMem_Free(end);
1574     }
1575 
1576     return -1;
1577 }
1578 
1579 static int
parse_uint(const char * const p,uint8_t * value)1580 parse_uint(const char *const p, uint8_t *value)
1581 {
1582     if (!isdigit(*p)) {
1583         return -1;
1584     }
1585 
1586     *value = (*p) - '0';
1587     return 0;
1588 }
1589 
1590 /* Parse the STD and DST abbreviations from a TZ string. */
1591 static Py_ssize_t
parse_abbr(const char * const p,PyObject ** abbr)1592 parse_abbr(const char *const p, PyObject **abbr)
1593 {
1594     const char *ptr = p;
1595     char buff = *ptr;
1596     const char *str_start;
1597     const char *str_end;
1598 
1599     if (*ptr == '<') {
1600         ptr++;
1601         str_start = ptr;
1602         while ((buff = *ptr) != '>') {
1603             // From the POSIX standard:
1604             //
1605             //   In the quoted form, the first character shall be the less-than
1606             //   ( '<' ) character and the last character shall be the
1607             //   greater-than ( '>' ) character. All characters between these
1608             //   quoting characters shall be alphanumeric characters from the
1609             //   portable character set in the current locale, the plus-sign (
1610             //   '+' ) character, or the minus-sign ( '-' ) character. The std
1611             //   and dst fields in this case shall not include the quoting
1612             //   characters.
1613             if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1614                 buff != '-') {
1615                 return -1;
1616             }
1617             ptr++;
1618         }
1619         str_end = ptr;
1620         ptr++;
1621     }
1622     else {
1623         str_start = p;
1624         // From the POSIX standard:
1625         //
1626         //   In the unquoted form, all characters in these fields shall be
1627         //   alphabetic characters from the portable character set in the
1628         //   current locale.
1629         while (isalpha(*ptr)) {
1630             ptr++;
1631         }
1632         str_end = ptr;
1633     }
1634 
1635     *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1636     if (*abbr == NULL) {
1637         return -1;
1638     }
1639 
1640     return ptr - p;
1641 }
1642 
1643 /* Parse a UTC offset from a TZ str. */
1644 static Py_ssize_t
parse_tz_delta(const char * const p,long * total_seconds)1645 parse_tz_delta(const char *const p, long *total_seconds)
1646 {
1647     // From the POSIX spec:
1648     //
1649     //   Indicates the value added to the local time to arrive at Coordinated
1650     //   Universal Time. The offset has the form:
1651     //
1652     //   hh[:mm[:ss]]
1653     //
1654     //   One or more digits may be used; the value is always interpreted as a
1655     //   decimal number.
1656     //
1657     // The POSIX spec says that the values for `hour` must be between 0 and 24
1658     // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1659     // transition times may be signed and range from -167 to 167.
1660     long sign = -1;
1661     long hours = 0;
1662     long minutes = 0;
1663     long seconds = 0;
1664 
1665     const char *ptr = p;
1666     char buff = *ptr;
1667     if (buff == '-' || buff == '+') {
1668         // Negative numbers correspond to *positive* offsets, from the spec:
1669         //
1670         //   If preceded by a '-', the timezone shall be east of the Prime
1671         //   Meridian; otherwise, it shall be west (which may be indicated by
1672         //   an optional preceding '+' ).
1673         if (buff == '-') {
1674             sign = 1;
1675         }
1676 
1677         ptr++;
1678     }
1679 
1680     // The hour can be 1 or 2 numeric characters
1681     for (size_t i = 0; i < 2; ++i) {
1682         buff = *ptr;
1683         if (!isdigit(buff)) {
1684             if (i == 0) {
1685                 return -1;
1686             }
1687             else {
1688                 break;
1689             }
1690         }
1691 
1692         hours *= 10;
1693         hours += buff - '0';
1694         ptr++;
1695     }
1696 
1697     if (hours > 24 || hours < 0) {
1698         return -1;
1699     }
1700 
1701     // Minutes and seconds always of the format ":dd"
1702     long *outputs[2] = {&minutes, &seconds};
1703     for (size_t i = 0; i < 2; ++i) {
1704         if (*ptr != ':') {
1705             goto complete;
1706         }
1707         ptr++;
1708 
1709         for (size_t j = 0; j < 2; ++j) {
1710             buff = *ptr;
1711             if (!isdigit(buff)) {
1712                 return -1;
1713             }
1714             *(outputs[i]) *= 10;
1715             *(outputs[i]) += buff - '0';
1716             ptr++;
1717         }
1718     }
1719 
1720 complete:
1721     *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1722 
1723     return ptr - p;
1724 }
1725 
1726 /* Parse the date portion of a transition rule. */
1727 static Py_ssize_t
parse_transition_rule(const char * const p,TransitionRuleType ** out)1728 parse_transition_rule(const char *const p, TransitionRuleType **out)
1729 {
1730     // The full transition rule indicates when to change back and forth between
1731     // STD and DST, and has the form:
1732     //
1733     //   date[/time],date[/time]
1734     //
1735     // This function parses an individual date[/time] section, and returns
1736     // the number of characters that contributed to the transition rule. This
1737     // does not include the ',' at the end of the first rule.
1738     //
1739     // The POSIX spec states that if *time* is not given, the default is 02:00.
1740     const char *ptr = p;
1741     int8_t hour = 2;
1742     int8_t minute = 0;
1743     int8_t second = 0;
1744 
1745     // Rules come in one of three flavors:
1746     //
1747     //   1. Jn: Julian day n, with no leap days.
1748     //   2. n: Day of year (0-based, with leap days)
1749     //   3. Mm.n.d: Specifying by month, week and day-of-week.
1750 
1751     if (*ptr == 'M') {
1752         uint8_t month, week, day;
1753         ptr++;
1754         if (parse_uint(ptr, &month)) {
1755             return -1;
1756         }
1757         ptr++;
1758         if (*ptr != '.') {
1759             uint8_t tmp;
1760             if (parse_uint(ptr, &tmp)) {
1761                 return -1;
1762             }
1763 
1764             month *= 10;
1765             month += tmp;
1766             ptr++;
1767         }
1768 
1769         uint8_t *values[2] = {&week, &day};
1770         for (size_t i = 0; i < 2; ++i) {
1771             if (*ptr != '.') {
1772                 return -1;
1773             }
1774             ptr++;
1775 
1776             if (parse_uint(ptr, values[i])) {
1777                 return -1;
1778             }
1779             ptr++;
1780         }
1781 
1782         if (*ptr == '/') {
1783             ptr++;
1784             Py_ssize_t num_chars =
1785                 parse_transition_time(ptr, &hour, &minute, &second);
1786             if (num_chars < 0) {
1787                 return -1;
1788             }
1789             ptr += num_chars;
1790         }
1791 
1792         CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1793         if (rv == NULL) {
1794             return -1;
1795         }
1796 
1797         if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1798             PyMem_Free(rv);
1799             return -1;
1800         }
1801 
1802         *out = (TransitionRuleType *)rv;
1803     }
1804     else {
1805         uint8_t julian = 0;
1806         unsigned int day = 0;
1807         if (*ptr == 'J') {
1808             julian = 1;
1809             ptr++;
1810         }
1811 
1812         for (size_t i = 0; i < 3; ++i) {
1813             if (!isdigit(*ptr)) {
1814                 if (i == 0) {
1815                     return -1;
1816                 }
1817                 break;
1818             }
1819             day *= 10;
1820             day += (*ptr) - '0';
1821             ptr++;
1822         }
1823 
1824         if (*ptr == '/') {
1825             ptr++;
1826             Py_ssize_t num_chars =
1827                 parse_transition_time(ptr, &hour, &minute, &second);
1828             if (num_chars < 0) {
1829                 return -1;
1830             }
1831             ptr += num_chars;
1832         }
1833 
1834         DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1835         if (rv == NULL) {
1836             return -1;
1837         }
1838 
1839         if (dayrule_new(julian, day, hour, minute, second, rv)) {
1840             PyMem_Free(rv);
1841             return -1;
1842         }
1843         *out = (TransitionRuleType *)rv;
1844     }
1845 
1846     return ptr - p;
1847 }
1848 
1849 /* Parse the time portion of a transition rule (e.g. following an /) */
1850 static Py_ssize_t
parse_transition_time(const char * const p,int8_t * hour,int8_t * minute,int8_t * second)1851 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1852                       int8_t *second)
1853 {
1854     // From the spec:
1855     //
1856     //   The time has the same format as offset except that no leading sign
1857     //   ( '-' or '+' ) is allowed.
1858     //
1859     // The format for the offset is:
1860     //
1861     //   h[h][:mm[:ss]]
1862     //
1863     // RFC 8536 also allows transition times to be signed and to range from
1864     // -167 to +167, but the current version only supports [0, 99].
1865     //
1866     // TODO: Support the full range of transition hours.
1867     int8_t *components[3] = {hour, minute, second};
1868     const char *ptr = p;
1869     int8_t sign = 1;
1870 
1871     if (*ptr == '-' || *ptr == '+') {
1872         if (*ptr == '-') {
1873             sign = -1;
1874         }
1875         ptr++;
1876     }
1877 
1878     for (size_t i = 0; i < 3; ++i) {
1879         if (i > 0) {
1880             if (*ptr != ':') {
1881                 break;
1882             }
1883             ptr++;
1884         }
1885 
1886         uint8_t buff = 0;
1887         for (size_t j = 0; j < 2; j++) {
1888             if (!isdigit(*ptr)) {
1889                 if (i == 0 && j > 0) {
1890                     break;
1891                 }
1892                 return -1;
1893             }
1894 
1895             buff *= 10;
1896             buff += (*ptr) - '0';
1897             ptr++;
1898         }
1899 
1900         *(components[i]) = sign * buff;
1901     }
1902 
1903     return ptr - p;
1904 }
1905 
1906 /* Constructor for a _tzrule.
1907  *
1908  * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1909  * case `dst_offset` will be ignored and `start` and `end` are expected to be
1910  * NULL as well.
1911  *
1912  * Returns 0 on success.
1913  */
1914 static int
build_tzrule(PyObject * std_abbr,PyObject * dst_abbr,long std_offset,long dst_offset,TransitionRuleType * start,TransitionRuleType * end,_tzrule * out)1915 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1916              long dst_offset, TransitionRuleType *start,
1917              TransitionRuleType *end, _tzrule *out)
1918 {
1919     _tzrule rv = {{0}};
1920 
1921     rv.start = start;
1922     rv.end = end;
1923 
1924     if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1925         goto error;
1926     }
1927 
1928     if (dst_abbr != NULL) {
1929         rv.dst_diff = dst_offset - std_offset;
1930         if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1931             goto error;
1932         }
1933     }
1934     else {
1935         rv.std_only = 1;
1936     }
1937 
1938     *out = rv;
1939 
1940     return 0;
1941 error:
1942     xdecref_ttinfo(&rv.std);
1943     xdecref_ttinfo(&rv.dst);
1944     return -1;
1945 }
1946 
1947 /* Destructor for _tzrule. */
1948 static void
free_tzrule(_tzrule * tzrule)1949 free_tzrule(_tzrule *tzrule)
1950 {
1951     xdecref_ttinfo(&(tzrule->std));
1952     if (!tzrule->std_only) {
1953         xdecref_ttinfo(&(tzrule->dst));
1954     }
1955 
1956     if (tzrule->start != NULL) {
1957         PyMem_Free(tzrule->start);
1958     }
1959 
1960     if (tzrule->end != NULL) {
1961         PyMem_Free(tzrule->end);
1962     }
1963 }
1964 
1965 /* Calculate DST offsets from transitions and UTC offsets
1966  *
1967  * This is necessary because each C `ttinfo` only contains the UTC offset,
1968  * time zone abbreviation and an isdst boolean - it does not include the
1969  * amount of the DST offset, but we need the amount for the dst() function.
1970  *
1971  * Thus function uses heuristics to infer what the offset should be, so it
1972  * is not guaranteed that this will work for all zones. If we cannot assign
1973  * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1974  * bool(dt.dst()) will always match ttinfo.isdst.
1975  */
1976 static void
utcoff_to_dstoff(size_t * trans_idx,long * utcoffs,long * dstoffs,unsigned char * isdsts,size_t num_transitions,size_t num_ttinfos)1977 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1978                  unsigned char *isdsts, size_t num_transitions,
1979                  size_t num_ttinfos)
1980 {
1981     size_t dst_count = 0;
1982     size_t dst_found = 0;
1983     for (size_t i = 0; i < num_ttinfos; ++i) {
1984         dst_count++;
1985     }
1986 
1987     for (size_t i = 1; i < num_transitions; ++i) {
1988         if (dst_count == dst_found) {
1989             break;
1990         }
1991 
1992         size_t idx = trans_idx[i];
1993         size_t comp_idx = trans_idx[i - 1];
1994 
1995         // Only look at DST offsets that have nto been assigned already
1996         if (!isdsts[idx] || dstoffs[idx] != 0) {
1997             continue;
1998         }
1999 
2000         long dstoff = 0;
2001         long utcoff = utcoffs[idx];
2002 
2003         if (!isdsts[comp_idx]) {
2004             dstoff = utcoff - utcoffs[comp_idx];
2005         }
2006 
2007         if (!dstoff && idx < (num_ttinfos - 1)) {
2008             comp_idx = trans_idx[i + 1];
2009 
2010             // If the following transition is also DST and we couldn't find
2011             // the DST offset by this point, we're going to have to skip it
2012             // and hope this transition gets assigned later
2013             if (isdsts[comp_idx]) {
2014                 continue;
2015             }
2016 
2017             dstoff = utcoff - utcoffs[comp_idx];
2018         }
2019 
2020         if (dstoff) {
2021             dst_found++;
2022             dstoffs[idx] = dstoff;
2023         }
2024     }
2025 
2026     if (dst_found < dst_count) {
2027         // If there are time zones we didn't find a value for, we'll end up
2028         // with dstoff = 0 for something where isdst=1. This is obviously
2029         // wrong — one hour will be a much better guess than 0.
2030         for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2031             if (isdsts[idx] && !dstoffs[idx]) {
2032                 dstoffs[idx] = 3600;
2033             }
2034         }
2035     }
2036 }
2037 
2038 #define _swap(x, y, buffer) \
2039     buffer = x;             \
2040     x = y;                  \
2041     y = buffer;
2042 
2043 /* Calculate transitions in local time from UTC time and offsets.
2044  *
2045  * We want to know when each transition occurs, denominated in the number of
2046  * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2047  * *local time* (note: this is *not* equivalent to the output of
2048  * datetime.timestamp, which is the total number of seconds actual elapsed
2049  * since 1970-01-01T00:00:00Z in UTC).
2050  *
2051  * This is an ambiguous question because "local time" can be ambiguous — but it
2052  * is disambiguated by the `fold` parameter, so we allocate two arrays:
2053  *
2054  *  trans_local[0]: The wall-time transitions for fold=0
2055  *  trans_local[1]: The wall-time transitions for fold=1
2056  *
2057  * This returns 0 on success and a negative number of failure. The trans_local
2058  * arrays must be freed if they are not NULL.
2059  */
2060 static int
ts_to_local(size_t * trans_idx,int64_t * trans_utc,long * utcoff,int64_t * trans_local[2],size_t num_ttinfos,size_t num_transitions)2061 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2062             int64_t *trans_local[2], size_t num_ttinfos,
2063             size_t num_transitions)
2064 {
2065     if (num_transitions == 0) {
2066         return 0;
2067     }
2068 
2069     // Copy the UTC transitions into each array to be modified in place later
2070     for (size_t i = 0; i < 2; ++i) {
2071         trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2072         if (trans_local[i] == NULL) {
2073             return -1;
2074         }
2075 
2076         memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2077     }
2078 
2079     int64_t offset_0, offset_1, buff;
2080     if (num_ttinfos > 1) {
2081         offset_0 = utcoff[0];
2082         offset_1 = utcoff[trans_idx[0]];
2083 
2084         if (offset_1 > offset_0) {
2085             _swap(offset_0, offset_1, buff);
2086         }
2087     }
2088     else {
2089         offset_0 = utcoff[0];
2090         offset_1 = utcoff[0];
2091     }
2092 
2093     trans_local[0][0] += offset_0;
2094     trans_local[1][0] += offset_1;
2095 
2096     for (size_t i = 1; i < num_transitions; ++i) {
2097         offset_0 = utcoff[trans_idx[i - 1]];
2098         offset_1 = utcoff[trans_idx[i]];
2099 
2100         if (offset_1 > offset_0) {
2101             _swap(offset_1, offset_0, buff);
2102         }
2103 
2104         trans_local[0][i] += offset_0;
2105         trans_local[1][i] += offset_1;
2106     }
2107 
2108     return 0;
2109 }
2110 
2111 /* Simple bisect_right binary search implementation */
2112 static size_t
_bisect(const int64_t value,const int64_t * arr,size_t size)2113 _bisect(const int64_t value, const int64_t *arr, size_t size)
2114 {
2115     size_t lo = 0;
2116     size_t hi = size;
2117     size_t m;
2118 
2119     while (lo < hi) {
2120         m = (lo + hi) / 2;
2121         if (arr[m] > value) {
2122             hi = m;
2123         }
2124         else {
2125             lo = m + 1;
2126         }
2127     }
2128 
2129     return hi;
2130 }
2131 
2132 /* Find the ttinfo rules that apply at a given local datetime. */
2133 static _ttinfo *
find_ttinfo(PyZoneInfo_ZoneInfo * self,PyObject * dt)2134 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2135 {
2136     // datetime.time has a .tzinfo attribute that passes None as the dt
2137     // argument; it only really has meaning for fixed-offset zones.
2138     if (dt == Py_None) {
2139         if (self->fixed_offset) {
2140             return &(self->tzrule_after.std);
2141         }
2142         else {
2143             return &NO_TTINFO;
2144         }
2145     }
2146 
2147     int64_t ts;
2148     if (get_local_timestamp(dt, &ts)) {
2149         return NULL;
2150     }
2151 
2152     unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2153     assert(fold < 2);
2154     int64_t *local_transitions = self->trans_list_wall[fold];
2155     size_t num_trans = self->num_transitions;
2156 
2157     if (num_trans && ts < local_transitions[0]) {
2158         return self->ttinfo_before;
2159     }
2160     else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2161         return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2162                                   PyDateTime_GET_YEAR(dt));
2163     }
2164     else {
2165         size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2166         assert(idx < self->num_transitions);
2167         return self->trans_ttinfos[idx];
2168     }
2169 }
2170 
2171 static int
is_leap_year(int year)2172 is_leap_year(int year)
2173 {
2174     const unsigned int ayear = (unsigned int)year;
2175     return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2176 }
2177 
2178 /* Calculates ordinal datetime from year, month and day. */
2179 static int
ymd_to_ord(int y,int m,int d)2180 ymd_to_ord(int y, int m, int d)
2181 {
2182     y -= 1;
2183     int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2184     int yearday = DAYS_BEFORE_MONTH[m];
2185     if (m > 2 && is_leap_year(y + 1)) {
2186         yearday += 1;
2187     }
2188 
2189     return days_before_year + yearday + d;
2190 }
2191 
2192 /* Calculate the number of seconds since 1970-01-01 in local time.
2193  *
2194  * This gets a datetime in the same "units" as self->trans_list_wall so that we
2195  * can easily determine which transitions a datetime falls between. See the
2196  * comment above ts_to_local for more information.
2197  * */
2198 static int
get_local_timestamp(PyObject * dt,int64_t * local_ts)2199 get_local_timestamp(PyObject *dt, int64_t *local_ts)
2200 {
2201     assert(local_ts != NULL);
2202 
2203     int hour, minute, second;
2204     int ord;
2205     if (PyDateTime_CheckExact(dt)) {
2206         int y = PyDateTime_GET_YEAR(dt);
2207         int m = PyDateTime_GET_MONTH(dt);
2208         int d = PyDateTime_GET_DAY(dt);
2209         hour = PyDateTime_DATE_GET_HOUR(dt);
2210         minute = PyDateTime_DATE_GET_MINUTE(dt);
2211         second = PyDateTime_DATE_GET_SECOND(dt);
2212 
2213         ord = ymd_to_ord(y, m, d);
2214     }
2215     else {
2216         PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2217         if (num == NULL) {
2218             return -1;
2219         }
2220 
2221         ord = PyLong_AsLong(num);
2222         Py_DECREF(num);
2223         if (ord == -1 && PyErr_Occurred()) {
2224             return -1;
2225         }
2226 
2227         num = PyObject_GetAttrString(dt, "hour");
2228         if (num == NULL) {
2229             return -1;
2230         }
2231         hour = PyLong_AsLong(num);
2232         Py_DECREF(num);
2233         if (hour == -1) {
2234             return -1;
2235         }
2236 
2237         num = PyObject_GetAttrString(dt, "minute");
2238         if (num == NULL) {
2239             return -1;
2240         }
2241         minute = PyLong_AsLong(num);
2242         Py_DECREF(num);
2243         if (minute == -1) {
2244             return -1;
2245         }
2246 
2247         num = PyObject_GetAttrString(dt, "second");
2248         if (num == NULL) {
2249             return -1;
2250         }
2251         second = PyLong_AsLong(num);
2252         Py_DECREF(num);
2253         if (second == -1) {
2254             return -1;
2255         }
2256     }
2257 
2258     *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2259                 (int64_t)(hour * 3600 + minute * 60 + second);
2260 
2261     return 0;
2262 }
2263 
2264 /////
2265 // Functions for cache handling
2266 
2267 /* Constructor for StrongCacheNode */
2268 static StrongCacheNode *
strong_cache_node_new(PyObject * key,PyObject * zone)2269 strong_cache_node_new(PyObject *key, PyObject *zone)
2270 {
2271     StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2272     if (node == NULL) {
2273         return NULL;
2274     }
2275 
2276     Py_INCREF(key);
2277     Py_INCREF(zone);
2278 
2279     node->next = NULL;
2280     node->prev = NULL;
2281     node->key = key;
2282     node->zone = zone;
2283 
2284     return node;
2285 }
2286 
2287 /* Destructor for StrongCacheNode */
2288 void
strong_cache_node_free(StrongCacheNode * node)2289 strong_cache_node_free(StrongCacheNode *node)
2290 {
2291     Py_XDECREF(node->key);
2292     Py_XDECREF(node->zone);
2293 
2294     PyMem_Free(node);
2295 }
2296 
2297 /* Frees all nodes at or after a specified root in the strong cache.
2298  *
2299  * This can be used on the root node to free the entire cache or it can be used
2300  * to clear all nodes that have been expired (which, if everything is going
2301  * right, will actually only be 1 node at a time).
2302  */
2303 void
strong_cache_free(StrongCacheNode * root)2304 strong_cache_free(StrongCacheNode *root)
2305 {
2306     StrongCacheNode *node = root;
2307     StrongCacheNode *next_node;
2308     while (node != NULL) {
2309         next_node = node->next;
2310         strong_cache_node_free(node);
2311 
2312         node = next_node;
2313     }
2314 }
2315 
2316 /* Removes a node from the cache and update its neighbors.
2317  *
2318  * This is used both when ejecting a node from the cache and when moving it to
2319  * the front of the cache.
2320  */
2321 static void
remove_from_strong_cache(StrongCacheNode * node)2322 remove_from_strong_cache(StrongCacheNode *node)
2323 {
2324     if (ZONEINFO_STRONG_CACHE == node) {
2325         ZONEINFO_STRONG_CACHE = node->next;
2326     }
2327 
2328     if (node->prev != NULL) {
2329         node->prev->next = node->next;
2330     }
2331 
2332     if (node->next != NULL) {
2333         node->next->prev = node->prev;
2334     }
2335 
2336     node->next = NULL;
2337     node->prev = NULL;
2338 }
2339 
2340 /* Retrieves the node associated with a key, if it exists.
2341  *
2342  * This traverses the strong cache until it finds a matching key and returns a
2343  * pointer to the relevant node if found. Returns NULL if no node is found.
2344  *
2345  * root may be NULL, indicating an empty cache.
2346  */
2347 static StrongCacheNode *
find_in_strong_cache(const StrongCacheNode * const root,PyObject * const key)2348 find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2349 {
2350     const StrongCacheNode *node = root;
2351     while (node != NULL) {
2352         int rv = PyObject_RichCompareBool(key, node->key, Py_EQ);
2353         if (rv < 0) {
2354             return NULL;
2355         }
2356         if (rv) {
2357             return (StrongCacheNode *)node;
2358         }
2359 
2360         node = node->next;
2361     }
2362 
2363     return NULL;
2364 }
2365 
2366 /* Ejects a given key from the class's strong cache, if applicable.
2367  *
2368  * This function is used to enable the per-key functionality in clear_cache.
2369  */
2370 static int
eject_from_strong_cache(const PyTypeObject * const type,PyObject * key)2371 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2372 {
2373     if (type != &PyZoneInfo_ZoneInfoType) {
2374         return 0;
2375     }
2376 
2377     StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2378     if (node != NULL) {
2379         remove_from_strong_cache(node);
2380 
2381         strong_cache_node_free(node);
2382     }
2383     else if (PyErr_Occurred()) {
2384         return -1;
2385     }
2386     return 0;
2387 }
2388 
2389 /* Moves a node to the front of the LRU cache.
2390  *
2391  * The strong cache is an LRU cache, so whenever a given node is accessed, if
2392  * it is not at the front of the cache, it needs to be moved there.
2393  */
2394 static void
move_strong_cache_node_to_front(StrongCacheNode ** root,StrongCacheNode * node)2395 move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2396 {
2397     StrongCacheNode *root_p = *root;
2398     if (root_p == node) {
2399         return;
2400     }
2401 
2402     remove_from_strong_cache(node);
2403 
2404     node->prev = NULL;
2405     node->next = root_p;
2406 
2407     if (root_p != NULL) {
2408         root_p->prev = node;
2409     }
2410 
2411     *root = node;
2412 }
2413 
2414 /* Retrieves a ZoneInfo from the strong cache if it's present.
2415  *
2416  * This function finds the ZoneInfo by key and if found will move the node to
2417  * the front of the LRU cache and return a new reference to it. It returns NULL
2418  * if the key is not in the cache.
2419  *
2420  * The strong cache is currently only implemented for the base class, so this
2421  * always returns a cache miss for subclasses.
2422  */
2423 static PyObject *
zone_from_strong_cache(const PyTypeObject * const type,PyObject * const key)2424 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2425 {
2426     if (type != &PyZoneInfo_ZoneInfoType) {
2427         return NULL;  // Strong cache currently only implemented for base class
2428     }
2429 
2430     StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2431 
2432     if (node != NULL) {
2433         move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2434         Py_INCREF(node->zone);
2435         return node->zone;
2436     }
2437 
2438     return NULL;  // Cache miss
2439 }
2440 
2441 /* Inserts a new key into the strong LRU cache.
2442  *
2443  * This function is only to be used after a cache miss — it creates a new node
2444  * at the front of the cache and ejects any stale entries (keeping the size of
2445  * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2446  */
2447 static void
update_strong_cache(const PyTypeObject * const type,PyObject * key,PyObject * zone)2448 update_strong_cache(const PyTypeObject *const type, PyObject *key,
2449                     PyObject *zone)
2450 {
2451     if (type != &PyZoneInfo_ZoneInfoType) {
2452         return;
2453     }
2454 
2455     StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2456 
2457     move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2458 
2459     StrongCacheNode *node = new_node->next;
2460     for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2461         if (node == NULL) {
2462             return;
2463         }
2464         node = node->next;
2465     }
2466 
2467     // Everything beyond this point needs to be freed
2468     if (node != NULL) {
2469         if (node->prev != NULL) {
2470             node->prev->next = NULL;
2471         }
2472         strong_cache_free(node);
2473     }
2474 }
2475 
2476 /* Clears all entries into a type's strong cache.
2477  *
2478  * Because the strong cache is not implemented for subclasses, this is a no-op
2479  * for everything except the base class.
2480  */
2481 void
clear_strong_cache(const PyTypeObject * const type)2482 clear_strong_cache(const PyTypeObject *const type)
2483 {
2484     if (type != &PyZoneInfo_ZoneInfoType) {
2485         return;
2486     }
2487 
2488     strong_cache_free(ZONEINFO_STRONG_CACHE);
2489     ZONEINFO_STRONG_CACHE = NULL;
2490 }
2491 
2492 static PyObject *
new_weak_cache(void)2493 new_weak_cache(void)
2494 {
2495     PyObject *weakref_module = PyImport_ImportModule("weakref");
2496     if (weakref_module == NULL) {
2497         return NULL;
2498     }
2499 
2500     PyObject *weak_cache =
2501         PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2502     Py_DECREF(weakref_module);
2503     return weak_cache;
2504 }
2505 
2506 static int
initialize_caches(void)2507 initialize_caches(void)
2508 {
2509     // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
2510     if (TIMEDELTA_CACHE == NULL) {
2511         TIMEDELTA_CACHE = PyDict_New();
2512     }
2513     else {
2514         Py_INCREF(TIMEDELTA_CACHE);
2515     }
2516 
2517     if (TIMEDELTA_CACHE == NULL) {
2518         return -1;
2519     }
2520 
2521     if (ZONEINFO_WEAK_CACHE == NULL) {
2522         ZONEINFO_WEAK_CACHE = new_weak_cache();
2523     }
2524     else {
2525         Py_INCREF(ZONEINFO_WEAK_CACHE);
2526     }
2527 
2528     if (ZONEINFO_WEAK_CACHE == NULL) {
2529         return -1;
2530     }
2531 
2532     return 0;
2533 }
2534 
2535 static PyObject *
zoneinfo_init_subclass(PyTypeObject * cls,PyObject * args,PyObject ** kwargs)2536 zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2537 {
2538     PyObject *weak_cache = new_weak_cache();
2539     if (weak_cache == NULL) {
2540         return NULL;
2541     }
2542 
2543     if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache",
2544                                weak_cache) < 0) {
2545         Py_DECREF(weak_cache);
2546         return NULL;
2547     }
2548     Py_DECREF(weak_cache);
2549     Py_RETURN_NONE;
2550 }
2551 
2552 /////
2553 // Specify the ZoneInfo type
2554 static PyMethodDef zoneinfo_methods[] = {
2555     {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2556      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2557      PyDoc_STR("Clear the ZoneInfo cache.")},
2558     {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2559      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2560      PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2561     {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2562      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2563      PyDoc_STR("Create a ZoneInfo file from a file object.")},
2564     {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2565      PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2566                "the given datetime.")},
2567     {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2568      PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2569                "in a zone at the given datetime.")},
2570     {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2571      PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2572                "zone that applies in a zone at a given datetime.")},
2573     {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2574      PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2575                "datetime in local time.")},
2576     {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2577      PyDoc_STR("Function for serialization with the pickle protocol.")},
2578     {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2579      PyDoc_STR("Private method used in unpickling.")},
2580     {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2581      METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2582      PyDoc_STR("Function to initialize subclasses.")},
2583     {NULL} /* Sentinel */
2584 };
2585 
2586 static PyMemberDef zoneinfo_members[] = {
2587     {.name = "key",
2588      .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2589      .type = T_OBJECT_EX,
2590      .flags = READONLY,
2591      .doc = NULL},
2592     {NULL}, /* Sentinel */
2593 };
2594 
2595 static PyTypeObject PyZoneInfo_ZoneInfoType = {
2596     PyVarObject_HEAD_INIT(NULL, 0)  //
2597         .tp_name = "zoneinfo.ZoneInfo",
2598     .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2599     .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2600     .tp_repr = (reprfunc)zoneinfo_repr,
2601     .tp_str = (reprfunc)zoneinfo_str,
2602     .tp_getattro = PyObject_GenericGetAttr,
2603     .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2604     /* .tp_doc = zoneinfo_doc, */
2605     .tp_methods = zoneinfo_methods,
2606     .tp_members = zoneinfo_members,
2607     .tp_new = zoneinfo_new,
2608     .tp_dealloc = zoneinfo_dealloc,
2609 };
2610 
2611 /////
2612 // Specify the _zoneinfo module
2613 static PyMethodDef module_methods[] = {{NULL, NULL}};
2614 static void
module_free(void)2615 module_free(void)
2616 {
2617     Py_XDECREF(_tzpath_find_tzfile);
2618     _tzpath_find_tzfile = NULL;
2619 
2620     Py_XDECREF(_common_mod);
2621     _common_mod = NULL;
2622 
2623     Py_XDECREF(io_open);
2624     io_open = NULL;
2625 
2626     xdecref_ttinfo(&NO_TTINFO);
2627 
2628     if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2629         Py_DECREF(TIMEDELTA_CACHE);
2630     } else {
2631         Py_CLEAR(TIMEDELTA_CACHE);
2632     }
2633 
2634     if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2635         Py_DECREF(ZONEINFO_WEAK_CACHE);
2636     } else {
2637         Py_CLEAR(ZONEINFO_WEAK_CACHE);
2638     }
2639 
2640     clear_strong_cache(&PyZoneInfo_ZoneInfoType);
2641 }
2642 
2643 static int
zoneinfomodule_exec(PyObject * m)2644 zoneinfomodule_exec(PyObject *m)
2645 {
2646     PyDateTime_IMPORT;
2647     if (PyDateTimeAPI == NULL) {
2648         goto error;
2649     }
2650     PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2651     if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2652         goto error;
2653     }
2654 
2655     Py_INCREF(&PyZoneInfo_ZoneInfoType);
2656     PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2657 
2658     /* Populate imports */
2659     PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2660     if (_tzpath_module == NULL) {
2661         goto error;
2662     }
2663 
2664     _tzpath_find_tzfile =
2665         PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2666     Py_DECREF(_tzpath_module);
2667     if (_tzpath_find_tzfile == NULL) {
2668         goto error;
2669     }
2670 
2671     PyObject *io_module = PyImport_ImportModule("io");
2672     if (io_module == NULL) {
2673         goto error;
2674     }
2675 
2676     io_open = PyObject_GetAttrString(io_module, "open");
2677     Py_DECREF(io_module);
2678     if (io_open == NULL) {
2679         goto error;
2680     }
2681 
2682     _common_mod = PyImport_ImportModule("zoneinfo._common");
2683     if (_common_mod == NULL) {
2684         goto error;
2685     }
2686 
2687     if (NO_TTINFO.utcoff == NULL) {
2688         NO_TTINFO.utcoff = Py_None;
2689         NO_TTINFO.dstoff = Py_None;
2690         NO_TTINFO.tzname = Py_None;
2691 
2692         for (size_t i = 0; i < 3; ++i) {
2693             Py_INCREF(Py_None);
2694         }
2695     }
2696 
2697     if (initialize_caches()) {
2698         goto error;
2699     }
2700 
2701     return 0;
2702 
2703 error:
2704     return -1;
2705 }
2706 
2707 static PyModuleDef_Slot zoneinfomodule_slots[] = {
2708     {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2709 
2710 static struct PyModuleDef zoneinfomodule = {
2711     PyModuleDef_HEAD_INIT,
2712     .m_name = "_zoneinfo",
2713     .m_doc = "C implementation of the zoneinfo module",
2714     .m_size = 0,
2715     .m_methods = module_methods,
2716     .m_slots = zoneinfomodule_slots,
2717     .m_free = (freefunc)module_free};
2718 
2719 PyMODINIT_FUNC
PyInit__zoneinfo(void)2720 PyInit__zoneinfo(void)
2721 {
2722     return PyModuleDef_Init(&zoneinfomodule);
2723 }
2724