1import cython
2
3from cpython.datetime cimport (
4    PyDate_Check,
5    PyDateTime_Check,
6    PyDateTime_IMPORT,
7    datetime,
8    tzinfo,
9)
10
11# import datetime C API
12PyDateTime_IMPORT
13
14
15cimport numpy as cnp
16from numpy cimport float64_t, int64_t, ndarray
17
18import numpy as np
19
20cnp.import_array()
21
22import pytz
23
24from pandas._libs.tslibs.np_datetime cimport (
25    _string_to_dts,
26    check_dts_bounds,
27    dt64_to_dtstruct,
28    dtstruct_to_dt64,
29    get_datetime64_value,
30    npy_datetimestruct,
31    pydate_to_dt64,
32    pydatetime_to_dt64,
33)
34from pandas._libs.util cimport is_datetime64_object, is_float_object, is_integer_object
35
36from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
37from pandas._libs.tslibs.parsing import parse_datetime_string
38
39from pandas._libs.tslibs.conversion cimport (
40    _TSObject,
41    cast_from_unit,
42    convert_datetime_to_tsobject,
43    get_datetime64_nanos,
44    precision_from_unit,
45)
46from pandas._libs.tslibs.nattype cimport (
47    NPY_NAT,
48    c_NaT as NaT,
49    c_nat_strings as nat_strings,
50)
51from pandas._libs.tslibs.timestamps cimport _Timestamp
52
53from pandas._libs.tslibs.timestamps import Timestamp
54
55# Note: this is the only non-tslibs intra-pandas dependency here
56from pandas._libs.missing cimport checknull_with_nat_and_na
57from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single
58
59
def _test_parse_iso8601(ts: str):
    """
    TESTING ONLY: build a Timestamp from a string via the iso8601 parser.

    Real construction goes through `convert_str_to_tsobject`; this entry
    point exists so that tests can drive the parser directly.

    Parameters
    ----------
    ts : str
        String to parse. The keywords 'now' and 'today' are answered
        directly without invoking the parser.

    Returns
    -------
    Timestamp
        Carries a pytz.FixedOffset timezone when `_string_to_dts` sets its
        local-offset flag to 1, otherwise it is built from the bare value.
    """
    cdef:
        _TSObject parsed
        int has_offset = 0, offset_val = 0

    parsed = _TSObject()

    # Keyword shortcuts: answered before any parsing takes place.
    if ts == 'now':
        return Timestamp.utcnow()
    if ts == 'today':
        return Timestamp.now().normalize()

    _string_to_dts(ts, &parsed.dts, &has_offset, &offset_val, True)
    parsed.value = dtstruct_to_dt64(&parsed.dts)
    check_dts_bounds(&parsed.dts)

    if has_offset != 1:
        # No offset reported by the parser: return the plain value.
        return Timestamp(parsed.value)

    # An offset was parsed: attach it and shift the value accordingly.
    parsed.tzinfo = pytz.FixedOffset(offset_val)
    parsed.value = tz_localize_to_utc_single(parsed.value, parsed.tzinfo)
    return Timestamp(parsed.value, tz=parsed.tzinfo)
85
86
@cython.wraparound(False)
@cython.boundscheck(False)
def format_array_from_datetime(
    ndarray[int64_t] values,
    tzinfo tz=None,
    str format=None,
    object na_rep=None
):
    """
    Return a np object array of the string formatted values.

    Parameters
    ----------
    values : a 1-d i8 array
    tz : tzinfo or None, default None
    format : str or None, default None
          a strftime capable string
    na_rep : optional, default is None
          a nat format; 'NaT' is used when None is passed

    Returns
    -------
    np.ndarray[object]
        One formatted entry per element of ``values``.

    Notes
    -----
    When neither ``format`` nor ``tz`` is given, every non-NaT entry is
    rendered as ``YYYY-MM-DD HH:MM:SS`` followed by a fractional part whose
    width (none, 3, 6 or 9 digits) is the smallest one able to represent
    all non-NaT values in the array.

    Otherwise each value is wrapped in a ``Timestamp``; if ``strftime``
    raises ValueError the entry falls back to ``str(Timestamp)``.
    """
    cdef:
        Py_ssize_t i
        int64_t val, ns, N = len(values)
        ndarray[int64_t] consider_values
        bint show_ms = False, show_us = False, show_ns = False
        bint basic_format = False
        ndarray[object] result = np.empty(N, dtype=object)
        object ts, res
        npy_datetimestruct dts

    if na_rep is None:
        na_rep = 'NaT'

    # if we don't have a format nor tz, then choose
    # a format based on precision
    basic_format = format is None and tz is None
    if basic_format:
        # Boolean-mask indexing yields a fresh array, so the in-place
        # floor divisions below never touch the caller's ``values``.
        consider_values = values[values != NPY_NAT]
        show_ns = (consider_values % 1000).any()

        if not show_ns:
            consider_values //= 1000
            show_us = (consider_values % 1000).any()

            # Only look for milliseconds when microseconds were ruled out;
            # the formatting loop gives show_us priority over show_ms, so
            # computing show_ms in that case would be wasted work.
            if not show_us:
                consider_values //= 1000
                show_ms = (consider_values % 1000).any()

    for i in range(N):
        val = values[i]

        if val == NPY_NAT:
            result[i] = na_rep
        elif basic_format:

            dt64_to_dtstruct(val, &dts)
            res = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} '
                   f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}')

            if show_ns:
                # combine the us and ps fields into a 9-digit fraction
                ns = dts.ps // 1000
                res += f'.{ns + dts.us * 1000:09d}'
            elif show_us:
                res += f'.{dts.us:06d}'
            elif show_ms:
                res += f'.{dts.us // 1000:03d}'

            result[i] = res

        else:

            ts = Timestamp(val, tz=tz)
            if format is None:
                result[i] = str(ts)
            else:

                # invalid format string
                # requires dates > 1900
                try:
                    result[i] = ts.strftime(format)
                except ValueError:
                    result[i] = str(ts)

    return result
171
172
def array_with_unit_to_datetime(
    ndarray values,
    str unit,
    str errors="coerce"
):
    """
    Convert the ndarray to datetime according to the time unit.

    This function converts an array of objects into a numpy array of
    datetime64[ns]. It returns the converted array
    and also returns the timezone offset

    if errors:
      - raise: return converted values or raise OutOfBoundsDatetime
          if out of range on the conversion or
          ValueError for other conversions (e.g. a string)
      - ignore: return non-convertible values as the same unit
      - coerce: NaT for non-convertibles

    Parameters
    ----------
    values : ndarray of object
         Date-like objects to convert.
    unit : str
         Time unit to use during conversion.
    errors : str, default 'coerce'
         Error behavior when parsing. Must be one of 'raise', 'ignore'
         or 'coerce' (enforced with an assert).

    Returns
    -------
    result : ndarray of m8 values
        In 'ignore' mode, when at least one element could not be
        converted, an object ndarray is returned instead.
    tz : parsed timezone offset or None
        Only ever non-None on the unit == "ns" path, where it is whatever
        array_to_datetime returned.
    """
    cdef:
        Py_ssize_t i, j, n=len(values)
        int64_t m
        int prec = 0
        ndarray[float64_t] fvalues
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'
        bint is_raise = errors=='raise'
        bint need_to_iterate = True
        ndarray[int64_t] iresult
        ndarray[object] oresult
        ndarray mask
        object tz = None

    assert is_ignore or is_coerce or is_raise

    # Nanoseconds need no scaling: either reinterpret numeric data directly
    # or delegate to the general object parser.
    if unit == "ns":
        if issubclass(values.dtype.type, (np.integer, np.float_)):
            result = values.astype("M8[ns]", copy=False)
        else:
            result, tz = array_to_datetime(values.astype(object), errors=errors)
        return result, tz

    # NOTE(review): the second element is bound to `p`, while the rounding
    # check further down reads `prec`, which is initialised to 0 above and
    # never reassigned in this function - so that rounding branch looks
    # unreachable. Confirm which behaviour is intended.
    m, p = precision_from_unit(unit)

    if is_raise:
        # try a quick conversion to i8/f8
        # if we have nulls that are not type-compat
        # then need to iterate

        if values.dtype.kind == "i" or values.dtype.kind == "f":
            # NOTE(review): with copy=False numpy may hand back the input
            # array itself when it is already int64; the masked assignment
            # below would then write into the caller's data - confirm.
            iresult = values.astype("i8", copy=False)
            # fill missing values by comparing to NPY_NAT
            mask = iresult == NPY_NAT
            iresult[mask] = 0
            # float copy scaled by m, used only for the bounds check below
            fvalues = iresult.astype("f8") * m
            need_to_iterate = False

        if not need_to_iterate:
            # check the bounds
            if (fvalues < Timestamp.min.value).any() or (
                (fvalues > Timestamp.max.value).any()
            ):
                raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")

            if values.dtype.kind == "i":
                result = (iresult * m).astype("M8[ns]")

            elif values.dtype.kind == "f":
                # scale the original floats (not the truncated i8 view)
                fresult = (values * m).astype("f8")
                fresult[mask] = 0
                if prec:
                    fresult = round(fresult, prec)
                result = fresult.astype("M8[ns]", copy=False)

            # restore the missing-value sentinel at the masked positions
            iresult = result.view("i8")
            iresult[mask] = NPY_NAT

            return result, tz

    # Element-wise path. `iresult` is an i8 view onto `result`, so writes
    # through it fill the datetime64[ns] output in place.
    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')

    # AssertionError is used as an internal signal: in 'ignore' mode the
    # first non-convertible element aborts this loop and control falls
    # through to the object-dtype pass after the try block.
    try:
        for i in range(n):
            val = values[i]

            if checknull_with_nat_and_na(val):
                iresult[i] = NPY_NAT

            elif is_integer_object(val) or is_float_object(val):

                # `val != val` is true only for NaN
                if val != val or val == NPY_NAT:
                    iresult[i] = NPY_NAT
                else:
                    try:
                        iresult[i] = cast_from_unit(val, unit)
                    except OverflowError:
                        if is_raise:
                            raise OutOfBoundsDatetime(
                                f"cannot convert input {val} with the unit '{unit}'"
                            )
                        elif is_ignore:
                            raise AssertionError
                        # coerce
                        iresult[i] = NPY_NAT

            elif isinstance(val, str):
                if len(val) == 0 or val in nat_strings:
                    iresult[i] = NPY_NAT

                else:
                    # strings are accepted only if they parse as a float
                    try:
                        iresult[i] = cast_from_unit(float(val), unit)
                    except ValueError:
                        if is_raise:
                            raise ValueError(
                                f"non convertible value {val} with the unit '{unit}'"
                            )
                        elif is_ignore:
                            raise AssertionError
                        # coerce
                        iresult[i] = NPY_NAT
                    except OverflowError:
                        if is_raise:
                            raise OutOfBoundsDatetime(
                                f"cannot convert input {val} with the unit '{unit}'"
                            )
                        elif is_ignore:
                            raise AssertionError
                        # coerce
                        iresult[i] = NPY_NAT

            else:
                # neither null, numeric nor str

                if is_raise:
                    raise ValueError(
                        f"unit='{unit}' not valid with non-numerical val='{val}'"
                    )
                if is_ignore:
                    raise AssertionError

                # coerce
                iresult[i] = NPY_NAT

        return result, tz

    except AssertionError:
        pass

    # we have hit an exception
    # and are in ignore mode
    # redo as object

    # NOTE(review): this loop has no branch for values that are neither
    # null, numeric nor str, so those slots keep whatever np.empty left in
    # the object array - confirm that is the desired 'ignore' result.
    oresult = np.empty(n, dtype=object)
    for i in range(n):
        val = values[i]

        if checknull_with_nat_and_na(val):
            oresult[i] = <object>NaT
        elif is_integer_object(val) or is_float_object(val):

            if val != val or val == NPY_NAT:
                oresult[i] = <object>NaT
            else:
                try:
                    oresult[i] = Timestamp(cast_from_unit(val, unit))
                except OverflowError:
                    # out of range: hand the original value back unchanged
                    oresult[i] = val

        elif isinstance(val, str):
            if len(val) == 0 or val in nat_strings:
                oresult[i] = <object>NaT

            else:
                # non-empty strings are passed through untouched here
                oresult[i] = val

    return oresult, tz
360
361
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef array_to_datetime(
    ndarray[object] values,
    str errors='raise',
    bint dayfirst=False,
    bint yearfirst=False,
    bint utc=False,
    bint require_iso8601=False
):
    """
    Converts a 1D array of date-like values to a numpy array of either:
        1) datetime64[ns] data
        2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
           is encountered

    Also returns a pytz.FixedOffset if an array of strings with the same
    timezone offset is passed and utc=True is not passed. Otherwise, None
    is returned

    Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric,
    strings

    Parameters
    ----------
    values : ndarray of object
         date-like objects to convert
    errors : str, default 'raise'
         error behavior when parsing; one of 'raise', 'ignore' or 'coerce'
         (enforced with an assert)
    dayfirst : bool, default False
         dayfirst parsing behavior when encountering datetime strings
    yearfirst : bool, default False
         yearfirst parsing behavior when encountering datetime strings
    utc : bool, default False
         indicator whether the dates should be UTC
    require_iso8601 : bool, default False
         indicator whether the datetime string should be iso8601

    Returns
    -------
    tuple (ndarray, tzoffset)

    Raises
    ------
    ValueError
        If a tz-aware datetime.datetime is encountered and utc is False,
        if require_iso8601 is set, errors='raise' and a string fails the
        iso8601 parse, or if datetimes and integers are mixed with
        errors='raise'.
    OutOfBoundsDatetime
        Re-raised when errors='raise' and a value is out of bounds.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val, py_dt, tz, tz_out = None
        ndarray[int64_t] iresult
        ndarray[object] oresult
        npy_datetimestruct dts
        bint utc_convert = bool(utc)
        bint seen_integer = False
        bint seen_string = False
        bint seen_datetime = False
        bint seen_datetime_offset = False
        bint is_raise = errors=='raise'
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'
        bint is_same_offsets
        _TSObject _ts
        int64_t value
        int out_local = 0, out_tzoffset = 0
        # NOTE(review): offset_seconds is declared but not used anywhere
        # in this function.
        float offset_seconds, tz_offset
        set out_tzoffset_vals = set()
        bint string_to_dts_failed

    # specify error conditions
    assert is_raise or is_ignore or is_coerce

    # `iresult` is an i8 view onto `result`: writes through it populate the
    # datetime64[ns] output in place.
    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')

    # Two layers of exception handling:
    #  - the inner try deals with OutOfBoundsDatetime per element
    #  - the outer try turns an escaping OutOfBoundsDatetime / TypeError
    #    into one of the object-dtype fallbacks
    try:
        for i in range(n):
            val = values[i]

            try:
                if checknull_with_nat_and_na(val):
                    iresult[i] = NPY_NAT

                elif PyDateTime_Check(val):
                    seen_datetime = True
                    if val.tzinfo is not None:
                        if utc_convert:
                            _ts = convert_datetime_to_tsobject(val, None)
                            iresult[i] = _ts.value
                        else:
                            raise ValueError('Tz-aware datetime.datetime '
                                             'cannot be converted to '
                                             'datetime64 unless utc=True')
                    else:
                        iresult[i] = pydatetime_to_dt64(val, &dts)
                        if isinstance(val, _Timestamp):
                            # add the Timestamp's nanosecond attribute on
                            # top of the value computed above
                            iresult[i] += val.nanosecond
                        check_dts_bounds(&dts)

                elif PyDate_Check(val):
                    seen_datetime = True
                    iresult[i] = pydate_to_dt64(val, &dts)
                    check_dts_bounds(&dts)

                elif is_datetime64_object(val):
                    seen_datetime = True
                    iresult[i] = get_datetime64_nanos(val)

                elif is_integer_object(val) or is_float_object(val):
                    # these must be ns unit by-definition
                    seen_integer = True

                    # `val != val` is true only for NaN
                    if val != val or val == NPY_NAT:
                        iresult[i] = NPY_NAT
                    elif is_raise or is_ignore:
                        iresult[i] = val
                    else:
                        # coerce
                        # we now need to parse this as if unit='ns'
                        # we can ONLY accept integers at this point
                        # if we have previously (or in future accept
                        # datetimes/strings, then we must coerce)
                        try:
                            iresult[i] = cast_from_unit(val, 'ns')
                        except OverflowError:
                            iresult[i] = NPY_NAT

                elif isinstance(val, str):
                    # string
                    seen_string = True

                    if len(val) == 0 or val in nat_strings:
                        iresult[i] = NPY_NAT
                        continue

                    # First attempt: the iso8601 parser. A truthy return
                    # value signals failure (see the branch below).
                    string_to_dts_failed = _string_to_dts(
                        val, &dts, &out_local,
                        &out_tzoffset, False
                    )
                    if string_to_dts_failed:
                        # An error at this point is a _parsing_ error
                        # specifically _not_ OutOfBoundsDatetime
                        if _parse_today_now(val, &iresult[i]):
                            continue
                        elif require_iso8601:
                            # if requiring iso8601 strings, skip trying
                            # other formats
                            if is_coerce:
                                iresult[i] = NPY_NAT
                                continue
                            elif is_raise:
                                raise ValueError(
                                    f"time data {val} doesn't match format specified"
                                )
                            # ignore: hand the input back unchanged
                            return values, tz_out

                        # Second attempt: the general datetime-string parser
                        try:
                            py_dt = parse_datetime_string(val,
                                                          dayfirst=dayfirst,
                                                          yearfirst=yearfirst)
                            # If the dateutil parser returned tzinfo, capture it
                            # to check if all arguments have the same tzinfo
                            tz = py_dt.utcoffset()

                        except (ValueError, OverflowError):
                            if is_coerce:
                                iresult[i] = NPY_NAT
                                continue
                            # caught by the outer `except TypeError`, which
                            # switches to the object-array path
                            raise TypeError("invalid string coercion to datetime")

                        if tz is not None:
                            seen_datetime_offset = True
                            # dateutil timezone objects cannot be hashed, so
                            # store the UTC offsets in seconds instead
                            out_tzoffset_vals.add(tz.total_seconds())
                        else:
                            # Add a marker for naive string, to track if we are
                            # parsing mixed naive and aware strings
                            out_tzoffset_vals.add('naive')

                        _ts = convert_datetime_to_tsobject(py_dt, None)
                        iresult[i] = _ts.value
                    if not string_to_dts_failed:
                        # No error reported by string_to_dts, pick back up
                        # where we left off
                        value = dtstruct_to_dt64(&dts)
                        if out_local == 1:
                            seen_datetime_offset = True
                            # Store the out_tzoffset in seconds
                            # since we store the total_seconds of
                            # dateutil.tz.tzoffset objects
                            out_tzoffset_vals.add(out_tzoffset * 60.)
                            tz = pytz.FixedOffset(out_tzoffset)
                            value = tz_localize_to_utc_single(value, tz)
                            # reset the out-params before the next string
                            out_local = 0
                            out_tzoffset = 0
                        else:
                            # Add a marker for naive string, to track if we are
                            # parsing mixed naive and aware strings
                            out_tzoffset_vals.add('naive')
                        iresult[i] = value
                        check_dts_bounds(&dts)

                else:
                    if is_coerce:
                        iresult[i] = NPY_NAT
                    else:
                        # caught by the outer `except TypeError`
                        raise TypeError(f"{type(val)} is not convertible to datetime")

            except OutOfBoundsDatetime:
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                elif require_iso8601 and isinstance(val, str):
                    # GH#19382 for just-barely-OutOfBounds falling back to
                    # dateutil parser will return incorrect result because
                    # it will ignore nanoseconds
                    if is_raise:

                        # Still raise OutOfBoundsDatetime,
                        # as error message is informative.
                        raise

                    assert is_ignore
                    return values, tz_out
                # propagate to the outer OutOfBoundsDatetime handler
                raise

    except OutOfBoundsDatetime:
        if is_raise:
            raise

        # errors == 'ignore' (coerce was already handled per element)
        return ignore_errors_out_of_bounds_fallback(values), tz_out

    except TypeError:
        return array_to_datetime_object(values, errors, dayfirst, yearfirst)

    if seen_datetime and seen_integer:
        # we have mixed datetimes & integers

        if is_coerce:
            # coerce all of the integers/floats to NaT, preserve
            # the datetimes and other convertibles
            for i in range(n):
                val = values[i]
                if is_integer_object(val) or is_float_object(val):
                    result[i] = NPY_NAT
        elif is_raise:
            raise ValueError("mixed datetimes and integers in passed array")
        else:
            return array_to_datetime_object(values, errors, dayfirst, yearfirst)

    if seen_datetime_offset and not utc_convert:
        # GH#17697
        # 1) If all the offsets are equal, return one offset for
        #    the parsed dates to (maybe) pass to DatetimeIndex
        # 2) If the offsets are different, then force the parsing down the
        #    object path where an array of datetimes
        #    (with individual dateutil.tzoffsets) are returned
        is_same_offsets = len(out_tzoffset_vals) == 1
        if not is_same_offsets:
            return array_to_datetime_object(values, errors, dayfirst, yearfirst)
        else:
            # the set holds offsets in seconds; FixedOffset is given
            # the value divided by 60
            tz_offset = out_tzoffset_vals.pop()
            tz_out = pytz.FixedOffset(tz_offset / 60.)
    return result, tz_out
622
623
cdef ignore_errors_out_of_bounds_fallback(ndarray[object] values):
    """
    Build the object-dtype result used by array_to_datetime when an
    OutOfBoundsDatetime was raised and errors == "ignore".

    Null entries are normalised (float nulls to nan, all others to NaT),
    datetime64 scalars are unwrapped with ``.item()`` unless they hold the
    NaT sentinel, and everything else is copied over as-is.

    Parameters
    ----------
    values : ndarray[object]

    Returns
    -------
    ndarray[object]
    """
    cdef:
        Py_ssize_t idx, count = len(values)
        object item

    fallback = np.empty(count, dtype=object)

    for idx in range(count):
        item = values[idx]

        if checknull_with_nat_and_na(item):
            # float nulls stay nan, any other null collapses to NaT
            if not isinstance(item, float):
                fallback[idx] = NaT
            else:
                fallback[idx] = np.nan
            continue

        if not is_datetime64_object(item):
            # nothing to normalise: pass the value through
            fallback[idx] = item
            continue

        # datetime64 scalar: map the NaT sentinel, otherwise unwrap it
        if get_datetime64_value(item) != NPY_NAT:
            fallback[idx] = item.item()
        else:
            fallback[idx] = NaT

    return fallback
660
661
@cython.wraparound(False)
@cython.boundscheck(False)
cdef array_to_datetime_object(
    ndarray[object] values,
    str errors,
    bint dayfirst=False,
    bint yearfirst=False,
):
    """
    Object-dtype fall back used by array_to_datetime.

    Null-like and datetime values are copied through unchanged; strings
    are parsed with ``parse_datetime_string`` and bounds-checked. Anything
    else cannot be handled here.

    Parameters
    ----------
    values : ndarray of object
         date-like objects to convert
    errors : str
         error behavior when parsing; one of 'raise', 'ignore', 'coerce'
    dayfirst : bool, default False
         dayfirst parsing behavior when encountering datetime strings
    yearfirst : bool, default False
         yearfirst parsing behavior when encountering datetime strings

    Returns
    -------
    tuple (ndarray, None)
         With errors='ignore' the untouched input array is returned as
         soon as one element cannot be handled.
    """
    cdef:
        Py_ssize_t pos, length = len(values)
        object item, parsed
        bint is_ignore = errors == 'ignore'
        bint is_coerce = errors == 'coerce'
        bint is_raise = errors == 'raise'
        ndarray[object] out
        npy_datetimestruct dts

    assert is_raise or is_ignore or is_coerce

    out = np.empty(length, dtype=object)

    # We return an object array and only attempt to parse:
    # 1) NaT or NaT-like values
    # 2) datetime strings, which we return as datetime.datetime
    for pos in range(length):
        item = values[pos]

        if checknull_with_nat_and_na(item) or PyDateTime_Check(item):
            # GH 25978. No need to parse NaT-like or datetime-like vals
            out[pos] = item
            continue

        if not isinstance(item, str):
            if is_raise:
                # Bare re-raise: nothing is caught inside this function at
                # this point, so this relies on an exception being handled
                # further up the call stack.
                # NOTE(review): confirm every call site that can reach
                # here with errors='raise' sits inside an except block.
                raise
            return values, None

        if len(item) == 0 or item in nat_strings:
            # stored as the literal string, not the NaT singleton
            out[pos] = 'NaT'
            continue

        try:
            parsed = parse_datetime_string(item, dayfirst=dayfirst,
                                           yearfirst=yearfirst)
            out[pos] = parsed
            # conversion is done only to populate dts for the bounds check
            pydatetime_to_dt64(parsed, &dts)
            check_dts_bounds(&dts)
        except (ValueError, OverflowError):
            if is_coerce:
                out[pos] = <object>NaT
                continue
            if is_raise:
                raise
            return values, None

    return out, None
733
734
cdef inline bint _parse_today_now(str val, int64_t* iresult):
    # Recognise the 'now' / 'today' keywords. On a match the corresponding
    # Timestamp value is written through `iresult` and True is returned;
    # otherwise `iresult` is left untouched and False is returned.
    #
    # Callers defer this check as long as they can, since these keywords
    # are relatively rare.
    cdef bint matched = True

    if val == 'now':
        # Note: this is *not* the same as Timestamp('now')
        iresult[0] = Timestamp.utcnow().value
    elif val == 'today':
        iresult[0] = Timestamp.today().value
    else:
        matched = False

    return matched
746