import cython

from cpython.datetime cimport (
    PyDate_Check,
    PyDateTime_Check,
    PyDateTime_IMPORT,
    datetime,
    tzinfo,
)

# import datetime C API
PyDateTime_IMPORT


cimport numpy as cnp
from numpy cimport float64_t, int64_t, ndarray

import numpy as np

cnp.import_array()

import pytz

from pandas._libs.tslibs.np_datetime cimport (
    _string_to_dts,
    check_dts_bounds,
    dt64_to_dtstruct,
    dtstruct_to_dt64,
    get_datetime64_value,
    npy_datetimestruct,
    pydate_to_dt64,
    pydatetime_to_dt64,
)
from pandas._libs.util cimport is_datetime64_object, is_float_object, is_integer_object

from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.parsing import parse_datetime_string

from pandas._libs.tslibs.conversion cimport (
    _TSObject,
    cast_from_unit,
    convert_datetime_to_tsobject,
    get_datetime64_nanos,
    precision_from_unit,
)
from pandas._libs.tslibs.nattype cimport (
    NPY_NAT,
    c_NaT as NaT,
    c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.timestamps cimport _Timestamp

from pandas._libs.tslibs.timestamps import Timestamp

# Note: this is the only non-tslibs intra-pandas dependency here
from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single


def _test_parse_iso8601(ts: str):
    """
    TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used
    only for testing, actual construction uses `convert_str_to_tsobject`

    Parameters
    ----------
    ts : str
        Either the literal 'now' / 'today', or a string handed to
        ``_string_to_dts``.

    Returns
    -------
    Timestamp
        Carries a ``pytz.FixedOffset`` tz when the parser reports a local
        offset, otherwise constructed from the bare value.
    """
    cdef:
        _TSObject obj
        int out_local = 0, out_tzoffset = 0

    obj = _TSObject()

    if ts == 'now':
        return Timestamp.utcnow()
    elif ts == 'today':
        return Timestamp.now().normalize()

    _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True)
    obj.value = dtstruct_to_dt64(&obj.dts)
    check_dts_bounds(&obj.dts)
    if out_local == 1:
        # The parsed fields are wall-clock time at the given offset;
        # shift the value to UTC before attaching the tz.
        obj.tzinfo = pytz.FixedOffset(out_tzoffset)
        obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo)
        return Timestamp(obj.value, tz=obj.tzinfo)
    else:
        return Timestamp(obj.value)


@cython.wraparound(False)
@cython.boundscheck(False)
def format_array_from_datetime(
    ndarray[int64_t] values,
    tzinfo tz=None,
    str format=None,
    object na_rep=None
):
    """
    return a np object array of the string formatted values

    Parameters
    ----------
    values : a 1-d i8 array
    tz : tzinfo or None, default None
    format : str or None, default None
        a strftime capable string
    na_rep : optional, default is None
        a nat format; 'NaT' is used when None

    Returns
    -------
    ndarray[object]
        One entry per element of ``values``: ``na_rep`` for NaT entries,
        a formatted string otherwise.
    """
    cdef:
        int64_t val, ns, N = len(values)
        ndarray[int64_t] consider_values
        bint show_ms = False, show_us = False, show_ns = False
        bint basic_format = False
        ndarray[object] result = np.empty(N, dtype=object)
        object ts, res
        npy_datetimestruct dts

    if na_rep is None:
        na_rep = 'NaT'

    # if we don't have a format nor tz, then choose
    # a format based on precision
    basic_format = format is None and tz is None
    if basic_format:
        # Boolean indexing yields a fresh array, so the in-place floor
        # divisions below never touch the caller's ``values``.
        consider_values = values[values != NPY_NAT]
        show_ns = (consider_values % 1000).any()

        if not show_ns:
            consider_values //= 1000
            show_us = (consider_values % 1000).any()

            # Only look for millisecond content when no finer precision was
            # found. (This guard previously tested ``show_ms``, which is
            # always False at this point; the produced strings are the same
            # because ``show_us`` takes precedence when formatting.)
            if not show_us:
                consider_values //= 1000
                show_ms = (consider_values % 1000).any()

    for i in range(N):
        val = values[i]

        if val == NPY_NAT:
            result[i] = na_rep
        elif basic_format:

            dt64_to_dtstruct(val, &dts)
            res = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} '
                   f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}')

            # Append the coarsest fractional part that still represents
            # every non-NaT value in the array.
            if show_ns:
                ns = dts.ps // 1000
                res += f'.{ns + dts.us * 1000:09d}'
            elif show_us:
                res += f'.{dts.us:06d}'
            elif show_ms:
                res += f'.{dts.us // 1000:03d}'

            result[i] = res

        else:

            ts = Timestamp(val, tz=tz)
            if format is None:
                result[i] = str(ts)
            else:

                # invalid format string
                # requires dates > 1900
                try:
                    result[i] = ts.strftime(format)
                except ValueError:
                    result[i] = str(ts)

    return result


def array_with_unit_to_datetime(
    ndarray values,
    str unit,
    str errors="coerce"
):
    """
    Convert the ndarray to datetime according to the time unit.

    This function converts an array of objects into a numpy array of
    datetime64[ns]. It returns the converted array
    and also returns the timezone offset

    if errors:
      - raise: return converted values or raise OutOfBoundsDatetime
          if out of range on the conversion or
          ValueError for other conversions (e.g. a string)
      - ignore: return non-convertible values as the same unit
      - coerce: NaT for non-convertibles

    Parameters
    ----------
    values : ndarray of object
         Date-like objects to convert.
    unit : str
         Time unit to use during conversion.
    errors : str, default 'coerce'
         Error behavior when parsing. Must be one of 'raise', 'ignore'
         or 'coerce'.

    Returns
    -------
    result : ndarray of m8 values
        An object ndarray instead when errors='ignore' and at least one
        element could not be converted.
    tz : parsed timezone offset or None

    Raises
    ------
    OutOfBoundsDatetime
        With errors='raise', when a value does not fit in datetime64[ns].
    ValueError
        With errors='raise', when a value is not numeric / not convertible.
    """
    cdef:
        Py_ssize_t i, n=len(values)
        int64_t m
        int prec = 0
        ndarray[float64_t] fvalues
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'
        bint is_raise = errors=='raise'
        bint need_to_iterate = True
        ndarray[int64_t] iresult
        ndarray[object] oresult
        ndarray mask
        object tz = None

    assert is_ignore or is_coerce or is_raise

    if unit == "ns":
        if issubclass(values.dtype.type, (np.integer, np.float_)):
            result = values.astype("M8[ns]", copy=False)
        else:
            result, tz = array_to_datetime(values.astype(object), errors=errors)
        return result, tz

    # NOTE(review): the second element returned by precision_from_unit is
    # deliberately not stored in ``prec``; ``prec`` therefore stays 0 and the
    # rounding branch below is inactive. Wiring it up would change float
    # results - confirm intent before doing so.
    m, _ = precision_from_unit(unit)

    if is_raise:
        # try a quick conversion to i8/f8
        # if we have nulls that are not type-compat
        # then need to iterate

        if values.dtype.kind == "i" or values.dtype.kind == "f":
            iresult = values.astype("i8", copy=False)
            # fill missing values by comparing to NPY_NAT
            mask = iresult == NPY_NAT
            if mask.any():
                # ``astype(..., copy=False)`` may hand back ``values`` itself
                # (int64 input); copy before zero-filling so the caller's
                # array is not modified.
                iresult = iresult.copy()
                iresult[mask] = 0
            fvalues = iresult.astype("f8") * m
            need_to_iterate = False

        if not need_to_iterate:
            # check the bounds
            if (fvalues < Timestamp.min.value).any() or (
                (fvalues > Timestamp.max.value).any()
            ):
                raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")

            if values.dtype.kind == "i":
                result = (iresult * m).astype("M8[ns]")

            elif values.dtype.kind == "f":
                fresult = (values * m).astype("f8")
                fresult[mask] = 0
                if prec:
                    fresult = round(fresult, prec)
                result = fresult.astype("M8[ns]", copy=False)

            # restore the missing-value sentinel in the output
            iresult = result.view("i8")
            iresult[mask] = NPY_NAT

            return result, tz

    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')

    # AssertionError is used as an internal signal: in 'ignore' mode the
    # first non-convertible element aborts this pass and falls through to
    # the object-dtype pass below.
    try:
        for i in range(n):
            val = values[i]

            if checknull_with_nat_and_na(val):
                iresult[i] = NPY_NAT

            elif is_integer_object(val) or is_float_object(val):

                if val != val or val == NPY_NAT:
                    iresult[i] = NPY_NAT
                else:
                    try:
                        iresult[i] = cast_from_unit(val, unit)
                    except OverflowError:
                        if is_raise:
                            raise OutOfBoundsDatetime(
                                f"cannot convert input {val} with the unit '{unit}'"
                            )
                        elif is_ignore:
                            raise AssertionError
                        iresult[i] = NPY_NAT

            elif isinstance(val, str):
                if len(val) == 0 or val in nat_strings:
                    iresult[i] = NPY_NAT

                else:
                    try:
                        iresult[i] = cast_from_unit(float(val), unit)
                    except ValueError:
                        if is_raise:
                            raise ValueError(
                                f"non convertible value {val} with the unit '{unit}'"
                            )
                        elif is_ignore:
                            raise AssertionError
                        iresult[i] = NPY_NAT
                    except OverflowError:
                        if is_raise:
                            raise OutOfBoundsDatetime(
                                f"cannot convert input {val} with the unit '{unit}'"
                            )
                        elif is_ignore:
                            raise AssertionError
                        iresult[i] = NPY_NAT

            else:

                if is_raise:
                    raise ValueError(
                        f"unit='{unit}' not valid with non-numerical val='{val}'"
                    )
                if is_ignore:
                    raise AssertionError

                iresult[i] = NPY_NAT

        return result, tz

    except AssertionError:
        pass

    # we have hit an exception
    # and are in ignore mode
    # redo as object

    oresult = np.empty(n, dtype=object)
    for i in range(n):
        val = values[i]

        if checknull_with_nat_and_na(val):
            oresult[i] = <object>NaT
        elif is_integer_object(val) or is_float_object(val):

            if val != val or val == NPY_NAT:
                oresult[i] = <object>NaT
            else:
                try:
                    oresult[i] = Timestamp(cast_from_unit(val, unit))
                except OverflowError:
                    oresult[i] = val

        elif isinstance(val, str):
            if len(val) == 0 or val in nat_strings:
                oresult[i] = <object>NaT

            else:
                oresult[i] = val

    return oresult, tz


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef array_to_datetime(
    ndarray[object] values,
    str errors='raise',
    bint dayfirst=False,
    bint yearfirst=False,
    bint utc=False,
    bint require_iso8601=False
):
    """
    Converts a 1D array of date-like values to a numpy array of either:
        1) datetime64[ns] data
        2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
           is encountered

    Also returns a pytz.FixedOffset if an array of strings with the same
    timezone offset is passed and utc=True is not passed. Otherwise, None
    is returned

    Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric,
    strings

    Parameters
    ----------
    values : ndarray of object
         date-like objects to convert
    errors : str, default 'raise'
         error behavior when parsing; one of 'raise', 'ignore', 'coerce'
    dayfirst : bool, default False
         dayfirst parsing behavior when encountering datetime strings
    yearfirst : bool, default False
         yearfirst parsing behavior when encountering datetime strings
    utc : bool, default False
         indicator whether the dates should be UTC
    require_iso8601 : bool, default False
         indicator whether the datetime string should be iso8601

    Returns
    -------
    tuple (ndarray, tzoffset)
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val, py_dt, tz, tz_out = None
        ndarray[int64_t] iresult
        ndarray[object] oresult
        npy_datetimestruct dts
        bint utc_convert = bool(utc)
        bint seen_integer = False
        bint seen_string = False
        bint seen_datetime = False
        bint seen_datetime_offset = False
        bint is_raise = errors=='raise'
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'
        bint is_same_offsets
        _TSObject _ts
        int64_t value
        int out_local = 0, out_tzoffset = 0
        float offset_seconds, tz_offset
        set out_tzoffset_vals = set()
        bint string_to_dts_failed

    # specify error conditions
    assert is_raise or is_ignore or is_coerce

    result = np.empty(n, dtype='M8[ns]')
    # int64 view over the same memory; all element writes go through it
    iresult = result.view('i8')

    try:
        for i in range(n):
            val = values[i]

            try:
                if checknull_with_nat_and_na(val):
                    iresult[i] = NPY_NAT

                elif PyDateTime_Check(val):
                    seen_datetime = True
                    if val.tzinfo is not None:
                        if utc_convert:
                            _ts = convert_datetime_to_tsobject(val, None)
                            iresult[i] = _ts.value
                        else:
                            raise ValueError('Tz-aware datetime.datetime '
                                             'cannot be converted to '
                                             'datetime64 unless utc=True')
                    else:
                        iresult[i] = pydatetime_to_dt64(val, &dts)
                        if isinstance(val, _Timestamp):
                            iresult[i] += val.nanosecond
                        check_dts_bounds(&dts)

                elif PyDate_Check(val):
                    seen_datetime = True
                    iresult[i] = pydate_to_dt64(val, &dts)
                    check_dts_bounds(&dts)

                elif is_datetime64_object(val):
                    seen_datetime = True
                    iresult[i] = get_datetime64_nanos(val)

                elif is_integer_object(val) or is_float_object(val):
                    # these must be ns unit by-definition
                    seen_integer = True

                    if val != val or val == NPY_NAT:
                        iresult[i] = NPY_NAT
                    elif is_raise or is_ignore:
                        iresult[i] = val
                    else:
                        # coerce
                        # we now need to parse this as if unit='ns'
                        # we can ONLY accept integers at this point
                        # if we have previously (or in future accept
                        # datetimes/strings, then we must coerce)
                        try:
                            iresult[i] = cast_from_unit(val, 'ns')
                        except OverflowError:
                            iresult[i] = NPY_NAT

                elif isinstance(val, str):
                    # string
                    seen_string = True

                    if len(val) == 0 or val in nat_strings:
                        iresult[i] = NPY_NAT
                        continue

                    string_to_dts_failed = _string_to_dts(
                        val, &dts, &out_local,
                        &out_tzoffset, False
                    )
                    if string_to_dts_failed:
                        # An error at this point is a _parsing_ error
                        # specifically _not_ OutOfBoundsDatetime
                        if _parse_today_now(val, &iresult[i]):
                            continue
                        elif require_iso8601:
                            # if requiring iso8601 strings, skip trying
                            # other formats
                            if is_coerce:
                                iresult[i] = NPY_NAT
                                continue
                            elif is_raise:
                                raise ValueError(
                                    f"time data {val} doesn't match format specified"
                                )
                            # errors='ignore': hand the input back untouched
                            return values, tz_out

                        try:
                            py_dt = parse_datetime_string(val,
                                                          dayfirst=dayfirst,
                                                          yearfirst=yearfirst)
                            # If the dateutil parser returned tzinfo, capture it
                            # to check if all arguments have the same tzinfo
                            tz = py_dt.utcoffset()

                        except (ValueError, OverflowError):
                            if is_coerce:
                                iresult[i] = NPY_NAT
                                continue
                            # TypeError routes to the object fallback in the
                            # outer handler below
                            raise TypeError("invalid string coercion to datetime")

                        if tz is not None:
                            seen_datetime_offset = True
                            # dateutil timezone objects cannot be hashed, so
                            # store the UTC offsets in seconds instead
                            out_tzoffset_vals.add(tz.total_seconds())
                        else:
                            # Add a marker for naive string, to track if we are
                            # parsing mixed naive and aware strings
                            out_tzoffset_vals.add('naive')

                        _ts = convert_datetime_to_tsobject(py_dt, None)
                        iresult[i] = _ts.value
                    if not string_to_dts_failed:
                        # No error reported by string_to_dts, pick back up
                        # where we left off
                        value = dtstruct_to_dt64(&dts)
                        if out_local == 1:
                            seen_datetime_offset = True
                            # Store the out_tzoffset in seconds
                            # since we store the total_seconds of
                            # dateutil.tz.tzoffset objects
                            out_tzoffset_vals.add(out_tzoffset * 60.)
                            tz = pytz.FixedOffset(out_tzoffset)
                            value = tz_localize_to_utc_single(value, tz)
                            # reset the out-params before the next string
                            out_local = 0
                            out_tzoffset = 0
                        else:
                            # Add a marker for naive string, to track if we are
                            # parsing mixed naive and aware strings
                            out_tzoffset_vals.add('naive')
                        iresult[i] = value
                        check_dts_bounds(&dts)

                else:
                    if is_coerce:
                        iresult[i] = NPY_NAT
                    else:
                        raise TypeError(f"{type(val)} is not convertible to datetime")

            except OutOfBoundsDatetime:
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                elif require_iso8601 and isinstance(val, str):
                    # GH#19382 for just-barely-OutOfBounds falling back to
                    # dateutil parser will return incorrect result because
                    # it will ignore nanoseconds
                    if is_raise:

                        # Still raise OutOfBoundsDatetime,
                        # as error message is informative.
                        raise

                    assert is_ignore
                    return values, tz_out
                raise

    except OutOfBoundsDatetime:
        if is_raise:
            raise

        return ignore_errors_out_of_bounds_fallback(values), tz_out

    except TypeError:
        return array_to_datetime_object(values, errors, dayfirst, yearfirst)

    if seen_datetime and seen_integer:
        # we have mixed datetimes & integers

        if is_coerce:
            # coerce all of the integers/floats to NaT, preserve
            # the datetimes and other convertibles
            for i in range(n):
                val = values[i]
                if is_integer_object(val) or is_float_object(val):
                    # write the sentinel through the int64 view, as done
                    # everywhere else in this function, instead of assigning
                    # a raw integer to the M8[ns] array
                    iresult[i] = NPY_NAT
        elif is_raise:
            raise ValueError("mixed datetimes and integers in passed array")
        else:
            return array_to_datetime_object(values, errors, dayfirst, yearfirst)

    if seen_datetime_offset and not utc_convert:
        # GH#17697
        # 1) If all the offsets are equal, return one offset for
        #    the parsed dates to (maybe) pass to DatetimeIndex
        # 2) If the offsets are different, then force the parsing down the
        #    object path where an array of datetimes
        #    (with individual dateutil.tzoffsets) are returned
        is_same_offsets = len(out_tzoffset_vals) == 1
        if not is_same_offsets:
            return array_to_datetime_object(values, errors, dayfirst, yearfirst)
        else:
            # offsets were collected in seconds; FixedOffset takes minutes
            tz_offset = out_tzoffset_vals.pop()
            tz_out = pytz.FixedOffset(tz_offset / 60.)
    return result, tz_out


cdef ignore_errors_out_of_bounds_fallback(ndarray[object] values):
    """
    Fallback for array_to_datetime if an OutOfBoundsDatetime is raised
    and errors == "ignore"

    Parameters
    ----------
    values : ndarray[object]

    Returns
    -------
    ndarray[object]
        Same length as ``values``; null-likes are normalised to nan / NaT,
        datetime64 scalars are unboxed with ``.item()``, everything else is
        passed through unchanged.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val

    oresult = np.empty(n, dtype=object)

    for i in range(n):
        val = values[i]

        # set as nan except if its a NaT
        if checknull_with_nat_and_na(val):
            if isinstance(val, float):
                oresult[i] = np.nan
            else:
                oresult[i] = NaT
        elif is_datetime64_object(val):
            if get_datetime64_value(val) == NPY_NAT:
                oresult[i] = NaT
            else:
                oresult[i] = val.item()
        else:
            oresult[i] = val
    return oresult


@cython.wraparound(False)
@cython.boundscheck(False)
cdef array_to_datetime_object(
    ndarray[object] values,
    str errors,
    bint dayfirst=False,
    bint yearfirst=False,
):
    """
    Fall back function for array_to_datetime

    Attempts to parse datetime strings with dateutil to return an array
    of datetime objects

    Parameters
    ----------
    values : ndarray of object
         date-like objects to convert
    errors : str
         error behavior when parsing; one of 'raise', 'ignore', 'coerce'
    dayfirst : bool, default False
         dayfirst parsing behavior when encountering datetime strings
    yearfirst : bool, default False
         yearfirst parsing behavior when encountering datetime strings

    Returns
    -------
    tuple (ndarray, None)
        The ndarray is the original ``values`` when an element cannot be
        handled and errors is not 'raise'.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val
        bint is_ignore = errors == 'ignore'
        bint is_coerce = errors == 'coerce'
        bint is_raise = errors == 'raise'
        ndarray[object] oresult
        npy_datetimestruct dts

    assert is_raise or is_ignore or is_coerce

    oresult = np.empty(n, dtype=object)

    # We return an object array and only attempt to parse:
    # 1) NaT or NaT-like values
    # 2) datetime strings, which we return as datetime.datetime
    for i in range(n):
        val = values[i]
        if checknull_with_nat_and_na(val) or PyDateTime_Check(val):
            # GH 25978. No need to parse NaT-like or datetime-like vals
            oresult[i] = val
        elif isinstance(val, str):
            if len(val) == 0 or val in nat_strings:
                # NOTE(review): stores the *string* 'NaT', not the NaT
                # singleton used in the coerce branch below - confirm that
                # callers rely on this before changing it.
                oresult[i] = 'NaT'
                continue
            try:
                oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
                                                   yearfirst=yearfirst)
                # run the bounds check on the parsed datetime
                pydatetime_to_dt64(oresult[i], &dts)
                check_dts_bounds(&dts)
            except (ValueError, OverflowError):
                if is_coerce:
                    oresult[i] = <object>NaT
                    continue
                if is_raise:
                    raise
                return values, None
        else:
            if is_raise:
                # NOTE(review): bare raise outside a local except block;
                # presumably relies on an exception being handled in the
                # caller at this point - verify for every call site.
                raise
            return values, None
    return oresult, None


cdef inline bint _parse_today_now(str val, int64_t* iresult):
    # Handle the special strings 'now' and 'today': on a match write the
    # corresponding Timestamp value into iresult[0] and return True;
    # otherwise leave iresult untouched and return False.
    #
    # We delay this check for as long as possible
    # because it catches relatively rare cases
    if val == 'now':
        # Note: this is *not* the same as Timestamp('now')
        iresult[0] = Timestamp.utcnow().value
        return True
    elif val == 'today':
        iresult[0] = Timestamp.today().value
        return True
    return False