1import collections 2import warnings 3 4import cython 5 6from cpython.object cimport Py_EQ, Py_NE, PyObject_RichCompare 7 8import numpy as np 9 10cimport numpy as cnp 11from numpy cimport int64_t, ndarray 12 13cnp.import_array() 14 15from cpython.datetime cimport ( 16 PyDateTime_Check, 17 PyDateTime_IMPORT, 18 PyDelta_Check, 19 timedelta, 20) 21 22PyDateTime_IMPORT 23 24 25cimport pandas._libs.tslibs.util as util 26from pandas._libs.tslibs.base cimport ABCTimestamp 27from pandas._libs.tslibs.conversion cimport cast_from_unit 28from pandas._libs.tslibs.nattype cimport ( 29 NPY_NAT, 30 c_NaT as NaT, 31 c_nat_strings as nat_strings, 32 checknull_with_nat, 33) 34from pandas._libs.tslibs.np_datetime cimport ( 35 cmp_scalar, 36 pandas_timedeltastruct, 37 td64_to_tdstruct, 38) 39from pandas._libs.tslibs.offsets cimport is_tick_object 40from pandas._libs.tslibs.util cimport ( 41 is_array, 42 is_datetime64_object, 43 is_float_object, 44 is_integer_object, 45 is_timedelta64_object, 46) 47 48# ---------------------------------------------------------------------- 49# Constants 50 51# components named tuple 52Components = collections.namedtuple( 53 "Components", 54 [ 55 "days", 56 "hours", 57 "minutes", 58 "seconds", 59 "milliseconds", 60 "microseconds", 61 "nanoseconds", 62 ], 63) 64 65cdef dict timedelta_abbrevs = { 66 "Y": "Y", 67 "y": "Y", 68 "M": "M", 69 "W": "W", 70 "w": "W", 71 "D": "D", 72 "d": "D", 73 "days": "D", 74 "day": "D", 75 "hours": "h", 76 "hour": "h", 77 "hr": "h", 78 "h": "h", 79 "m": "m", 80 "minute": "m", 81 "min": "m", 82 "minutes": "m", 83 "t": "m", 84 "s": "s", 85 "seconds": "s", 86 "sec": "s", 87 "second": "s", 88 "ms": "ms", 89 "milliseconds": "ms", 90 "millisecond": "ms", 91 "milli": "ms", 92 "millis": "ms", 93 "l": "ms", 94 "us": "us", 95 "microseconds": "us", 96 "microsecond": "us", 97 "µs": "us", 98 "micro": "us", 99 "micros": "us", 100 "u": "us", 101 "ns": "ns", 102 "nanoseconds": "ns", 103 "nano": "ns", 104 "nanos": "ns", 105 "nanosecond": "ns", 106 "n": "ns", 107} 108 109_no_input = object() 110 111 112# ---------------------------------------------------------------------- 113# API 114 115@cython.boundscheck(False) 116@cython.wraparound(False) 117def ints_to_pytimedelta(const int64_t[:] arr, box=False): 118 """ 119 convert an i8 repr to an ndarray of timedelta or Timedelta (if box == 120 True) 121 122 Parameters 123 ---------- 124 arr : ndarray[int64_t] 125 box : bool, default False 126 127 Returns 128 ------- 129 result : ndarray[object] 130 array of Timedelta or timedeltas objects 131 """ 132 cdef: 133 Py_ssize_t i, n = len(arr) 134 int64_t value 135 object[:] result = np.empty(n, dtype=object) 136 137 for i in range(n): 138 139 value = arr[i] 140 if value == NPY_NAT: 141 result[i] = <object>NaT 142 else: 143 if box: 144 result[i] = Timedelta(value) 145 else: 146 result[i] = timedelta(microseconds=int(value) / 1000) 147 148 return result.base # .base to access underlying np.ndarray 149 150 151# ---------------------------------------------------------------------- 152 153cpdef int64_t delta_to_nanoseconds(delta) except? -1: 154 if is_tick_object(delta): 155 return delta.nanos 156 if isinstance(delta, _Timedelta): 157 delta = delta.value 158 if is_timedelta64_object(delta): 159 return delta.astype("timedelta64[ns]").item() 160 if is_integer_object(delta): 161 return delta 162 if PyDelta_Check(delta): 163 return ( 164 delta.days * 24 * 60 * 60 * 1_000_000 165 + delta.seconds * 1_000_000 166 + delta.microseconds 167 ) * 1000 168 169 raise TypeError(type(delta)) 170 171 172cdef convert_to_timedelta64(object ts, str unit): 173 """ 174 Convert an incoming object to a timedelta64 if possible. 175 Before calling, unit must be standardized to avoid repeated unit conversion 176 177 Handle these types of objects: 178 - timedelta/Timedelta 179 - timedelta64 180 - an offset 181 - np.int64 (with unit providing a possible modifier) 182 - None/NaT 183 184 Return an ns based int64 185 """ 186 if checknull_with_nat(ts): 187 return np.timedelta64(NPY_NAT) 188 elif isinstance(ts, _Timedelta): 189 # already in the proper format 190 ts = np.timedelta64(ts.value) 191 elif is_datetime64_object(ts): 192 # only accept a NaT here 193 if ts.astype('int64') == NPY_NAT: 194 return np.timedelta64(NPY_NAT) 195 elif is_timedelta64_object(ts): 196 ts = ts.astype(f"m8[{unit.lower()}]") 197 elif is_integer_object(ts): 198 if ts == NPY_NAT: 199 return np.timedelta64(NPY_NAT) 200 else: 201 if unit in ['Y', 'M', 'W']: 202 ts = np.timedelta64(ts, unit) 203 else: 204 ts = cast_from_unit(ts, unit) 205 ts = np.timedelta64(ts) 206 elif is_float_object(ts): 207 if unit in ['Y', 'M', 'W']: 208 ts = np.timedelta64(int(ts), unit) 209 else: 210 ts = cast_from_unit(ts, unit) 211 ts = np.timedelta64(ts) 212 elif isinstance(ts, str): 213 if len(ts) > 0 and ts[0] == 'P': 214 ts = parse_iso_format_string(ts) 215 else: 216 ts = parse_timedelta_string(ts) 217 ts = np.timedelta64(ts) 218 elif is_tick_object(ts): 219 ts = np.timedelta64(ts.nanos, 'ns') 220 221 if PyDelta_Check(ts): 222 ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') 223 elif not is_timedelta64_object(ts): 224 raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}") 225 return ts.astype('timedelta64[ns]') 226 227 228@cython.boundscheck(False) 229@cython.wraparound(False) 230def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="raise"): 231 """ 232 Convert an ndarray to an array of timedeltas. If errors == 'coerce', 233 coerce non-convertible objects to NaT. Otherwise, raise. 234 """ 235 236 cdef: 237 Py_ssize_t i, n 238 int64_t[:] iresult 239 240 if errors not in {'ignore', 'raise', 'coerce'}: 241 raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}") 242 243 n = values.shape[0] 244 result = np.empty(n, dtype='m8[ns]') 245 iresult = result.view('i8') 246 247 if unit is not None: 248 for i in range(n): 249 if isinstance(values[i], str) and errors != "coerce": 250 raise ValueError( 251 "unit must not be specified if the input contains a str" 252 ) 253 254 # Usually, we have all strings. If so, we hit the fast path. 255 # If this path fails, we try conversion a different way, and 256 # this is where all of the error handling will take place. 257 try: 258 for i in range(n): 259 if values[i] is NaT: 260 # we allow this check in the fast-path because NaT is a C-object 261 # so this is an inexpensive check 262 iresult[i] = NPY_NAT 263 else: 264 result[i] = parse_timedelta_string(values[i]) 265 except (TypeError, ValueError): 266 parsed_unit = parse_timedelta_unit(unit or 'ns') 267 for i in range(n): 268 try: 269 result[i] = convert_to_timedelta64(values[i], parsed_unit) 270 except ValueError: 271 if errors == 'coerce': 272 result[i] = NPY_NAT 273 else: 274 raise 275 276 return iresult.base # .base to access underlying np.ndarray 277 278 279cdef inline int64_t parse_timedelta_string(str ts) except? -1: 280 """ 281 Parse a regular format timedelta string. Return an int64_t (in ns) 282 or raise a ValueError on an invalid parse. 283 """ 284 285 cdef: 286 unicode c 287 bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0 288 object current_unit = None 289 int64_t result = 0, m = 0, r 290 list number = [], frac = [], unit = [] 291 292 # neg : tracks if we have a leading negative for the value 293 # have_dot : tracks if we are processing a dot (either post hhmmss or 294 # inside an expression) 295 # have_value : track if we have at least 1 leading unit 296 # have_hhmmss : tracks if we have a regular format hh:mm:ss 297 298 if len(ts) == 0 or ts in nat_strings: 299 return NPY_NAT 300 301 for c in ts: 302 303 # skip whitespace / commas 304 if c == ' ' or c == ',': 305 pass 306 307 # positive signs are ignored 308 elif c == '+': 309 pass 310 311 # neg 312 elif c == '-': 313 314 if neg or have_value or have_hhmmss: 315 raise ValueError("only leading negative signs are allowed") 316 317 neg = 1 318 319 # number (ascii codes) 320 elif ord(c) >= 48 and ord(c) <= 57: 321 322 if have_dot: 323 324 # we found a dot, but now its just a fraction 325 if len(unit): 326 number.append(c) 327 have_dot = 0 328 else: 329 frac.append(c) 330 331 elif not len(unit): 332 number.append(c) 333 334 else: 335 r = timedelta_from_spec(number, frac, unit) 336 unit, number, frac = [], [c], [] 337 338 result += timedelta_as_neg(r, neg) 339 340 # hh:mm:ss. 341 elif c == ':': 342 343 # we flip this off if we have a leading value 344 if have_value: 345 neg = 0 346 347 # we are in the pattern hh:mm:ss pattern 348 if len(number): 349 if current_unit is None: 350 current_unit = 'h' 351 m = 1000000000 * 3600 352 elif current_unit == 'h': 353 current_unit = 'm' 354 m = 1000000000 * 60 355 elif current_unit == 'm': 356 current_unit = 's' 357 m = 1000000000 358 r = <int64_t>int(''.join(number)) * m 359 result += timedelta_as_neg(r, neg) 360 have_hhmmss = 1 361 else: 362 raise ValueError(f"expecting hh:mm:ss format, received: {ts}") 363 364 unit, number = [], [] 365 366 # after the decimal point 367 elif c == '.': 368 369 if len(number) and current_unit is not None: 370 371 # by definition we had something like 372 # so we need to evaluate the final field from a 373 # hh:mm:ss (so current_unit is 'm') 374 if current_unit != 'm': 375 raise ValueError("expected hh:mm:ss format before .") 376 m = 1000000000 377 r = <int64_t>int(''.join(number)) * m 378 result += timedelta_as_neg(r, neg) 379 have_value = 1 380 unit, number, frac = [], [], [] 381 382 have_dot = 1 383 384 # unit 385 else: 386 unit.append(c) 387 have_value = 1 388 have_dot = 0 389 390 # we had a dot, but we have a fractional 391 # value since we have an unit 392 if have_dot and len(unit): 393 r = timedelta_from_spec(number, frac, unit) 394 result += timedelta_as_neg(r, neg) 395 396 # we have a dot as part of a regular format 397 # e.g. hh:mm:ss.fffffff 398 elif have_dot: 399 400 if ((len(number) or len(frac)) and not len(unit) 401 and current_unit is None): 402 raise ValueError("no units specified") 403 404 if len(frac) > 0 and len(frac) <= 3: 405 m = 10**(3 -len(frac)) * 1000 * 1000 406 elif len(frac) > 3 and len(frac) <= 6: 407 m = 10**(6 -len(frac)) * 1000 408 elif len(frac) > 6 and len(frac) <= 9: 409 m = 10**(9 -len(frac)) 410 else: 411 m = 1 412 frac = frac[:9] 413 r = <int64_t>int(''.join(frac)) * m 414 result += timedelta_as_neg(r, neg) 415 416 # we have a regular format 417 # we must have seconds at this point (hence the unit is still 'm') 418 elif current_unit is not None: 419 if current_unit != 'm': 420 raise ValueError("expected hh:mm:ss format") 421 m = 1000000000 422 r = <int64_t>int(''.join(number)) * m 423 result += timedelta_as_neg(r, neg) 424 425 # we have a last abbreviation 426 elif len(unit): 427 if len(number): 428 r = timedelta_from_spec(number, frac, unit) 429 result += timedelta_as_neg(r, neg) 430 else: 431 raise ValueError("unit abbreviation w/o a number") 432 433 # treat as nanoseconds 434 # but only if we don't have anything else 435 else: 436 if have_value: 437 raise ValueError("have leftover units") 438 if len(number): 439 r = timedelta_from_spec(number, frac, 'ns') 440 result += timedelta_as_neg(r, neg) 441 442 return result 443 444 445cdef inline int64_t timedelta_as_neg(int64_t value, bint neg): 446 """ 447 448 Parameters 449 ---------- 450 value : int64_t of the timedelta value 451 neg : boolean if the a negative value 452 """ 453 if neg: 454 return -value 455 return value 456 457 458cdef inline timedelta_from_spec(object number, object frac, object unit): 459 """ 460 461 Parameters 462 ---------- 463 number : a list of number digits 464 frac : a list of frac digits 465 unit : a list of unit characters 466 """ 467 cdef: 468 str n 469 470 try: 471 unit = ''.join(unit) 472 473 if unit in ["M", "Y", "y"]: 474 warnings.warn( 475 "Units 'M', 'Y' and 'y' do not represent unambiguous " 476 "timedelta values and will be removed in a future version", 477 FutureWarning, 478 stacklevel=2, 479 ) 480 481 if unit == 'M': 482 # To parse ISO 8601 string, 'M' should be treated as minute, 483 # not month 484 unit = 'm' 485 unit = parse_timedelta_unit(unit) 486 except KeyError: 487 raise ValueError(f"invalid abbreviation: {unit}") 488 489 n = ''.join(number) + '.' + ''.join(frac) 490 return cast_from_unit(float(n), unit) 491 492 493cpdef inline str parse_timedelta_unit(str unit): 494 """ 495 Parameters 496 ---------- 497 unit : str or None 498 499 Returns 500 ------- 501 str 502 Canonical unit string. 503 504 Raises 505 ------ 506 ValueError : on non-parseable input 507 """ 508 if unit is None: 509 return "ns" 510 elif unit == "M": 511 return unit 512 try: 513 return timedelta_abbrevs[unit.lower()] 514 except (KeyError, AttributeError): 515 raise ValueError(f"invalid unit abbreviation: {unit}") 516 517# ---------------------------------------------------------------------- 518# Timedelta ops utilities 519 520cdef bint _validate_ops_compat(other): 521 # return True if we are compat with operating 522 if checknull_with_nat(other): 523 return True 524 elif is_any_td_scalar(other): 525 return True 526 elif isinstance(other, str): 527 return True 528 return False 529 530 531def _op_unary_method(func, name): 532 def f(self): 533 return Timedelta(func(self.value), unit='ns') 534 f.__name__ = name 535 return f 536 537 538def _binary_op_method_timedeltalike(op, name): 539 # define a binary operation that only works if the other argument is 540 # timedelta like or an array of timedeltalike 541 def f(self, other): 542 if other is NaT: 543 return NaT 544 545 elif is_datetime64_object(other) or ( 546 PyDateTime_Check(other) and not isinstance(other, ABCTimestamp)): 547 # this case is for a datetime object that is specifically 548 # *not* a Timestamp, as the Timestamp case will be 549 # handled after `_validate_ops_compat` returns False below 550 from pandas._libs.tslibs.timestamps import Timestamp 551 return op(self, Timestamp(other)) 552 # We are implicitly requiring the canonical behavior to be 553 # defined by Timestamp methods. 554 555 elif is_array(other): 556 # nd-array like 557 if other.dtype.kind in ['m', 'M']: 558 return op(self.to_timedelta64(), other) 559 elif other.dtype.kind == 'O': 560 return np.array([op(self, x) for x in other]) 561 else: 562 return NotImplemented 563 564 elif not _validate_ops_compat(other): 565 # Includes any of our non-cython classes 566 return NotImplemented 567 568 try: 569 other = Timedelta(other) 570 except ValueError: 571 # failed to parse as timedelta 572 return NotImplemented 573 574 if other is NaT: 575 # e.g. if original other was timedelta64('NaT') 576 return NaT 577 return Timedelta(op(self.value, other.value), unit='ns') 578 579 f.__name__ = name 580 return f 581 582 583# ---------------------------------------------------------------------- 584# Timedelta Construction 585 586cdef inline int64_t parse_iso_format_string(str ts) except? -1: 587 """ 588 Extracts and cleanses the appropriate values from a match object with 589 groups for each component of an ISO 8601 duration 590 591 Parameters 592 ---------- 593 ts: str 594 ISO 8601 Duration formatted string 595 596 Returns 597 ------- 598 ns: int64_t 599 Precision in nanoseconds of matched ISO 8601 duration 600 601 Raises 602 ------ 603 ValueError 604 If ``ts`` cannot be parsed 605 """ 606 607 cdef: 608 unicode c 609 int64_t result = 0, r 610 int p = 0 611 object dec_unit = 'ms', err_msg 612 bint have_dot = 0, have_value = 0, neg = 0 613 list number = [], unit = [] 614 615 err_msg = f"Invalid ISO 8601 Duration format - {ts}" 616 617 for c in ts: 618 # number (ascii codes) 619 if 48 <= ord(c) <= 57: 620 621 have_value = 1 622 if have_dot: 623 if p == 3 and dec_unit != 'ns': 624 unit.append(dec_unit) 625 if dec_unit == 'ms': 626 dec_unit = 'us' 627 elif dec_unit == 'us': 628 dec_unit = 'ns' 629 p = 0 630 p += 1 631 632 if not len(unit): 633 number.append(c) 634 else: 635 r = timedelta_from_spec(number, '0', unit) 636 result += timedelta_as_neg(r, neg) 637 638 neg = 0 639 unit, number = [], [c] 640 else: 641 if c == 'P' or c == 'T': 642 pass # ignore marking characters P and T 643 elif c == '-': 644 if neg or have_value: 645 raise ValueError(err_msg) 646 else: 647 neg = 1 648 elif c in ['W', 'D', 'H', 'M']: 649 if c in ['H', 'M'] and len(number) > 2: 650 raise ValueError(err_msg) 651 if c == 'M': 652 c = 'min' 653 unit.append(c) 654 r = timedelta_from_spec(number, '0', unit) 655 result += timedelta_as_neg(r, neg) 656 657 neg = 0 658 unit, number = [], [] 659 elif c == '.': 660 # append any seconds 661 if len(number): 662 r = timedelta_from_spec(number, '0', 'S') 663 result += timedelta_as_neg(r, neg) 664 unit, number = [], [] 665 have_dot = 1 666 elif c == 'S': 667 if have_dot: # ms, us, or ns 668 if not len(number) or p > 3: 669 raise ValueError(err_msg) 670 # pad to 3 digits as required 671 pad = 3 - p 672 while pad > 0: 673 number.append('0') 674 pad -= 1 675 676 r = timedelta_from_spec(number, '0', dec_unit) 677 result += timedelta_as_neg(r, neg) 678 else: # seconds 679 r = timedelta_from_spec(number, '0', 'S') 680 result += timedelta_as_neg(r, neg) 681 else: 682 raise ValueError(err_msg) 683 684 if not have_value: 685 # Received string only - never parsed any values 686 raise ValueError(err_msg) 687 688 return result 689 690 691cdef _to_py_int_float(v): 692 # Note: This used to be defined inside Timedelta.__new__ 693 # but cython will not allow `cdef` functions to be defined dynamically. 694 if is_integer_object(v): 695 return int(v) 696 elif is_float_object(v): 697 return float(v) 698 raise TypeError(f"Invalid type {type(v)}. Must be int or float.") 699 700 701# Similar to Timestamp/datetime, this is a construction requirement for 702# timedeltas that we need to do object instantiation in python. This will 703# serve as a C extension type that shadows the Python class, where we do any 704# heavy lifting. 705cdef class _Timedelta(timedelta): 706 # cdef readonly: 707 # int64_t value # nanoseconds 708 # object freq # frequency reference 709 # bint is_populated # are my components populated 710 # int64_t _d, _h, _m, _s, _ms, _us, _ns 711 712 # higher than np.ndarray and np.matrix 713 __array_priority__ = 100 714 715 def __hash__(_Timedelta self): 716 if self._has_ns(): 717 return hash(self.value) 718 else: 719 return timedelta.__hash__(self) 720 721 def __richcmp__(_Timedelta self, object other, int op): 722 cdef: 723 _Timedelta ots 724 int ndim 725 726 if isinstance(other, _Timedelta): 727 ots = other 728 elif is_any_td_scalar(other): 729 ots = Timedelta(other) 730 # TODO: watch out for overflows 731 732 elif other is NaT: 733 return op == Py_NE 734 735 elif util.is_array(other): 736 # TODO: watch out for zero-dim 737 if other.dtype.kind == "m": 738 return PyObject_RichCompare(self.asm8, other, op) 739 elif other.dtype.kind == "O": 740 # operate element-wise 741 return np.array( 742 [PyObject_RichCompare(self, x, op) for x in other], 743 dtype=bool, 744 ) 745 if op == Py_EQ: 746 return np.zeros(other.shape, dtype=bool) 747 elif op == Py_NE: 748 return np.ones(other.shape, dtype=bool) 749 return NotImplemented # let other raise TypeError 750 751 else: 752 return NotImplemented 753 754 return cmp_scalar(self.value, ots.value, op) 755 756 cpdef bint _has_ns(self): 757 return self.value % 1000 != 0 758 759 def _ensure_components(_Timedelta self): 760 """ 761 compute the components 762 """ 763 if self.is_populated: 764 return 765 766 cdef: 767 pandas_timedeltastruct tds 768 769 td64_to_tdstruct(self.value, &tds) 770 self._d = tds.days 771 self._h = tds.hrs 772 self._m = tds.min 773 self._s = tds.sec 774 self._ms = tds.ms 775 self._us = tds.us 776 self._ns = tds.ns 777 self._seconds = tds.seconds 778 self._microseconds = tds.microseconds 779 780 self.is_populated = 1 781 782 cpdef timedelta to_pytimedelta(_Timedelta self): 783 """ 784 Convert a pandas Timedelta object into a python timedelta object. 785 786 Timedelta objects are internally saved as numpy datetime64[ns] dtype. 787 Use to_pytimedelta() to convert to object dtype. 788 789 Returns 790 ------- 791 datetime.timedelta or numpy.array of datetime.timedelta 792 793 See Also 794 -------- 795 to_timedelta : Convert argument to Timedelta type. 796 797 Notes 798 ----- 799 Any nanosecond resolution will be lost. 800 """ 801 return timedelta(microseconds=int(self.value) / 1000) 802 803 def to_timedelta64(self) -> np.timedelta64: 804 """ 805 Return a numpy.timedelta64 object with 'ns' precision. 806 """ 807 return np.timedelta64(self.value, 'ns') 808 809 def to_numpy(self, dtype=None, copy=False) -> np.timedelta64: 810 """ 811 Convert the Timedelta to a NumPy timedelta64. 812 813 .. versionadded:: 0.25.0 814 815 This is an alias method for `Timedelta.to_timedelta64()`. The dtype and 816 copy parameters are available here only for compatibility. Their values 817 will not affect the return value. 818 819 Returns 820 ------- 821 numpy.timedelta64 822 823 See Also 824 -------- 825 Series.to_numpy : Similar method for Series. 826 """ 827 return self.to_timedelta64() 828 829 def view(self, dtype): 830 """ 831 Array view compatibility. 832 """ 833 return np.timedelta64(self.value).view(dtype) 834 835 @property 836 def components(self): 837 """ 838 Return a components namedtuple-like. 839 """ 840 self._ensure_components() 841 # return the named tuple 842 return Components(self._d, self._h, self._m, self._s, 843 self._ms, self._us, self._ns) 844 845 @property 846 def delta(self): 847 """ 848 Return the timedelta in nanoseconds (ns), for internal compatibility. 849 850 Returns 851 ------- 852 int 853 Timedelta in nanoseconds. 854 855 Examples 856 -------- 857 >>> td = pd.Timedelta('1 days 42 ns') 858 >>> td.delta 859 86400000000042 860 861 >>> td = pd.Timedelta('3 s') 862 >>> td.delta 863 3000000000 864 865 >>> td = pd.Timedelta('3 ms 5 us') 866 >>> td.delta 867 3005000 868 869 >>> td = pd.Timedelta(42, unit='ns') 870 >>> td.delta 871 42 872 """ 873 return self.value 874 875 @property 876 def asm8(self) -> np.timedelta64: 877 """ 878 Return a numpy timedelta64 array scalar view. 879 880 Provides access to the array scalar view (i.e. a combination of the 881 value and the units) associated with the numpy.timedelta64().view(), 882 including a 64-bit integer representation of the timedelta in 883 nanoseconds (Python int compatible). 884 885 Returns 886 ------- 887 numpy timedelta64 array scalar view 888 Array scalar view of the timedelta in nanoseconds. 889 890 Examples 891 -------- 892 >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') 893 >>> td.asm8 894 numpy.timedelta64(86520000003042,'ns') 895 896 >>> td = pd.Timedelta('2 min 3 s') 897 >>> td.asm8 898 numpy.timedelta64(123000000000,'ns') 899 900 >>> td = pd.Timedelta('3 ms 5 us') 901 >>> td.asm8 902 numpy.timedelta64(3005000,'ns') 903 904 >>> td = pd.Timedelta(42, unit='ns') 905 >>> td.asm8 906 numpy.timedelta64(42,'ns') 907 """ 908 return np.int64(self.value).view('m8[ns]') 909 910 @property 911 def resolution_string(self) -> str: 912 """ 913 Return a string representing the lowest timedelta resolution. 914 915 Each timedelta has a defined resolution that represents the lowest OR 916 most granular level of precision. Each level of resolution is 917 represented by a short string as defined below: 918 919 Resolution: Return value 920 921 * Days: 'D' 922 * Hours: 'H' 923 * Minutes: 'T' 924 * Seconds: 'S' 925 * Milliseconds: 'L' 926 * Microseconds: 'U' 927 * Nanoseconds: 'N' 928 929 Returns 930 ------- 931 str 932 Timedelta resolution. 933 934 Examples 935 -------- 936 >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') 937 >>> td.resolution_string 938 'N' 939 940 >>> td = pd.Timedelta('1 days 2 min 3 us') 941 >>> td.resolution_string 942 'U' 943 944 >>> td = pd.Timedelta('2 min 3 s') 945 >>> td.resolution_string 946 'S' 947 948 >>> td = pd.Timedelta(36, unit='us') 949 >>> td.resolution_string 950 'U' 951 """ 952 self._ensure_components() 953 if self._ns: 954 return "N" 955 elif self._us: 956 return "U" 957 elif self._ms: 958 return "L" 959 elif self._s: 960 return "S" 961 elif self._m: 962 return "T" 963 elif self._h: 964 return "H" 965 else: 966 return "D" 967 968 @property 969 def nanoseconds(self): 970 """ 971 Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. 972 973 Returns 974 ------- 975 int 976 Number of nanoseconds. 977 978 See Also 979 -------- 980 Timedelta.components : Return all attributes with assigned values 981 (i.e. days, hours, minutes, seconds, milliseconds, microseconds, 982 nanoseconds). 983 984 Examples 985 -------- 986 **Using string input** 987 988 >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') 989 990 >>> td.nanoseconds 991 42 992 993 **Using integer input** 994 995 >>> td = pd.Timedelta(42, unit='ns') 996 >>> td.nanoseconds 997 42 998 """ 999 self._ensure_components() 1000 return self._ns 1001 1002 def _repr_base(self, format=None) -> str: 1003 """ 1004 1005 Parameters 1006 ---------- 1007 format : None|all|sub_day|long 1008 1009 Returns 1010 ------- 1011 converted : string of a Timedelta 1012 1013 """ 1014 cdef object sign, seconds_pretty, subs, fmt, comp_dict 1015 1016 self._ensure_components() 1017 1018 if self._d < 0: 1019 sign = " +" 1020 else: 1021 sign = " " 1022 1023 if format == 'all': 1024 fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}." 1025 "{milliseconds:03}{microseconds:03}{nanoseconds:03}") 1026 else: 1027 # if we have a partial day 1028 subs = (self._h or self._m or self._s or 1029 self._ms or self._us or self._ns) 1030 1031 if self._ms or self._us or self._ns: 1032 seconds_fmt = "{seconds:02}.{milliseconds:03}{microseconds:03}" 1033 if self._ns: 1034 # GH#9309 1035 seconds_fmt += "{nanoseconds:03}" 1036 else: 1037 seconds_fmt = "{seconds:02}" 1038 1039 if format == 'sub_day' and not self._d: 1040 fmt = "{hours:02}:{minutes:02}:" + seconds_fmt 1041 elif subs or format == 'long': 1042 fmt = "{days} days{sign}{hours:02}:{minutes:02}:" + seconds_fmt 1043 else: 1044 fmt = "{days} days" 1045 1046 comp_dict = self.components._asdict() 1047 comp_dict['sign'] = sign 1048 1049 return fmt.format(**comp_dict) 1050 1051 def __repr__(self) -> str: 1052 repr_based = self._repr_base(format='long') 1053 return f"Timedelta('{repr_based}')" 1054 1055 def __str__(self) -> str: 1056 return self._repr_base(format='long') 1057 1058 def __bool__(self) -> bool: 1059 return self.value != 0 1060 1061 def isoformat(self) -> str: 1062 """ 1063 Format Timedelta as ISO 8601 Duration like 1064 ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the 1065 values. See https://en.wikipedia.org/wiki/ISO_8601#Durations. 1066 1067 Returns 1068 ------- 1069 str 1070 1071 See Also 1072 -------- 1073 Timestamp.isoformat : Function is used to convert the given 1074 Timestamp object into the ISO format. 1075 1076 Notes 1077 ----- 1078 The longest component is days, whose value may be larger than 1079 365. 1080 Every component is always included, even if its value is 0. 1081 Pandas uses nanosecond precision, so up to 9 decimal places may 1082 be included in the seconds component. 1083 Trailing 0's are removed from the seconds component after the decimal. 1084 We do not 0 pad components, so it's `...T5H...`, not `...T05H...` 1085 1086 Examples 1087 -------- 1088 >>> td = pd.Timedelta(days=6, minutes=50, seconds=3, 1089 ... milliseconds=10, microseconds=10, nanoseconds=12) 1090 1091 >>> td.isoformat() 1092 'P6DT0H50M3.010010012S' 1093 >>> pd.Timedelta(hours=1, seconds=10).isoformat() 1094 'P0DT0H0M10S' 1095 >>> pd.Timedelta(hours=1, seconds=10).isoformat() 1096 'P0DT0H0M10S' 1097 >>> pd.Timedelta(days=500.5).isoformat() 1098 'P500DT12H0MS' 1099 """ 1100 components = self.components 1101 seconds = (f'{components.seconds}.' 1102 f'{components.milliseconds:0>3}' 1103 f'{components.microseconds:0>3}' 1104 f'{components.nanoseconds:0>3}') 1105 # Trim unnecessary 0s, 1.000000000 -> 1 1106 seconds = seconds.rstrip('0').rstrip('.') 1107 tpl = (f'P{components.days}DT{components.hours}' 1108 f'H{components.minutes}M{seconds}S') 1109 return tpl 1110 1111 1112# Python front end to C extension type _Timedelta 1113# This serves as the box for timedelta64 1114 1115class Timedelta(_Timedelta): 1116 """ 1117 Represents a duration, the difference between two dates or times. 1118 1119 Timedelta is the pandas equivalent of python's ``datetime.timedelta`` 1120 and is interchangeable with it in most cases. 1121 1122 Parameters 1123 ---------- 1124 value : Timedelta, timedelta, np.timedelta64, str, or int 1125 unit : str, default 'ns' 1126 Denote the unit of the input, if input is an integer. 1127 1128 Possible values: 1129 1130 * 'W', 'D', 'T', 'S', 'L', 'U', or 'N' 1131 * 'days' or 'day' 1132 * 'hours', 'hour', 'hr', or 'h' 1133 * 'minutes', 'minute', 'min', or 'm' 1134 * 'seconds', 'second', or 'sec' 1135 * 'milliseconds', 'millisecond', 'millis', or 'milli' 1136 * 'microseconds', 'microsecond', 'micros', or 'micro' 1137 * 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'. 1138 1139 **kwargs 1140 Available kwargs: {days, seconds, microseconds, 1141 milliseconds, minutes, hours, weeks}. 1142 Values for construction in compat with datetime.timedelta. 1143 Numpy ints and floats will be coerced to python ints and floats. 1144 1145 Notes 1146 ----- 1147 The ``.value`` attribute is always in ns. 1148 1149 If the precision is higher than nanoseconds, the precision of the duration is 1150 truncated to nanoseconds. 1151 """ 1152 1153 def __new__(cls, object value=_no_input, unit=None, **kwargs): 1154 cdef _Timedelta td_base 1155 1156 if value is _no_input: 1157 if not len(kwargs): 1158 raise ValueError("cannot construct a Timedelta without a " 1159 "value/unit or descriptive keywords " 1160 "(days,seconds....)") 1161 1162 kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} 1163 1164 nano = convert_to_timedelta64(kwargs.pop('nanoseconds', 0), 'ns') 1165 try: 1166 value = nano + convert_to_timedelta64(timedelta(**kwargs), 1167 'ns') 1168 except TypeError as e: 1169 raise ValueError( 1170 "cannot construct a Timedelta from the passed arguments, " 1171 "allowed keywords are " 1172 "[weeks, days, hours, minutes, seconds, " 1173 "milliseconds, microseconds, nanoseconds]" 1174 ) 1175 1176 if unit in {'Y', 'y', 'M'}: 1177 raise ValueError( 1178 "Units 'M', 'Y', and 'y' are no longer supported, as they do not " 1179 "represent unambiguous timedelta values durations." 1180 ) 1181 1182 # GH 30543 if pd.Timedelta already passed, return it 1183 # check that only value is passed 1184 if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0: 1185 return value 1186 elif isinstance(value, _Timedelta): 1187 value = value.value 1188 elif isinstance(value, str): 1189 if unit is not None: 1190 raise ValueError("unit must not be specified if the value is a str") 1191 if len(value) > 0 and value[0] == 'P': 1192 value = parse_iso_format_string(value) 1193 else: 1194 value = parse_timedelta_string(value) 1195 value = np.timedelta64(value) 1196 elif PyDelta_Check(value): 1197 value = convert_to_timedelta64(value, 'ns') 1198 elif is_timedelta64_object(value): 1199 if unit is not None: 1200 value = value.astype(f'timedelta64[{unit}]') 1201 value = value.astype('timedelta64[ns]') 1202 elif is_tick_object(value): 1203 value = np.timedelta64(value.nanos, 'ns') 1204 elif is_integer_object(value) or is_float_object(value): 1205 # unit=None is de-facto 'ns' 1206 unit = parse_timedelta_unit(unit) 1207 value = convert_to_timedelta64(value, unit) 1208 elif checknull_with_nat(value): 1209 return NaT 1210 else: 1211 raise ValueError( 1212 "Value must be Timedelta, string, integer, " 1213 f"float, timedelta or convertible, not {type(value).__name__}" 1214 ) 1215 1216 if is_timedelta64_object(value): 1217 value = value.view('i8') 1218 1219 # nat 1220 if value == NPY_NAT: 1221 return NaT 1222 1223 # make timedelta happy 1224 td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000) 1225 td_base.value = value 1226 td_base.is_populated = 0 1227 return td_base 1228 1229 def __setstate__(self, state): 1230 (value) = state 1231 self.value = value 1232 1233 def __reduce__(self): 1234 object_state = self.value, 1235 return (Timedelta, object_state) 1236 1237 def _round(self, freq, rounder): 1238 cdef: 1239 int64_t result, unit 1240 1241 from pandas._libs.tslibs.offsets import to_offset 1242 unit = to_offset(freq).nanos 1243 result = unit * rounder(self.value / float(unit)) 1244 return Timedelta(result, unit='ns') 1245 1246 def round(self, freq): 1247 """ 1248 Round the Timedelta to the specified resolution. 1249 1250 Parameters 1251 ---------- 1252 freq : str 1253 Frequency string indicating the rounding resolution. 1254 1255 Returns 1256 ------- 1257 a new Timedelta rounded to the given resolution of `freq` 1258 1259 Raises 1260 ------ 1261 ValueError if the freq cannot be converted 1262 """ 1263 return self._round(freq, np.round) 1264 1265 def floor(self, freq): 1266 """ 1267 Return a new Timedelta floored to this resolution. 1268 1269 Parameters 1270 ---------- 1271 freq : str 1272 Frequency string indicating the flooring resolution. 1273 """ 1274 return self._round(freq, np.floor) 1275 1276 def ceil(self, freq): 1277 """ 1278 Return a new Timedelta ceiled to this resolution. 1279 1280 Parameters 1281 ---------- 1282 freq : str 1283 Frequency string indicating the ceiling resolution. 1284 """ 1285 return self._round(freq, np.ceil) 1286 1287 # ---------------------------------------------------------------- 1288 # Arithmetic Methods 1289 # TODO: Can some of these be defined in the cython class? 1290 1291 __inv__ = _op_unary_method(lambda x: -x, '__inv__') 1292 __neg__ = _op_unary_method(lambda x: -x, '__neg__') 1293 __pos__ = _op_unary_method(lambda x: x, '__pos__') 1294 __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') 1295 1296 __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') 1297 __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') 1298 __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') 1299 __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') 1300 1301 def __mul__(self, other): 1302 if is_integer_object(other) or is_float_object(other): 1303 return Timedelta(other * self.value, unit='ns') 1304 1305 elif is_array(other): 1306 # ndarray-like 1307 return other * self.to_timedelta64() 1308 1309 return NotImplemented 1310 1311 __rmul__ = __mul__ 1312 1313 def __truediv__(self, other): 1314 if _should_cast_to_timedelta(other): 1315 # We interpret NaT as timedelta64("NaT") 1316 other = Timedelta(other) 1317 if other is NaT: 1318 return np.nan 1319 return self.value / float(other.value) 1320 1321 elif is_integer_object(other) or is_float_object(other): 1322 # integers or floats 1323 return Timedelta(self.value / other, unit='ns') 1324 1325 elif is_array(other): 1326 return self.to_timedelta64() / other 1327 1328 return NotImplemented 1329 1330 def __rtruediv__(self, other): 1331 if _should_cast_to_timedelta(other): 1332 # We interpret NaT as timedelta64("NaT") 1333 other = Timedelta(other) 1334 if other is NaT: 1335 return np.nan 1336 return float(other.value) / self.value 1337 1338 elif is_array(other): 1339 if other.dtype.kind == "O": 1340 # GH#31869 1341 return np.array([x / self for x in other]) 1342 return other / self.to_timedelta64() 1343 1344 return NotImplemented 1345 1346 def __floordiv__(self, other): 1347 # numpy does not implement floordiv for timedelta64 dtype, so we cannot 1348 # just defer 1349 if _should_cast_to_timedelta(other): 1350 # We interpret NaT as timedelta64("NaT") 1351 other = Timedelta(other) 1352 if other is NaT: 1353 return np.nan 1354 return self.value // other.value 1355 1356 elif is_integer_object(other) or is_float_object(other): 1357 return Timedelta(self.value // other, unit='ns') 1358 1359 elif is_array(other): 1360 if other.dtype.kind == 'm': 1361 # also timedelta-like 1362 return _broadcast_floordiv_td64(self.value, other, _floordiv) 1363 elif other.dtype.kind in ['i', 'u', 'f']: 1364 if other.ndim == 0: 1365 return Timedelta(self.value // other) 1366 else: 1367 return self.to_timedelta64() // other 1368 1369 raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') 1370 1371 return NotImplemented 1372 1373 def __rfloordiv__(self, other): 1374 # numpy does not implement floordiv for timedelta64 dtype, so we cannot 1375 # just defer 1376 if _should_cast_to_timedelta(other): 1377 # We interpret NaT as timedelta64("NaT") 1378 other = Timedelta(other) 1379 if other is NaT: 1380 return np.nan 1381 return other.value // self.value 1382 1383 elif is_array(other): 1384 if other.dtype.kind == 'm': 1385 # also timedelta-like 1386 return _broadcast_floordiv_td64(self.value, other, _rfloordiv) 1387 1388 # Includes integer array // Timedelta, disallowed in GH#19761 1389 raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') 1390 1391 return NotImplemented 1392 1393 def __mod__(self, other): 1394 # Naive implementation, room for optimization 1395 return self.__divmod__(other)[1] 1396 1397 def __rmod__(self, other): 1398 # Naive implementation, room for optimization 1399 return self.__rdivmod__(other)[1] 1400 1401 def __divmod__(self, other): 1402 # Naive implementation, room for optimization 1403 div = self // other 1404 return div, self - div * other 1405 1406 def __rdivmod__(self, other): 1407 # Naive implementation, room for optimization 1408 div = other // self 1409 return div, other - div * self 1410 1411 1412cdef bint is_any_td_scalar(object obj): 1413 """ 1414 Cython equivalent for `isinstance(obj, (timedelta, np.timedelta64, Tick))` 1415 1416 Parameters 1417 ---------- 1418 obj : object 1419 1420 Returns 1421 ------- 1422 bool 1423 """ 1424 return ( 1425 PyDelta_Check(obj) or is_timedelta64_object(obj) or is_tick_object(obj) 1426 ) 1427 1428 1429cdef bint _should_cast_to_timedelta(object obj): 1430 """ 1431 Should we treat this object as a Timedelta for the purpose of a binary op 1432 """ 1433 return ( 1434 is_any_td_scalar(obj) or obj is None or obj is NaT or isinstance(obj, str) 1435 ) 1436 1437 1438cdef _floordiv(int64_t value, right): 1439 return value // right 1440 1441 1442cdef _rfloordiv(int64_t value, right): 1443 # analogous to referencing operator.div, but there is no operator.rfloordiv 1444 return right // value 1445 1446 1447cdef _broadcast_floordiv_td64( 1448 int64_t value, 1449 ndarray other, 1450 object (*operation)(int64_t value, object right) 1451): 1452 """ 1453 Boilerplate code shared by Timedelta.__floordiv__ and 1454 Timedelta.__rfloordiv__ because np.timedelta64 does not implement these. 1455 1456 Parameters 1457 ---------- 1458 value : int64_t; `self.value` from a Timedelta object 1459 other : object 1460 operation : function, either _floordiv or _rfloordiv 1461 1462 Returns 1463 ------- 1464 result : varies based on `other` 1465 """ 1466 # assumes other.dtype.kind == 'm', i.e. other is timedelta-like 1467 1468 # We need to watch out for np.timedelta64('NaT'). 1469 mask = other.view('i8') == NPY_NAT 1470 1471 if other.ndim == 0: 1472 if mask: 1473 return np.nan 1474 1475 return operation(value, other.astype('m8[ns]').astype('i8')) 1476 1477 else: 1478 res = operation(value, other.astype('m8[ns]').astype('i8')) 1479 1480 if mask.any(): 1481 res = res.astype('f8') 1482 res[mask] = np.nan 1483 return res 1484 1485 1486# resolution in ns 1487Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) 1488Timedelta.max = Timedelta(np.iinfo(np.int64).max) 1489Timedelta.resolution = Timedelta(nanoseconds=1) 1490