1import numbers 2from operator import le, lt 3 4from cpython.datetime cimport PyDateTime_IMPORT, PyDelta_Check 5 6PyDateTime_IMPORT 7 8from cpython.object cimport ( 9 Py_EQ, 10 Py_GE, 11 Py_GT, 12 Py_LE, 13 Py_LT, 14 Py_NE, 15 PyObject_RichCompare, 16) 17 18import cython 19from cython import Py_ssize_t 20import numpy as np 21 22cimport numpy as cnp 23from numpy cimport ( 24 NPY_QUICKSORT, 25 PyArray_ArgSort, 26 PyArray_Take, 27 float32_t, 28 float64_t, 29 int32_t, 30 int64_t, 31 ndarray, 32 uint64_t, 33) 34 35cnp.import_array() 36 37 38from pandas._libs cimport util 39from pandas._libs.hashtable cimport Int64Vector 40from pandas._libs.tslibs.timedeltas cimport _Timedelta 41from pandas._libs.tslibs.timestamps cimport _Timestamp 42from pandas._libs.tslibs.timezones cimport tz_compare 43from pandas._libs.tslibs.util cimport ( 44 is_float_object, 45 is_integer_object, 46 is_timedelta64_object, 47) 48 49VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither']) 50 51 52cdef class IntervalMixin: 53 54 @property 55 def closed_left(self): 56 """ 57 Check if the interval is closed on the left side. 58 59 For the meaning of `closed` and `open` see :class:`~pandas.Interval`. 60 61 Returns 62 ------- 63 bool 64 True if the Interval is closed on the left-side. 65 """ 66 return self.closed in ('left', 'both') 67 68 @property 69 def closed_right(self): 70 """ 71 Check if the interval is closed on the right side. 72 73 For the meaning of `closed` and `open` see :class:`~pandas.Interval`. 74 75 Returns 76 ------- 77 bool 78 True if the Interval is closed on the left-side. 79 """ 80 return self.closed in ('right', 'both') 81 82 @property 83 def open_left(self): 84 """ 85 Check if the interval is open on the left side. 86 87 For the meaning of `closed` and `open` see :class:`~pandas.Interval`. 88 89 Returns 90 ------- 91 bool 92 True if the Interval is closed on the left-side. 93 """ 94 return not self.closed_left 95 96 @property 97 def open_right(self): 98 """ 99 Check if the interval is open on the right side. 100 101 For the meaning of `closed` and `open` see :class:`~pandas.Interval`. 102 103 Returns 104 ------- 105 bool 106 True if the Interval is closed on the left-side. 107 """ 108 return not self.closed_right 109 110 @property 111 def mid(self): 112 """ 113 Return the midpoint of the Interval. 114 """ 115 try: 116 return 0.5 * (self.left + self.right) 117 except TypeError: 118 # datetime safe version 119 return self.left + 0.5 * self.length 120 121 @property 122 def length(self): 123 """ 124 Return the length of the Interval. 125 """ 126 return self.right - self.left 127 128 @property 129 def is_empty(self): 130 """ 131 Indicates if an interval is empty, meaning it contains no points. 132 133 .. versionadded:: 0.25.0 134 135 Returns 136 ------- 137 bool or ndarray 138 A boolean indicating if a scalar :class:`Interval` is empty, or a 139 boolean ``ndarray`` positionally indicating if an ``Interval`` in 140 an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is 141 empty. 142 143 Examples 144 -------- 145 An :class:`Interval` that contains points is not empty: 146 147 >>> pd.Interval(0, 1, closed='right').is_empty 148 False 149 150 An ``Interval`` that does not contain any points is empty: 151 152 >>> pd.Interval(0, 0, closed='right').is_empty 153 True 154 >>> pd.Interval(0, 0, closed='left').is_empty 155 True 156 >>> pd.Interval(0, 0, closed='neither').is_empty 157 True 158 159 An ``Interval`` that contains a single point is not empty: 160 161 >>> pd.Interval(0, 0, closed='both').is_empty 162 False 163 164 An :class:`~arrays.IntervalArray` or :class:`IntervalIndex` returns a 165 boolean ``ndarray`` positionally indicating if an ``Interval`` is 166 empty: 167 168 >>> ivs = [pd.Interval(0, 0, closed='neither'), 169 ... pd.Interval(1, 2, closed='neither')] 170 >>> pd.arrays.IntervalArray(ivs).is_empty 171 array([ True, False]) 172 173 Missing values are not considered empty: 174 175 >>> ivs = [pd.Interval(0, 0, closed='neither'), np.nan] 176 >>> pd.IntervalIndex(ivs).is_empty 177 array([ True, False]) 178 """ 179 return (self.right == self.left) & (self.closed != 'both') 180 181 def _check_closed_matches(self, other, name='other'): 182 """ 183 Check if the closed attribute of `other` matches. 184 185 Note that 'left' and 'right' are considered different from 'both'. 186 187 Parameters 188 ---------- 189 other : Interval, IntervalIndex, IntervalArray 190 name : str 191 Name to use for 'other' in the error message. 192 193 Raises 194 ------ 195 ValueError 196 When `other` is not closed exactly the same as self. 197 """ 198 if self.closed != other.closed: 199 raise ValueError(f"'{name}.closed' is {repr(other.closed)}, " 200 f"expected {repr(self.closed)}.") 201 202 203cdef bint _interval_like(other): 204 return (hasattr(other, 'left') 205 and hasattr(other, 'right') 206 and hasattr(other, 'closed')) 207 208 209cdef class Interval(IntervalMixin): 210 """ 211 Immutable object implementing an Interval, a bounded slice-like interval. 212 213 Parameters 214 ---------- 215 left : orderable scalar 216 Left bound for the interval. 217 right : orderable scalar 218 Right bound for the interval. 219 closed : {'right', 'left', 'both', 'neither'}, default 'right' 220 Whether the interval is closed on the left-side, right-side, both or 221 neither. See the Notes for more detailed explanation. 222 223 See Also 224 -------- 225 IntervalIndex : An Index of Interval objects that are all closed on the 226 same side. 227 cut : Convert continuous data into discrete bins (Categorical 228 of Interval objects). 229 qcut : Convert continuous data into bins (Categorical of Interval objects) 230 based on quantiles. 231 Period : Represents a period of time. 232 233 Notes 234 ----- 235 The parameters `left` and `right` must be from the same type, you must be 236 able to compare them and they must satisfy ``left <= right``. 237 238 A closed interval (in mathematics denoted by square brackets) contains 239 its endpoints, i.e. the closed interval ``[0, 5]`` is characterized by the 240 conditions ``0 <= x <= 5``. This is what ``closed='both'`` stands for. 241 An open interval (in mathematics denoted by parentheses) does not contain 242 its endpoints, i.e. the open interval ``(0, 5)`` is characterized by the 243 conditions ``0 < x < 5``. This is what ``closed='neither'`` stands for. 244 Intervals can also be half-open or half-closed, i.e. ``[0, 5)`` is 245 described by ``0 <= x < 5`` (``closed='left'``) and ``(0, 5]`` is 246 described by ``0 < x <= 5`` (``closed='right'``). 247 248 Examples 249 -------- 250 It is possible to build Intervals of different types, like numeric ones: 251 252 >>> iv = pd.Interval(left=0, right=5) 253 >>> iv 254 Interval(0, 5, closed='right') 255 256 You can check if an element belongs to it 257 258 >>> 2.5 in iv 259 True 260 261 You can test the bounds (``closed='right'``, so ``0 < x <= 5``): 262 263 >>> 0 in iv 264 False 265 >>> 5 in iv 266 True 267 >>> 0.0001 in iv 268 True 269 270 Calculate its length 271 272 >>> iv.length 273 5 274 275 You can operate with `+` and `*` over an Interval and the operation 276 is applied to each of its bounds, so the result depends on the type 277 of the bound elements 278 279 >>> shifted_iv = iv + 3 280 >>> shifted_iv 281 Interval(3, 8, closed='right') 282 >>> extended_iv = iv * 10.0 283 >>> extended_iv 284 Interval(0.0, 50.0, closed='right') 285 286 To create a time interval you can use Timestamps as the bounds 287 288 >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'), 289 ... pd.Timestamp('2018-01-01 00:00:00'), 290 ... closed='left') 291 >>> pd.Timestamp('2017-01-01 00:00') in year_2017 292 True 293 >>> year_2017.length 294 Timedelta('365 days 00:00:00') 295 """ 296 _typ = "interval" 297 __array_priority__ = 1000 298 299 cdef readonly object left 300 """ 301 Left bound for the interval. 302 """ 303 304 cdef readonly object right 305 """ 306 Right bound for the interval. 307 """ 308 309 cdef readonly str closed 310 """ 311 Whether the interval is closed on the left-side, right-side, both or 312 neither. 313 """ 314 315 def __init__(self, left, right, str closed='right'): 316 # note: it is faster to just do these checks than to use a special 317 # constructor (__cinit__/__new__) to avoid them 318 319 self._validate_endpoint(left) 320 self._validate_endpoint(right) 321 322 if closed not in VALID_CLOSED: 323 raise ValueError(f"invalid option for 'closed': {closed}") 324 if not left <= right: 325 raise ValueError("left side of interval must be <= right side") 326 if (isinstance(left, _Timestamp) and 327 not tz_compare(left.tzinfo, right.tzinfo)): 328 # GH 18538 329 raise ValueError("left and right must have the same time zone, got " 330 f"{repr(left.tzinfo)}' and {repr(right.tzinfo)}") 331 self.left = left 332 self.right = right 333 self.closed = closed 334 335 def _validate_endpoint(self, endpoint): 336 # GH 23013 337 if not (is_integer_object(endpoint) or is_float_object(endpoint) or 338 isinstance(endpoint, (_Timestamp, _Timedelta))): 339 raise ValueError("Only numeric, Timestamp and Timedelta endpoints " 340 "are allowed when constructing an Interval.") 341 342 def __hash__(self): 343 return hash((self.left, self.right, self.closed)) 344 345 def __contains__(self, key) -> bool: 346 if _interval_like(key): 347 raise TypeError("__contains__ not defined for two intervals") 348 return ((self.left < key if self.open_left else self.left <= key) and 349 (key < self.right if self.open_right else key <= self.right)) 350 351 def __richcmp__(self, other, op: int): 352 if isinstance(other, Interval): 353 self_tuple = (self.left, self.right, self.closed) 354 other_tuple = (other.left, other.right, other.closed) 355 return PyObject_RichCompare(self_tuple, other_tuple, op) 356 elif util.is_array(other): 357 return np.array( 358 [PyObject_RichCompare(self, x, op) for x in other], 359 dtype=bool, 360 ) 361 362 return NotImplemented 363 364 def __reduce__(self): 365 args = (self.left, self.right, self.closed) 366 return (type(self), args) 367 368 def _repr_base(self): 369 left = self.left 370 right = self.right 371 372 # TODO: need more general formatting methodology here 373 if isinstance(left, _Timestamp) and isinstance(right, _Timestamp): 374 left = left._short_repr 375 right = right._short_repr 376 377 return left, right 378 379 def __repr__(self) -> str: 380 381 left, right = self._repr_base() 382 name = type(self).__name__ 383 repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})' 384 return repr_str 385 386 def __str__(self) -> str: 387 388 left, right = self._repr_base() 389 start_symbol = '[' if self.closed_left else '(' 390 end_symbol = ']' if self.closed_right else ')' 391 return f'{start_symbol}{left}, {right}{end_symbol}' 392 393 def __add__(self, y): 394 if ( 395 isinstance(y, numbers.Number) 396 or PyDelta_Check(y) 397 or is_timedelta64_object(y) 398 ): 399 return Interval(self.left + y, self.right + y, closed=self.closed) 400 elif ( 401 isinstance(y, Interval) 402 and ( 403 isinstance(self, numbers.Number) 404 or PyDelta_Check(self) 405 or is_timedelta64_object(self) 406 ) 407 ): 408 return Interval(y.left + self, y.right + self, closed=y.closed) 409 return NotImplemented 410 411 def __sub__(self, y): 412 if ( 413 isinstance(y, numbers.Number) 414 or PyDelta_Check(y) 415 or is_timedelta64_object(y) 416 ): 417 return Interval(self.left - y, self.right - y, closed=self.closed) 418 return NotImplemented 419 420 def __mul__(self, y): 421 if isinstance(y, numbers.Number): 422 return Interval(self.left * y, self.right * y, closed=self.closed) 423 elif isinstance(y, Interval) and isinstance(self, numbers.Number): 424 return Interval(y.left * self, y.right * self, closed=y.closed) 425 return NotImplemented 426 427 def __truediv__(self, y): 428 if isinstance(y, numbers.Number): 429 return Interval(self.left / y, self.right / y, closed=self.closed) 430 return NotImplemented 431 432 def __floordiv__(self, y): 433 if isinstance(y, numbers.Number): 434 return Interval( 435 self.left // y, self.right // y, closed=self.closed) 436 return NotImplemented 437 438 def overlaps(self, other): 439 """ 440 Check whether two Interval objects overlap. 441 442 Two intervals overlap if they share a common point, including closed 443 endpoints. Intervals that only have an open endpoint in common do not 444 overlap. 445 446 .. versionadded:: 0.24.0 447 448 Parameters 449 ---------- 450 other : Interval 451 Interval to check against for an overlap. 452 453 Returns 454 ------- 455 bool 456 True if the two intervals overlap. 457 458 See Also 459 -------- 460 IntervalArray.overlaps : The corresponding method for IntervalArray. 461 IntervalIndex.overlaps : The corresponding method for IntervalIndex. 462 463 Examples 464 -------- 465 >>> i1 = pd.Interval(0, 2) 466 >>> i2 = pd.Interval(1, 3) 467 >>> i1.overlaps(i2) 468 True 469 >>> i3 = pd.Interval(4, 5) 470 >>> i1.overlaps(i3) 471 False 472 473 Intervals that share closed endpoints overlap: 474 475 >>> i4 = pd.Interval(0, 1, closed='both') 476 >>> i5 = pd.Interval(1, 2, closed='both') 477 >>> i4.overlaps(i5) 478 True 479 480 Intervals that only have an open endpoint in common do not overlap: 481 482 >>> i6 = pd.Interval(1, 2, closed='neither') 483 >>> i4.overlaps(i6) 484 False 485 """ 486 if not isinstance(other, Interval): 487 raise TypeError("`other` must be an Interval, " 488 f"got {type(other).__name__}") 489 490 # equality is okay if both endpoints are closed (overlap at a point) 491 op1 = le if (self.closed_left and other.closed_right) else lt 492 op2 = le if (other.closed_left and self.closed_right) else lt 493 494 # overlaps is equivalent negation of two interval being disjoint: 495 # disjoint = (A.left > B.right) or (B.left > A.right) 496 # (simplifying the negation allows this to be done in less operations) 497 return op1(self.left, other.right) and op2(other.left, self.right) 498 499 500@cython.wraparound(False) 501@cython.boundscheck(False) 502def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True): 503 """ 504 Parameters 505 ---------- 506 intervals : ndarray 507 Object array of Intervals / nulls. 508 509 validate_closed: bool, default True 510 Boolean indicating if all intervals must be closed on the same side. 511 Mismatching closed will raise if True, else return None for closed. 512 513 Returns 514 ------- 515 tuple of tuples 516 left : (ndarray, object, array) 517 right : (ndarray, object, array) 518 closed: str 519 """ 520 cdef: 521 object closed = None, interval 522 Py_ssize_t i, n = len(intervals) 523 ndarray left, right 524 bint seen_closed = False 525 526 left = np.empty(n, dtype=intervals.dtype) 527 right = np.empty(n, dtype=intervals.dtype) 528 529 for i in range(n): 530 interval = intervals[i] 531 if interval is None or util.is_nan(interval): 532 left[i] = np.nan 533 right[i] = np.nan 534 continue 535 536 if not isinstance(interval, Interval): 537 raise TypeError(f"type {type(interval)} with value " 538 f"{interval} is not an interval") 539 540 left[i] = interval.left 541 right[i] = interval.right 542 if not seen_closed: 543 seen_closed = True 544 closed = interval.closed 545 elif closed != interval.closed: 546 closed = None 547 if validate_closed: 548 raise ValueError("intervals must all be closed on the same side") 549 550 return left, right, closed 551 552 553include "intervaltree.pxi" 554