1import numbers
2from operator import le, lt
3
4from cpython.datetime cimport PyDateTime_IMPORT, PyDelta_Check
5
6PyDateTime_IMPORT
7
8from cpython.object cimport (
9    Py_EQ,
10    Py_GE,
11    Py_GT,
12    Py_LE,
13    Py_LT,
14    Py_NE,
15    PyObject_RichCompare,
16)
17
18import cython
19from cython import Py_ssize_t
20import numpy as np
21
22cimport numpy as cnp
23from numpy cimport (
24    NPY_QUICKSORT,
25    PyArray_ArgSort,
26    PyArray_Take,
27    float32_t,
28    float64_t,
29    int32_t,
30    int64_t,
31    ndarray,
32    uint64_t,
33)
34
35cnp.import_array()
36
37
38from pandas._libs cimport util
39from pandas._libs.hashtable cimport Int64Vector
40from pandas._libs.tslibs.timedeltas cimport _Timedelta
41from pandas._libs.tslibs.timestamps cimport _Timestamp
42from pandas._libs.tslibs.timezones cimport tz_compare
43from pandas._libs.tslibs.util cimport (
44    is_float_object,
45    is_integer_object,
46    is_timedelta64_object,
47)
48
49VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])
50
51
52cdef class IntervalMixin:
53
54    @property
55    def closed_left(self):
56        """
57        Check if the interval is closed on the left side.
58
59        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
60
61        Returns
62        -------
63        bool
64            True if the Interval is closed on the left-side.
65        """
66        return self.closed in ('left', 'both')
67
68    @property
69    def closed_right(self):
70        """
71        Check if the interval is closed on the right side.
72
73        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
74
75        Returns
76        -------
77        bool
78            True if the Interval is closed on the left-side.
79        """
80        return self.closed in ('right', 'both')
81
82    @property
83    def open_left(self):
84        """
85        Check if the interval is open on the left side.
86
87        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
88
89        Returns
90        -------
91        bool
92            True if the Interval is closed on the left-side.
93        """
94        return not self.closed_left
95
96    @property
97    def open_right(self):
98        """
99        Check if the interval is open on the right side.
100
101        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
102
103        Returns
104        -------
105        bool
106            True if the Interval is closed on the left-side.
107        """
108        return not self.closed_right
109
110    @property
111    def mid(self):
112        """
113        Return the midpoint of the Interval.
114        """
115        try:
116            return 0.5 * (self.left + self.right)
117        except TypeError:
118            # datetime safe version
119            return self.left + 0.5 * self.length
120
121    @property
122    def length(self):
123        """
124        Return the length of the Interval.
125        """
126        return self.right - self.left
127
128    @property
129    def is_empty(self):
130        """
131        Indicates if an interval is empty, meaning it contains no points.
132
133        .. versionadded:: 0.25.0
134
135        Returns
136        -------
137        bool or ndarray
138            A boolean indicating if a scalar :class:`Interval` is empty, or a
139            boolean ``ndarray`` positionally indicating if an ``Interval`` in
140            an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is
141            empty.
142
143        Examples
144        --------
145        An :class:`Interval` that contains points is not empty:
146
147        >>> pd.Interval(0, 1, closed='right').is_empty
148        False
149
150        An ``Interval`` that does not contain any points is empty:
151
152        >>> pd.Interval(0, 0, closed='right').is_empty
153        True
154        >>> pd.Interval(0, 0, closed='left').is_empty
155        True
156        >>> pd.Interval(0, 0, closed='neither').is_empty
157        True
158
159        An ``Interval`` that contains a single point is not empty:
160
161        >>> pd.Interval(0, 0, closed='both').is_empty
162        False
163
164        An :class:`~arrays.IntervalArray` or :class:`IntervalIndex` returns a
165        boolean ``ndarray`` positionally indicating if an ``Interval`` is
166        empty:
167
168        >>> ivs = [pd.Interval(0, 0, closed='neither'),
169        ...        pd.Interval(1, 2, closed='neither')]
170        >>> pd.arrays.IntervalArray(ivs).is_empty
171        array([ True, False])
172
173        Missing values are not considered empty:
174
175        >>> ivs = [pd.Interval(0, 0, closed='neither'), np.nan]
176        >>> pd.IntervalIndex(ivs).is_empty
177        array([ True, False])
178        """
179        return (self.right == self.left) & (self.closed != 'both')
180
181    def _check_closed_matches(self, other, name='other'):
182        """
183        Check if the closed attribute of `other` matches.
184
185        Note that 'left' and 'right' are considered different from 'both'.
186
187        Parameters
188        ----------
189        other : Interval, IntervalIndex, IntervalArray
190        name : str
191            Name to use for 'other' in the error message.
192
193        Raises
194        ------
195        ValueError
196            When `other` is not closed exactly the same as self.
197        """
198        if self.closed != other.closed:
199            raise ValueError(f"'{name}.closed' is {repr(other.closed)}, "
200                             f"expected {repr(self.closed)}.")
201
202
203cdef bint _interval_like(other):
204    return (hasattr(other, 'left')
205            and hasattr(other, 'right')
206            and hasattr(other, 'closed'))
207
208
209cdef class Interval(IntervalMixin):
210    """
211    Immutable object implementing an Interval, a bounded slice-like interval.
212
213    Parameters
214    ----------
215    left : orderable scalar
216        Left bound for the interval.
217    right : orderable scalar
218        Right bound for the interval.
219    closed : {'right', 'left', 'both', 'neither'}, default 'right'
220        Whether the interval is closed on the left-side, right-side, both or
221        neither. See the Notes for more detailed explanation.
222
223    See Also
224    --------
225    IntervalIndex : An Index of Interval objects that are all closed on the
226        same side.
227    cut : Convert continuous data into discrete bins (Categorical
228        of Interval objects).
229    qcut : Convert continuous data into bins (Categorical of Interval objects)
230        based on quantiles.
231    Period : Represents a period of time.
232
233    Notes
234    -----
235    The parameters `left` and `right` must be from the same type, you must be
236    able to compare them and they must satisfy ``left <= right``.
237
238    A closed interval (in mathematics denoted by square brackets) contains
239    its endpoints, i.e. the closed interval ``[0, 5]`` is characterized by the
240    conditions ``0 <= x <= 5``. This is what ``closed='both'`` stands for.
241    An open interval (in mathematics denoted by parentheses) does not contain
242    its endpoints, i.e. the open interval ``(0, 5)`` is characterized by the
243    conditions ``0 < x < 5``. This is what ``closed='neither'`` stands for.
244    Intervals can also be half-open or half-closed, i.e. ``[0, 5)`` is
245    described by ``0 <= x < 5`` (``closed='left'``) and ``(0, 5]`` is
246    described by ``0 < x <= 5`` (``closed='right'``).
247
248    Examples
249    --------
250    It is possible to build Intervals of different types, like numeric ones:
251
252    >>> iv = pd.Interval(left=0, right=5)
253    >>> iv
254    Interval(0, 5, closed='right')
255
256    You can check if an element belongs to it
257
258    >>> 2.5 in iv
259    True
260
261    You can test the bounds (``closed='right'``, so ``0 < x <= 5``):
262
263    >>> 0 in iv
264    False
265    >>> 5 in iv
266    True
267    >>> 0.0001 in iv
268    True
269
270    Calculate its length
271
272    >>> iv.length
273    5
274
275    You can operate with `+` and `*` over an Interval and the operation
276    is applied to each of its bounds, so the result depends on the type
277    of the bound elements
278
279    >>> shifted_iv = iv + 3
280    >>> shifted_iv
281    Interval(3, 8, closed='right')
282    >>> extended_iv = iv * 10.0
283    >>> extended_iv
284    Interval(0.0, 50.0, closed='right')
285
286    To create a time interval you can use Timestamps as the bounds
287
288    >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'),
289    ...                         pd.Timestamp('2018-01-01 00:00:00'),
290    ...                         closed='left')
291    >>> pd.Timestamp('2017-01-01 00:00') in year_2017
292    True
293    >>> year_2017.length
294    Timedelta('365 days 00:00:00')
295    """
296    _typ = "interval"
297    __array_priority__ = 1000
298
299    cdef readonly object left
300    """
301    Left bound for the interval.
302    """
303
304    cdef readonly object right
305    """
306    Right bound for the interval.
307    """
308
309    cdef readonly str closed
310    """
311    Whether the interval is closed on the left-side, right-side, both or
312    neither.
313    """
314
315    def __init__(self, left, right, str closed='right'):
316        # note: it is faster to just do these checks than to use a special
317        # constructor (__cinit__/__new__) to avoid them
318
319        self._validate_endpoint(left)
320        self._validate_endpoint(right)
321
322        if closed not in VALID_CLOSED:
323            raise ValueError(f"invalid option for 'closed': {closed}")
324        if not left <= right:
325            raise ValueError("left side of interval must be <= right side")
326        if (isinstance(left, _Timestamp) and
327                not tz_compare(left.tzinfo, right.tzinfo)):
328            # GH 18538
329            raise ValueError("left and right must have the same time zone, got "
330                             f"{repr(left.tzinfo)}' and {repr(right.tzinfo)}")
331        self.left = left
332        self.right = right
333        self.closed = closed
334
335    def _validate_endpoint(self, endpoint):
336        # GH 23013
337        if not (is_integer_object(endpoint) or is_float_object(endpoint) or
338                isinstance(endpoint, (_Timestamp, _Timedelta))):
339            raise ValueError("Only numeric, Timestamp and Timedelta endpoints "
340                             "are allowed when constructing an Interval.")
341
342    def __hash__(self):
343        return hash((self.left, self.right, self.closed))
344
345    def __contains__(self, key) -> bool:
346        if _interval_like(key):
347            raise TypeError("__contains__ not defined for two intervals")
348        return ((self.left < key if self.open_left else self.left <= key) and
349                (key < self.right if self.open_right else key <= self.right))
350
351    def __richcmp__(self, other, op: int):
352        if isinstance(other, Interval):
353            self_tuple = (self.left, self.right, self.closed)
354            other_tuple = (other.left, other.right, other.closed)
355            return PyObject_RichCompare(self_tuple, other_tuple, op)
356        elif util.is_array(other):
357            return np.array(
358                [PyObject_RichCompare(self, x, op) for x in other],
359                dtype=bool,
360            )
361
362        return NotImplemented
363
364    def __reduce__(self):
365        args = (self.left, self.right, self.closed)
366        return (type(self), args)
367
368    def _repr_base(self):
369        left = self.left
370        right = self.right
371
372        # TODO: need more general formatting methodology here
373        if isinstance(left, _Timestamp) and isinstance(right, _Timestamp):
374            left = left._short_repr
375            right = right._short_repr
376
377        return left, right
378
379    def __repr__(self) -> str:
380
381        left, right = self._repr_base()
382        name = type(self).__name__
383        repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})'
384        return repr_str
385
386    def __str__(self) -> str:
387
388        left, right = self._repr_base()
389        start_symbol = '[' if self.closed_left else '('
390        end_symbol = ']' if self.closed_right else ')'
391        return f'{start_symbol}{left}, {right}{end_symbol}'
392
393    def __add__(self, y):
394        if (
395            isinstance(y, numbers.Number)
396            or PyDelta_Check(y)
397            or is_timedelta64_object(y)
398        ):
399            return Interval(self.left + y, self.right + y, closed=self.closed)
400        elif (
401            isinstance(y, Interval)
402            and (
403                isinstance(self, numbers.Number)
404                or PyDelta_Check(self)
405                or is_timedelta64_object(self)
406            )
407        ):
408            return Interval(y.left + self, y.right + self, closed=y.closed)
409        return NotImplemented
410
411    def __sub__(self, y):
412        if (
413            isinstance(y, numbers.Number)
414            or PyDelta_Check(y)
415            or is_timedelta64_object(y)
416        ):
417            return Interval(self.left - y, self.right - y, closed=self.closed)
418        return NotImplemented
419
420    def __mul__(self, y):
421        if isinstance(y, numbers.Number):
422            return Interval(self.left * y, self.right * y, closed=self.closed)
423        elif isinstance(y, Interval) and isinstance(self, numbers.Number):
424            return Interval(y.left * self, y.right * self, closed=y.closed)
425        return NotImplemented
426
427    def __truediv__(self, y):
428        if isinstance(y, numbers.Number):
429            return Interval(self.left / y, self.right / y, closed=self.closed)
430        return NotImplemented
431
432    def __floordiv__(self, y):
433        if isinstance(y, numbers.Number):
434            return Interval(
435                self.left // y, self.right // y, closed=self.closed)
436        return NotImplemented
437
438    def overlaps(self, other):
439        """
440        Check whether two Interval objects overlap.
441
442        Two intervals overlap if they share a common point, including closed
443        endpoints. Intervals that only have an open endpoint in common do not
444        overlap.
445
446        .. versionadded:: 0.24.0
447
448        Parameters
449        ----------
450        other : Interval
451            Interval to check against for an overlap.
452
453        Returns
454        -------
455        bool
456            True if the two intervals overlap.
457
458        See Also
459        --------
460        IntervalArray.overlaps : The corresponding method for IntervalArray.
461        IntervalIndex.overlaps : The corresponding method for IntervalIndex.
462
463        Examples
464        --------
465        >>> i1 = pd.Interval(0, 2)
466        >>> i2 = pd.Interval(1, 3)
467        >>> i1.overlaps(i2)
468        True
469        >>> i3 = pd.Interval(4, 5)
470        >>> i1.overlaps(i3)
471        False
472
473        Intervals that share closed endpoints overlap:
474
475        >>> i4 = pd.Interval(0, 1, closed='both')
476        >>> i5 = pd.Interval(1, 2, closed='both')
477        >>> i4.overlaps(i5)
478        True
479
480        Intervals that only have an open endpoint in common do not overlap:
481
482        >>> i6 = pd.Interval(1, 2, closed='neither')
483        >>> i4.overlaps(i6)
484        False
485        """
486        if not isinstance(other, Interval):
487            raise TypeError("`other` must be an Interval, "
488                            f"got {type(other).__name__}")
489
490        # equality is okay if both endpoints are closed (overlap at a point)
491        op1 = le if (self.closed_left and other.closed_right) else lt
492        op2 = le if (other.closed_left and self.closed_right) else lt
493
494        # overlaps is equivalent negation of two interval being disjoint:
495        # disjoint = (A.left > B.right) or (B.left > A.right)
496        # (simplifying the negation allows this to be done in less operations)
497        return op1(self.left, other.right) and op2(other.left, self.right)
498
499
500@cython.wraparound(False)
501@cython.boundscheck(False)
502def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True):
503    """
504    Parameters
505    ----------
506    intervals : ndarray
507        Object array of Intervals / nulls.
508
509    validate_closed: bool, default True
510        Boolean indicating if all intervals must be closed on the same side.
511        Mismatching closed will raise if True, else return None for closed.
512
513    Returns
514    -------
515    tuple of tuples
516        left : (ndarray, object, array)
517        right : (ndarray, object, array)
518        closed: str
519    """
520    cdef:
521        object closed = None, interval
522        Py_ssize_t i, n = len(intervals)
523        ndarray left, right
524        bint seen_closed = False
525
526    left = np.empty(n, dtype=intervals.dtype)
527    right = np.empty(n, dtype=intervals.dtype)
528
529    for i in range(n):
530        interval = intervals[i]
531        if interval is None or util.is_nan(interval):
532            left[i] = np.nan
533            right[i] = np.nan
534            continue
535
536        if not isinstance(interval, Interval):
537            raise TypeError(f"type {type(interval)} with value "
538                            f"{interval} is not an interval")
539
540        left[i] = interval.left
541        right[i] = interval.right
542        if not seen_closed:
543            seen_closed = True
544            closed = interval.closed
545        elif closed != interval.closed:
546            closed = None
547            if validate_closed:
548                raise ValueError("intervals must all be closed on the same side")
549
550    return left, right, closed
551
552
553include "intervaltree.pxi"
554