1from collections import abc
2from decimal import Decimal
3import warnings
4
5import cython
6from cython import Py_ssize_t
7
8from cpython.datetime cimport (
9    PyDate_Check,
10    PyDateTime_Check,
11    PyDateTime_IMPORT,
12    PyDelta_Check,
13    PyTime_Check,
14)
15from cpython.iterator cimport PyIter_Check
16from cpython.number cimport PyNumber_Check
17from cpython.object cimport Py_EQ, PyObject_RichCompareBool
18from cpython.ref cimport Py_INCREF
19from cpython.sequence cimport PySequence_Check
20from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM
21
22PyDateTime_IMPORT
23
24import numpy as np
25
26cimport numpy as cnp
27from numpy cimport (
28    NPY_OBJECT,
29    PyArray_Check,
30    PyArray_GETITEM,
31    PyArray_ITER_DATA,
32    PyArray_ITER_NEXT,
33    PyArray_IterNew,
34    complex128_t,
35    flatiter,
36    float32_t,
37    float64_t,
38    int64_t,
39    intp_t,
40    ndarray,
41    uint8_t,
42    uint64_t,
43)
44
45cnp.import_array()
46
47cdef extern from "numpy/arrayobject.h":
48    # cython's numpy.dtype specification is incorrect, which leads to
49    # errors in issubclass(self.dtype.type, np.bool_), so we directly
50    # include the correct version
51    # https://github.com/cython/cython/issues/2022
52
53    ctypedef class numpy.dtype [object PyArray_Descr]:
54        # Use PyDataType_* macros when possible, however there are no macros
55        # for accessing some of the fields, so some are defined. Please
56        # ask on cython-dev if you need more.
57        cdef:
58            int type_num
59            int itemsize "elsize"
60            char byteorder
61            object fields
62            tuple names
63
64
65cdef extern from "src/parse_helper.h":
66    int floatify(object, float64_t *result, int *maybe_int) except -1
67
68from pandas._libs cimport util
69from pandas._libs.util cimport INT64_MAX, INT64_MIN, UINT64_MAX, is_nan
70
71from pandas._libs.tslib import array_to_datetime
72
73from pandas._libs.missing cimport (
74    C_NA,
75    checknull,
76    is_null_datetime64,
77    is_null_timedelta64,
78    isnaobj,
79)
80from pandas._libs.tslibs.conversion cimport convert_to_tsobject
81from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT, checknull_with_nat
82from pandas._libs.tslibs.offsets cimport is_offset_object
83from pandas._libs.tslibs.period cimport is_period_object
84from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
85from pandas._libs.tslibs.timezones cimport tz_compare
86
87# constants that will be compared to potentially arbitrarily large
88# python int
89cdef:
90    object oINT64_MAX = <int64_t>INT64_MAX
91    object oINT64_MIN = <int64_t>INT64_MIN
92    object oUINT64_MAX = <uint64_t>UINT64_MAX
93
94    float64_t NaN = <float64_t>np.NaN
95
96
97@cython.wraparound(False)
98@cython.boundscheck(False)
99def memory_usage_of_objects(arr: object[:]) -> int64_t:
100    """
101    Return the memory usage of an object array in bytes.
102
103    Does not include the actual bytes of the pointers
104    """
105    i: Py_ssize_t
106    n: Py_ssize_t
107    size: int64_t
108
109    size = 0
110    n = len(arr)
111    for i in range(n):
112        size += arr[i].__sizeof__()
113    return size
114
115
116# ----------------------------------------------------------------------
117
118
119def is_scalar(val: object) -> bool:
120    """
121    Return True if given object is scalar.
122
123    Parameters
124    ----------
125    val : object
126        This includes:
127
128        - numpy array scalar (e.g. np.int64)
129        - Python builtin numerics
130        - Python builtin byte arrays and strings
131        - None
132        - datetime.datetime
133        - datetime.timedelta
134        - Period
135        - decimal.Decimal
136        - Interval
137        - DateOffset
138        - Fraction
139        - Number.
140
141    Returns
142    -------
143    bool
144        Return True if given object is scalar.
145
146    Examples
147    --------
148    >>> dt = datetime.datetime(2018, 10, 3)
149    >>> pd.api.types.is_scalar(dt)
150    True
151
152    >>> pd.api.types.is_scalar([2, 3])
153    False
154
155    >>> pd.api.types.is_scalar({0: 1, 2: 3})
156    False
157
158    >>> pd.api.types.is_scalar((0, 2))
159    False
160
161    pandas supports PEP 3141 numbers:
162
163    >>> from fractions import Fraction
164    >>> pd.api.types.is_scalar(Fraction(3, 5))
165    True
166    """
167
168    # Start with C-optimized checks
169    if (cnp.PyArray_IsAnyScalar(val)
170            # PyArray_IsAnyScalar is always False for bytearrays on Py3
171            or PyDate_Check(val)
172            or PyDelta_Check(val)
173            or PyTime_Check(val)
174            # We differ from numpy, which claims that None is not scalar;
175            # see np.isscalar
176            or val is C_NA
177            or val is None):
178        return True
179
180    # Next use C-optimized checks to exclude common non-scalars before falling
181    #  back to non-optimized checks.
182    if PySequence_Check(val):
183        # e.g. list, tuple
184        # includes np.ndarray, Series which PyNumber_Check can return True for
185        return False
186
187    # Note: PyNumber_Check check includes Decimal, Fraction, numbers.Number
188    return (PyNumber_Check(val)
189            or is_period_object(val)
190            or is_interval(val)
191            or is_offset_object(val))
192
193
194def is_iterator(obj: object) -> bool:
195    """
196    Check if the object is an iterator.
197
198    This is intended for generators, not list-like objects.
199
200    Parameters
201    ----------
202    obj : The object to check
203
204    Returns
205    -------
206    is_iter : bool
207        Whether `obj` is an iterator.
208
209    Examples
210    --------
211    >>> is_iterator((x for x in []))
212    True
213    >>> is_iterator([1, 2, 3])
214    False
215    >>> is_iterator(datetime(2017, 1, 1))
216    False
217    >>> is_iterator("foo")
218    False
219    >>> is_iterator(1)
220    False
221    """
222    return PyIter_Check(obj)
223
224
225def item_from_zerodim(val: object) -> object:
226    """
227    If the value is a zerodim array, return the item it contains.
228
229    Parameters
230    ----------
231    val : object
232
233    Returns
234    -------
235    object
236
237    Examples
238    --------
239    >>> item_from_zerodim(1)
240    1
241    >>> item_from_zerodim('foobar')
242    'foobar'
243    >>> item_from_zerodim(np.array(1))
244    1
245    >>> item_from_zerodim(np.array([1]))
246    array([1])
247    """
248    if cnp.PyArray_IsZeroDim(val):
249        return cnp.PyArray_ToScalar(cnp.PyArray_DATA(val), val)
250    return val
251
252
253@cython.wraparound(False)
254@cython.boundscheck(False)
255def fast_unique_multiple(list arrays, sort: bool = True):
256    """
257    Generate a list of unique values from a list of arrays.
258
259    Parameters
260    ----------
261    list : array-like
262        List of array-like objects.
263    sort : bool
264        Whether or not to sort the resulting unique list.
265
266    Returns
267    -------
268    list of unique values
269    """
270    cdef:
271        ndarray[object] buf
272        Py_ssize_t k = len(arrays)
273        Py_ssize_t i, j, n
274        list uniques = []
275        dict table = {}
276        object val, stub = 0
277
278    for i in range(k):
279        buf = arrays[i]
280        n = len(buf)
281        for j in range(n):
282            val = buf[j]
283            if val not in table:
284                table[val] = stub
285                uniques.append(val)
286
287    if sort is None:
288        try:
289            uniques.sort()
290        except TypeError:
291            warnings.warn(
292                "The values in the array are unorderable. "
293                "Pass `sort=False` to suppress this warning.",
294                RuntimeWarning,
295            )
296            pass
297
298    return uniques
299
300
301@cython.wraparound(False)
302@cython.boundscheck(False)
303def fast_unique_multiple_list(lists: list, sort: bool = True) -> list:
304    cdef:
305        list buf
306        Py_ssize_t k = len(lists)
307        Py_ssize_t i, j, n
308        list uniques = []
309        dict table = {}
310        object val, stub = 0
311
312    for i in range(k):
313        buf = lists[i]
314        n = len(buf)
315        for j in range(n):
316            val = buf[j]
317            if val not in table:
318                table[val] = stub
319                uniques.append(val)
320    if sort:
321        try:
322            uniques.sort()
323        except TypeError:
324            pass
325
326    return uniques
327
328
329@cython.wraparound(False)
330@cython.boundscheck(False)
331def fast_unique_multiple_list_gen(object gen, bint sort=True):
332    """
333    Generate a list of unique values from a generator of lists.
334
335    Parameters
336    ----------
337    gen : generator object
338        Generator of lists from which the unique list is created.
339    sort : bool
340        Whether or not to sort the resulting unique list.
341
342    Returns
343    -------
344    list of unique values
345    """
346    cdef:
347        list buf
348        Py_ssize_t j, n
349        list uniques = []
350        dict table = {}
351        object val, stub = 0
352
353    for buf in gen:
354        n = len(buf)
355        for j in range(n):
356            val = buf[j]
357            if val not in table:
358                table[val] = stub
359                uniques.append(val)
360    if sort:
361        try:
362            uniques.sort()
363        except TypeError:
364            pass
365
366    return uniques
367
368
369@cython.wraparound(False)
370@cython.boundscheck(False)
371def dicts_to_array(dicts: list, columns: list):
372    cdef:
373        Py_ssize_t i, j, k, n
374        ndarray[object, ndim=2] result
375        dict row
376        object col, onan = np.nan
377
378    k = len(columns)
379    n = len(dicts)
380
381    result = np.empty((n, k), dtype='O')
382
383    for i in range(n):
384        row = dicts[i]
385        for j in range(k):
386            col = columns[j]
387            if col in row:
388                result[i, j] = row[col]
389            else:
390                result[i, j] = onan
391
392    return result
393
394
395def fast_zip(list ndarrays):
396    """
397    For zipping multiple ndarrays into an ndarray of tuples.
398    """
399    cdef:
400        Py_ssize_t i, j, k, n
401        ndarray[object] result
402        flatiter it
403        object val, tup
404
405    k = len(ndarrays)
406    n = len(ndarrays[0])
407
408    result = np.empty(n, dtype=object)
409
410    # initialize tuples on first pass
411    arr = ndarrays[0]
412    it = <flatiter>PyArray_IterNew(arr)
413    for i in range(n):
414        val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it))
415        tup = PyTuple_New(k)
416
417        PyTuple_SET_ITEM(tup, 0, val)
418        Py_INCREF(val)
419        result[i] = tup
420        PyArray_ITER_NEXT(it)
421
422    for j in range(1, k):
423        arr = ndarrays[j]
424        it = <flatiter>PyArray_IterNew(arr)
425        if len(arr) != n:
426            raise ValueError("all arrays must be same length")
427
428        for i in range(n):
429            val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it))
430            PyTuple_SET_ITEM(result[i], j, val)
431            Py_INCREF(val)
432            PyArray_ITER_NEXT(it)
433
434    return result
435
436
437def get_reverse_indexer(const int64_t[:] indexer, Py_ssize_t length):
438    """
439    Reverse indexing operation.
440
441    Given `indexer`, make `indexer_inv` of it, such that::
442
443        indexer_inv[indexer[x]] = x
444
445    .. note:: If indexer is not unique, only first occurrence is accounted.
446    """
447    cdef:
448        Py_ssize_t i, n = len(indexer)
449        ndarray[int64_t] rev_indexer
450        int64_t idx
451
452    rev_indexer = np.empty(length, dtype=np.int64)
453    rev_indexer[:] = -1
454    for i in range(n):
455        idx = indexer[i]
456        if idx != -1:
457            rev_indexer[idx] = i
458
459    return rev_indexer
460
461
462@cython.wraparound(False)
463@cython.boundscheck(False)
464def has_infs_f4(const float32_t[:] arr) -> bool:
465    cdef:
466        Py_ssize_t i, n = len(arr)
467        float32_t inf, neginf, val
468
469    inf = np.inf
470    neginf = -inf
471
472    for i in range(n):
473        val = arr[i]
474        if val == inf or val == neginf:
475            return True
476    return False
477
478
479@cython.wraparound(False)
480@cython.boundscheck(False)
481def has_infs_f8(const float64_t[:] arr) -> bool:
482    cdef:
483        Py_ssize_t i, n = len(arr)
484        float64_t inf, neginf, val
485
486    inf = np.inf
487    neginf = -inf
488
489    for i in range(n):
490        val = arr[i]
491        if val == inf or val == neginf:
492            return True
493    return False
494
495
496def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len):
497    cdef:
498        Py_ssize_t i, n = len(indices)
499        int k, vstart, vlast, v
500
501    if n == 0:
502        return slice(0, 0)
503
504    vstart = indices[0]
505    if vstart < 0 or max_len <= vstart:
506        return indices
507
508    if n == 1:
509        return slice(vstart, vstart + 1)
510
511    vlast = indices[n - 1]
512    if vlast < 0 or max_len <= vlast:
513        return indices
514
515    k = indices[1] - indices[0]
516    if k == 0:
517        return indices
518    else:
519        for i in range(2, n):
520            v = indices[i]
521            if v - indices[i - 1] != k:
522                return indices
523
524        if k > 0:
525            return slice(vstart, vlast + 1, k)
526        else:
527            if vlast == 0:
528                return slice(vstart, None, k)
529            else:
530                return slice(vstart, vlast - 1, k)
531
532
533@cython.wraparound(False)
534@cython.boundscheck(False)
535def maybe_booleans_to_slice(ndarray[uint8_t] mask):
536    cdef:
537        Py_ssize_t i, n = len(mask)
538        Py_ssize_t start = 0, end = 0
539        bint started = False, finished = False
540
541    for i in range(n):
542        if mask[i]:
543            if finished:
544                return mask.view(np.bool_)
545            if not started:
546                started = True
547                start = i
548        else:
549            if finished:
550                continue
551
552            if started:
553                end = i
554                finished = True
555
556    if not started:
557        return slice(0, 0)
558    if not finished:
559        return slice(start, None)
560    else:
561        return slice(start, end)
562
563
564@cython.wraparound(False)
565@cython.boundscheck(False)
566def array_equivalent_object(left: object[:], right: object[:]) -> bool:
567    """
568    Perform an element by element comparison on 1-d object arrays
569    taking into account nan positions.
570    """
571    cdef:
572        Py_ssize_t i, n = left.shape[0]
573        object x, y
574
575    for i in range(n):
576        x = left[i]
577        y = right[i]
578
579        # we are either not equal or both nan
580        # I think None == None will be true here
581        try:
582            if PyArray_Check(x) and PyArray_Check(y):
583                if not array_equivalent_object(x, y):
584                    return False
585            elif (x is C_NA) ^ (y is C_NA):
586                return False
587            elif not (PyObject_RichCompareBool(x, y, Py_EQ) or
588                      (x is None or is_nan(x)) and (y is None or is_nan(y))):
589                return False
590        except ValueError:
591            # Avoid raising ValueError when comparing Numpy arrays to other types
592            if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y):
593                # Only compare scalars to scalars and non-scalars to non-scalars
594                return False
595            elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y))
596                  and not (isinstance(x, type(y)) or isinstance(y, type(x)))):
597                # Check if non-scalars have the same type
598                return False
599            raise
600    return True
601
602
603@cython.wraparound(False)
604@cython.boundscheck(False)
605def astype_intsafe(ndarray[object] arr, new_dtype):
606    cdef:
607        Py_ssize_t i, n = len(arr)
608        object val
609        bint is_datelike
610        ndarray result
611
612    is_datelike = new_dtype == 'm8[ns]'
613    result = np.empty(n, dtype=new_dtype)
614    for i in range(n):
615        val = arr[i]
616        if is_datelike and checknull(val):
617            result[i] = NPY_NAT
618        else:
619            result[i] = val
620
621    return result
622
623
624@cython.wraparound(False)
625@cython.boundscheck(False)
626cpdef ndarray[object] ensure_string_array(
627        arr,
628        object na_value=np.nan,
629        bint convert_na_value=True,
630        bint copy=True,
631        bint skipna=True,
632):
633    """Returns a new numpy array with object dtype and only strings and na values.
634
635    Parameters
636    ----------
637    arr : array-like
638        The values to be converted to str, if needed.
639    na_value : Any, default np.nan
640        The value to use for na. For example, np.nan or pd.NA.
641    convert_na_value : bool, default True
642        If False, existing na values will be used unchanged in the new array.
643    copy : bool, default True
644        Whether to ensure that a new array is returned.
645    skipna : bool, default True
646        Whether or not to coerce nulls to their stringified form
647        (e.g. if False, NaN becomes 'nan').
648
649    Returns
650    -------
651    ndarray
652        An array with the input array's elements casted to str or nan-like.
653    """
654    cdef:
655        Py_ssize_t i = 0, n = len(arr)
656
657    if hasattr(arr, "to_numpy"):
658        arr = arr.to_numpy()
659    elif not isinstance(arr, np.ndarray):
660        arr = np.array(arr, dtype="object")
661
662    result = np.asarray(arr, dtype="object")
663
664    if copy and result is arr:
665        result = result.copy()
666
667    for i in range(n):
668        val = arr[i]
669
670        if isinstance(val, str):
671            continue
672
673        if not checknull(val):
674            result[i] = str(val)
675        else:
676            if convert_na_value:
677                val = na_value
678            if skipna:
679                result[i] = val
680            else:
681                result[i] = str(val)
682
683    return result
684
685
686@cython.wraparound(False)
687@cython.boundscheck(False)
688def clean_index_list(obj: list):
689    """
690    Utility used in ``pandas.core.indexes.api.ensure_index``.
691    """
692    cdef:
693        Py_ssize_t i, n = len(obj)
694        object val
695        bint all_arrays = True
696
697    for i in range(n):
698        val = obj[i]
699        if not (isinstance(val, list) or
700                util.is_array(val) or hasattr(val, '_data')):
701            all_arrays = False
702            break
703
704    if all_arrays:
705        return obj, all_arrays
706
707    # don't force numpy coerce with nan's
708    inferred = infer_dtype(obj, skipna=False)
709    if inferred in ['string', 'bytes', 'mixed', 'mixed-integer']:
710        return np.asarray(obj, dtype=object), 0
711    elif inferred in ['integer']:
712        # TODO: we infer an integer but it *could* be a uint64
713        try:
714            return np.asarray(obj, dtype='int64'), 0
715        except OverflowError:
716            return np.asarray(obj, dtype='object'), 0
717
718    return np.asarray(obj), 0
719
720
721# ------------------------------------------------------------------------------
722# Groupby-related functions
723
724# TODO: could do even better if we know something about the data. eg, index has
725# 1-min data, binner has 5-min data, then bins are just strides in index. This
726# is a general, O(max(len(values), len(binner))) method.
727@cython.boundscheck(False)
728@cython.wraparound(False)
729def generate_bins_dt64(ndarray[int64_t] values, const int64_t[:] binner,
730                       object closed='left', bint hasnans=False):
731    """
732    Int64 (datetime64) version of generic python version in ``groupby.py``.
733    """
734    cdef:
735        Py_ssize_t lenidx, lenbin, i, j, bc, vc
736        ndarray[int64_t] bins
737        int64_t l_bin, r_bin, nat_count
738        bint right_closed = closed == 'right'
739
740    nat_count = 0
741    if hasnans:
742        mask = values == NPY_NAT
743        nat_count = np.sum(mask)
744        values = values[~mask]
745
746    lenidx = len(values)
747    lenbin = len(binner)
748
749    if lenidx <= 0 or lenbin <= 0:
750        raise ValueError("Invalid length for values or for binner")
751
752    # check binner fits data
753    if values[0] < binner[0]:
754        raise ValueError("Values falls before first bin")
755
756    if values[lenidx - 1] > binner[lenbin - 1]:
757        raise ValueError("Values falls after last bin")
758
759    bins = np.empty(lenbin - 1, dtype=np.int64)
760
761    j = 0  # index into values
762    bc = 0  # bin count
763
764    # linear scan
765    if right_closed:
766        for i in range(0, lenbin - 1):
767            r_bin = binner[i + 1]
768            # count values in current bin, advance to next bin
769            while j < lenidx and values[j] <= r_bin:
770                j += 1
771            bins[bc] = j
772            bc += 1
773    else:
774        for i in range(0, lenbin - 1):
775            r_bin = binner[i + 1]
776            # count values in current bin, advance to next bin
777            while j < lenidx and values[j] < r_bin:
778                j += 1
779            bins[bc] = j
780            bc += 1
781
782    if nat_count > 0:
783        # shift bins by the number of NaT
784        bins = bins + nat_count
785        bins = np.insert(bins, 0, nat_count)
786
787    return bins
788
789
790@cython.boundscheck(False)
791@cython.wraparound(False)
792def get_level_sorter(const int64_t[:] label, const int64_t[:] starts):
793    """
794    Argsort for a single level of a multi-index, keeping the order of higher
795    levels unchanged. `starts` points to starts of same-key indices w.r.t
796    to leading levels; equivalent to:
797        np.hstack([label[starts[i]:starts[i+1]].argsort(kind='mergesort')
798            + starts[i] for i in range(len(starts) - 1)])
799    """
800    cdef:
801        int64_t l, r
802        Py_ssize_t i
803        ndarray[int64_t, ndim=1] out = np.empty(len(label), dtype=np.int64)
804        ndarray[int64_t, ndim=1] label_arr = np.asarray(label)
805
806    for i in range(len(starts) - 1):
807        l, r = starts[i], starts[i + 1]
808        out[l:r] = l + label_arr[l:r].argsort(kind='mergesort')
809
810    return out
811
812
813@cython.boundscheck(False)
814@cython.wraparound(False)
815def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
816                   const int64_t[:] labels,
817                   Py_ssize_t max_bin,
818                   int axis):
819    cdef:
820        Py_ssize_t i, j, k, n
821        ndarray[int64_t, ndim=2] counts
822
823    assert (axis == 0 or axis == 1)
824    n, k = (<object>mask).shape
825
826    if axis == 0:
827        counts = np.zeros((max_bin, k), dtype='i8')
828        with nogil:
829            for i in range(n):
830                for j in range(k):
831                    if mask[i, j]:
832                        counts[labels[i], j] += 1
833
834    else:  # axis == 1
835        counts = np.zeros((n, max_bin), dtype='i8')
836        with nogil:
837            for i in range(n):
838                for j in range(k):
839                    if mask[i, j]:
840                        counts[i, labels[j]] += 1
841
842    return counts
843
844
845def generate_slices(const int64_t[:] labels, Py_ssize_t ngroups):
846    cdef:
847        Py_ssize_t i, group_size, n, start
848        int64_t lab
849        object slobj
850        ndarray[int64_t] starts, ends
851
852    n = len(labels)
853
854    starts = np.zeros(ngroups, dtype=np.int64)
855    ends = np.zeros(ngroups, dtype=np.int64)
856
857    start = 0
858    group_size = 0
859    for i in range(n):
860        lab = labels[i]
861        if lab < 0:
862            start += 1
863        else:
864            group_size += 1
865            if i == n - 1 or lab != labels[i + 1]:
866                starts[lab] = start
867                ends[lab] = start + group_size
868                start += group_size
869                group_size = 0
870
871    return starts, ends
872
873
874def indices_fast(ndarray index, const int64_t[:] labels, list keys,
875                 list sorted_labels):
876    """
877    Parameters
878    ----------
879    index : ndarray
880    labels : ndarray[int64]
881    keys : list
882    sorted_labels : list[ndarray[int64]]
883    """
884    cdef:
885        Py_ssize_t i, j, k, lab, cur, start, n = len(labels)
886        dict result = {}
887        object tup
888
889    k = len(keys)
890
891    if n == 0:
892        return result
893
894    start = 0
895    cur = labels[0]
896    for i in range(1, n):
897        lab = labels[i]
898
899        if lab != cur:
900            if lab != -1:
901                if k == 1:
902                    # When k = 1 we do not want to return a tuple as key
903                    tup = keys[0][sorted_labels[0][i - 1]]
904                else:
905                    tup = PyTuple_New(k)
906                    for j in range(k):
907                        val = keys[j][sorted_labels[j][i - 1]]
908                        PyTuple_SET_ITEM(tup, j, val)
909                        Py_INCREF(val)
910                result[tup] = index[start:i]
911            start = i
912        cur = lab
913
914    if k == 1:
915        # When k = 1 we do not want to return a tuple as key
916        tup = keys[0][sorted_labels[0][n - 1]]
917    else:
918        tup = PyTuple_New(k)
919        for j in range(k):
920            val = keys[j][sorted_labels[j][n - 1]]
921            PyTuple_SET_ITEM(tup, j, val)
922            Py_INCREF(val)
923    result[tup] = index[start:]
924
925    return result
926
927
928# core.common import for fast inference checks
929
930def is_float(obj: object) -> bool:
931    """
932    Return True if given object is float.
933
934    Returns
935    -------
936    bool
937    """
938    return util.is_float_object(obj)
939
940
941def is_integer(obj: object) -> bool:
942    """
943    Return True if given object is integer.
944
945    Returns
946    -------
947    bool
948    """
949    return util.is_integer_object(obj)
950
951
952def is_bool(obj: object) -> bool:
953    """
954    Return True if given object is boolean.
955
956    Returns
957    -------
958    bool
959    """
960    return util.is_bool_object(obj)
961
962
963def is_complex(obj: object) -> bool:
964    """
965    Return True if given object is complex.
966
967    Returns
968    -------
969    bool
970    """
971    return util.is_complex_object(obj)
972
973
974cpdef bint is_decimal(object obj):
975    return isinstance(obj, Decimal)
976
977
978cpdef bint is_interval(object obj):
979    return getattr(obj, '_typ', '_typ') == 'interval'
980
981
982def is_period(val: object) -> bool:
983    """
984    Return True if given object is Period.
985
986    Returns
987    -------
988    bool
989    """
990    return is_period_object(val)
991
992
993def is_list_like(obj: object, allow_sets: bool = True) -> bool:
994    """
995    Check if the object is list-like.
996
997    Objects that are considered list-like are for example Python
998    lists, tuples, sets, NumPy arrays, and Pandas Series.
999
1000    Strings and datetime objects, however, are not considered list-like.
1001
1002    Parameters
1003    ----------
1004    obj : object
1005        Object to check.
1006    allow_sets : bool, default True
1007        If this parameter is False, sets will not be considered list-like.
1008
1009        .. versionadded:: 0.24.0
1010
1011    Returns
1012    -------
1013    bool
1014        Whether `obj` has list-like properties.
1015
1016    Examples
1017    --------
1018    >>> is_list_like([1, 2, 3])
1019    True
1020    >>> is_list_like({1, 2, 3})
1021    True
1022    >>> is_list_like(datetime(2017, 1, 1))
1023    False
1024    >>> is_list_like("foo")
1025    False
1026    >>> is_list_like(1)
1027    False
1028    >>> is_list_like(np.array([2]))
1029    True
1030    >>> is_list_like(np.array(2))
1031    False
1032    """
1033    return c_is_list_like(obj, allow_sets)
1034
1035
1036cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1:
1037    return (
1038        isinstance(obj, abc.Iterable)
1039        # we do not count strings/unicode/bytes as list-like
1040        and not isinstance(obj, (str, bytes))
1041        # exclude zero-dimensional numpy arrays, effectively scalars
1042        and not (util.is_array(obj) and obj.ndim == 0)
1043        # exclude sets if allow_sets is False
1044        and not (allow_sets is False and isinstance(obj, abc.Set))
1045    )
1046
1047
1048_TYPE_MAP = {
1049    "categorical": "categorical",
1050    "category": "categorical",
1051    "int8": "integer",
1052    "int16": "integer",
1053    "int32": "integer",
1054    "int64": "integer",
1055    "i": "integer",
1056    "uint8": "integer",
1057    "uint16": "integer",
1058    "uint32": "integer",
1059    "uint64": "integer",
1060    "u": "integer",
1061    "float32": "floating",
1062    "float64": "floating",
1063    "f": "floating",
1064    "complex64": "complex",
1065    "complex128": "complex",
1066    "c": "complex",
1067    "string": "string",
1068    "S": "bytes",
1069    "U": "string",
1070    "bool": "boolean",
1071    "b": "boolean",
1072    "datetime64[ns]": "datetime64",
1073    "M": "datetime64",
1074    "timedelta64[ns]": "timedelta64",
1075    "m": "timedelta64",
1076    "interval": "interval",
1077}
1078
1079# types only exist on certain platform
1080try:
1081    np.float128
1082    _TYPE_MAP['float128'] = 'floating'
1083except AttributeError:
1084    pass
1085try:
1086    np.complex256
1087    _TYPE_MAP['complex256'] = 'complex'
1088except AttributeError:
1089    pass
1090try:
1091    np.float16
1092    _TYPE_MAP['float16'] = 'floating'
1093except AttributeError:
1094    pass
1095
1096
1097cdef class Seen:
1098    """
1099    Class for keeping track of the types of elements
1100    encountered when trying to perform type conversions.
1101    """
1102
1103    cdef:
1104        bint int_             # seen_int
1105        bint nat_             # seen nat
1106        bint bool_            # seen_bool
1107        bint null_            # seen_null
1108        bint nan_             # seen_np.nan
1109        bint uint_            # seen_uint (unsigned integer)
1110        bint sint_            # seen_sint (signed integer)
1111        bint float_           # seen_float
1112        bint object_          # seen_object
1113        bint complex_         # seen_complex
1114        bint datetime_        # seen_datetime
1115        bint coerce_numeric   # coerce data to numeric
1116        bint timedelta_       # seen_timedelta
1117        bint datetimetz_      # seen_datetimetz
1118
1119    def __cinit__(self, bint coerce_numeric=False):
1120        """
1121        Initialize a Seen instance.
1122
1123        Parameters
1124        ----------
1125        coerce_numeric : bool, default False
1126            Whether or not to force conversion to a numeric data type if
1127            initial methods to convert to numeric fail.
1128        """
1129        self.int_ = False
1130        self.nat_ = False
1131        self.bool_ = False
1132        self.null_ = False
1133        self.nan_ = False
1134        self.uint_ = False
1135        self.sint_ = False
1136        self.float_ = False
1137        self.object_ = False
1138        self.complex_ = False
1139        self.datetime_ = False
1140        self.timedelta_ = False
1141        self.datetimetz_ = False
1142        self.coerce_numeric = coerce_numeric
1143
1144    cdef inline bint check_uint64_conflict(self) except -1:
1145        """
1146        Check whether we can safely convert a uint64 array to a numeric dtype.
1147
1148        There are two cases when conversion to numeric dtype with a uint64
1149        array is not safe (and will therefore not be performed)
1150
1151        1) A NaN element is encountered.
1152
1153           uint64 cannot be safely cast to float64 due to truncation issues
1154           at the extreme ends of the range.
1155
1156        2) A negative number is encountered.
1157
1158           There is no numerical dtype that can hold both negative numbers
1159           and numbers greater than INT64_MAX. Hence, at least one number
1160           will be improperly cast if we convert to a numeric dtype.
1161
1162        Returns
1163        -------
1164        bool
1165            Whether or not we should return the original input array to avoid
1166            data truncation.
1167
1168        Raises
1169        ------
1170        ValueError
1171            uint64 elements were detected, and at least one of the
1172            two conflict cases was also detected. However, we are
1173            trying to force conversion to a numeric dtype.
1174        """
1175        return (self.uint_ and (self.null_ or self.sint_)
1176                and not self.coerce_numeric)
1177
1178    cdef inline saw_null(self):
1179        """
1180        Set flags indicating that a null value was encountered.
1181        """
1182        self.null_ = True
1183        self.float_ = True
1184
1185    cdef saw_int(self, object val):
1186        """
1187        Set flags indicating that an integer value was encountered.
1188
1189        In addition to setting a flag that an integer was seen, we
1190        also set two flags depending on the type of integer seen:
1191
1192        1) sint_ : a negative (signed) number in the
1193                   range of [-2**63, 0) was encountered
1194        2) uint_ : a positive number in the range of
1195                   [2**63, 2**64) was encountered
1196
1197        Parameters
1198        ----------
1199        val : Python int
1200            Value with which to set the flags.
1201        """
1202        self.int_ = True
1203        self.sint_ = self.sint_ or (oINT64_MIN <= val < 0)
1204        self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX)
1205
1206    @property
1207    def numeric_(self):
1208        return self.complex_ or self.float_ or self.int_
1209
1210    @property
1211    def is_bool(self):
1212        return not (self.datetime_ or self.numeric_ or self.timedelta_
1213                    or self.nat_)
1214
1215    @property
1216    def is_float_or_complex(self):
1217        return not (self.bool_ or self.datetime_ or self.timedelta_
1218                    or self.nat_)
1219
1220
1221cdef object _try_infer_map(object dtype):
1222    """
1223    If its in our map, just return the dtype.
1224    """
1225    cdef:
1226        object val
1227        str attr
1228    for attr in ["name", "kind", "base"]:
1229        val = getattr(dtype, attr)
1230        if val in _TYPE_MAP:
1231            return _TYPE_MAP[val]
1232    return None
1233
1234
1235def infer_dtype(value: object, skipna: bool = True) -> str:
1236    """
1237    Efficiently infer the type of a passed val, or list-like
1238    array of values. Return a string describing the type.
1239
1240    Parameters
1241    ----------
1242    value : scalar, list, ndarray, or pandas type
1243    skipna : bool, default True
1244        Ignore NaN values when inferring the type.
1245
1246    Returns
1247    -------
1248    str
1249        Describing the common type of the input data.
1250    Results can include:
1251
1252    - string
1253    - bytes
1254    - floating
1255    - integer
1256    - mixed-integer
1257    - mixed-integer-float
1258    - decimal
1259    - complex
1260    - categorical
1261    - boolean
1262    - datetime64
1263    - datetime
1264    - date
1265    - timedelta64
1266    - timedelta
1267    - time
1268    - period
1269    - mixed
1270
1271    Raises
1272    ------
1273    TypeError
1274        If ndarray-like but cannot infer the dtype
1275
1276    Notes
1277    -----
1278    - 'mixed' is the catchall for anything that is not otherwise
1279      specialized
1280    - 'mixed-integer-float' are floats and integers
1281    - 'mixed-integer' are integers mixed with non-integers
1282
1283    Examples
1284    --------
1285    >>> infer_dtype(['foo', 'bar'])
1286    'string'
1287
1288    >>> infer_dtype(['a', np.nan, 'b'], skipna=True)
1289    'string'
1290
1291    >>> infer_dtype(['a', np.nan, 'b'], skipna=False)
1292    'mixed'
1293
1294    >>> infer_dtype([b'foo', b'bar'])
1295    'bytes'
1296
1297    >>> infer_dtype([1, 2, 3])
1298    'integer'
1299
1300    >>> infer_dtype([1, 2, 3.5])
1301    'mixed-integer-float'
1302
1303    >>> infer_dtype([1.0, 2.0, 3.5])
1304    'floating'
1305
1306    >>> infer_dtype(['a', 1])
1307    'mixed-integer'
1308
1309    >>> infer_dtype([Decimal(1), Decimal(2.0)])
1310    'decimal'
1311
1312    >>> infer_dtype([True, False])
1313    'boolean'
1314
1315    >>> infer_dtype([True, False, np.nan])
1316    'mixed'
1317
1318    >>> infer_dtype([pd.Timestamp('20130101')])
1319    'datetime'
1320
1321    >>> infer_dtype([datetime.date(2013, 1, 1)])
1322    'date'
1323
1324    >>> infer_dtype([np.datetime64('2013-01-01')])
1325    'datetime64'
1326
1327    >>> infer_dtype([datetime.timedelta(0, 1, 1)])
1328    'timedelta'
1329
1330    >>> infer_dtype(pd.Series(list('aabc')).astype('category'))
1331    'categorical'
1332    """
1333    cdef:
1334        Py_ssize_t i, n
1335        object val
1336        ndarray values
1337        bint seen_pdnat = False
1338        bint seen_val = False
1339
1340    if util.is_array(value):
1341        values = value
1342    elif hasattr(value, "inferred_type") and skipna is False:
1343        # Index, use the cached attribute if possible, populate the cache otherwise
1344        return value.inferred_type
1345    elif hasattr(value, "dtype"):
1346        # this will handle ndarray-like
1347        # e.g. categoricals
1348        dtype = value.dtype
1349        if not isinstance(dtype, np.dtype):
1350            value = _try_infer_map(value.dtype)
1351            if value is not None:
1352                return value
1353
1354            # its ndarray-like but we can't handle
1355            raise ValueError(f"cannot infer type for {type(value)}")
1356
1357        # Unwrap Series/Index
1358        values = np.asarray(value)
1359
1360    else:
1361        if not isinstance(value, list):
1362            value = list(value)
1363
1364        from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
1365        values = construct_1d_object_array_from_listlike(value)
1366
1367    # make contiguous
1368    # for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup
1369    values = values.ravel(order="K")
1370
1371    val = _try_infer_map(values.dtype)
1372    if val is not None:
1373        return val
1374
1375    if values.dtype != np.object_:
1376        values = values.astype("O")
1377
1378    if skipna:
1379        values = values[~isnaobj(values)]
1380
1381    n = len(values)
1382    if n == 0:
1383        return "empty"
1384
1385    # try to use a valid value
1386    for i in range(n):
1387        val = values[i]
1388
1389        # do not use is_nul_datetimelike to keep
1390        # np.datetime64('nat') and np.timedelta64('nat')
1391        if val is None or util.is_nan(val):
1392            pass
1393        elif val is NaT:
1394            seen_pdnat = True
1395        else:
1396            seen_val = True
1397            break
1398
1399    # if all values are nan/NaT
1400    if seen_val is False and seen_pdnat is True:
1401        return "datetime"
1402        # float/object nan is handled in latter logic
1403
1404    if util.is_datetime64_object(val):
1405        if is_datetime64_array(values):
1406            return "datetime64"
1407
1408    elif is_timedelta(val):
1409        if is_timedelta_or_timedelta64_array(values):
1410            return "timedelta"
1411
1412    elif util.is_integer_object(val):
1413        # ordering matters here; this check must come after the is_timedelta
1414        #  check otherwise numpy timedelta64 objects would come through here
1415
1416        if is_integer_array(values):
1417            return "integer"
1418        elif is_integer_float_array(values):
1419            if is_integer_na_array(values):
1420                return "integer-na"
1421            else:
1422                return "mixed-integer-float"
1423        return "mixed-integer"
1424
1425    elif PyDateTime_Check(val):
1426        if is_datetime_array(values, skipna=skipna):
1427            return "datetime"
1428        elif is_date_array(values, skipna=skipna):
1429            return "date"
1430
1431    elif PyDate_Check(val):
1432        if is_date_array(values, skipna=skipna):
1433            return "date"
1434
1435    elif PyTime_Check(val):
1436        if is_time_array(values, skipna=skipna):
1437            return "time"
1438
1439    elif is_decimal(val):
1440        if is_decimal_array(values):
1441            return "decimal"
1442
1443    elif is_complex(val):
1444        if is_complex_array(values):
1445            return "complex"
1446
1447    elif util.is_float_object(val):
1448        if is_float_array(values):
1449            return "floating"
1450        elif is_integer_float_array(values):
1451            if is_integer_na_array(values):
1452                return "integer-na"
1453            else:
1454                return "mixed-integer-float"
1455
1456    elif util.is_bool_object(val):
1457        if is_bool_array(values, skipna=skipna):
1458            return "boolean"
1459
1460    elif isinstance(val, str):
1461        if is_string_array(values, skipna=skipna):
1462            return "string"
1463
1464    elif isinstance(val, bytes):
1465        if is_bytes_array(values, skipna=skipna):
1466            return "bytes"
1467
1468    elif is_period_object(val):
1469        if is_period_array(values):
1470            return "period"
1471
1472    elif is_interval(val):
1473        if is_interval_array(values):
1474            return "interval"
1475
1476    for i in range(n):
1477        val = values[i]
1478        if (util.is_integer_object(val) and
1479                not util.is_timedelta64_object(val) and
1480                not util.is_datetime64_object(val)):
1481            return "mixed-integer"
1482
1483    return "mixed"
1484
1485
1486def infer_datetimelike_array(arr: ndarray[object]) -> str:
1487    """
1488    Infer if we have a datetime or timedelta array.
1489    - date: we have *only* date and maybe strings, nulls
1490    - datetime: we have *only* datetimes and maybe strings, nulls
1491    - timedelta: we have *only* timedeltas and maybe strings, nulls
1492    - nat: we do not have *any* date, datetimes or timedeltas, but do have
1493      at least a NaT
1494    - mixed: other objects (strings, a mix of tz-aware and tz-naive, or
1495                            actual objects)
1496
1497    Parameters
1498    ----------
1499    arr : ndarray[object]
1500
1501    Returns
1502    -------
1503    str: {datetime, timedelta, date, nat, mixed}
1504    """
1505    cdef:
1506        Py_ssize_t i, n = len(arr)
1507        bint seen_timedelta = False, seen_date = False, seen_datetime = False
1508        bint seen_tz_aware = False, seen_tz_naive = False
1509        bint seen_nat = False
1510        list objs = []
1511        object v
1512
1513    for i in range(n):
1514        v = arr[i]
1515        if isinstance(v, str):
1516            objs.append(v)
1517
1518            if len(objs) == 3:
1519                break
1520
1521        elif v is None or util.is_nan(v):
1522            # nan or None
1523            pass
1524        elif v is NaT:
1525            seen_nat = True
1526        elif PyDateTime_Check(v):
1527            # datetime
1528            seen_datetime = True
1529
1530            # disambiguate between tz-naive and tz-aware
1531            if v.tzinfo is None:
1532                seen_tz_naive = True
1533            else:
1534                seen_tz_aware = True
1535
1536            if seen_tz_naive and seen_tz_aware:
1537                return 'mixed'
1538        elif util.is_datetime64_object(v):
1539            # np.datetime64
1540            seen_datetime = True
1541        elif PyDate_Check(v):
1542            seen_date = True
1543        elif is_timedelta(v):
1544            # timedelta, or timedelta64
1545            seen_timedelta = True
1546        else:
1547            return "mixed"
1548
1549    if seen_date and not (seen_datetime or seen_timedelta):
1550        return "date"
1551    elif seen_datetime and not seen_timedelta:
1552        return "datetime"
1553    elif seen_timedelta and not seen_datetime:
1554        return "timedelta"
1555    elif seen_nat:
1556        return "nat"
1557
1558    # short-circuit by trying to
1559    # actually convert these strings
1560    # this is for performance as we don't need to try
1561    # convert *every* string array
1562    if len(objs):
1563        try:
1564            array_to_datetime(objs, errors="raise")
1565            return "datetime"
1566        except (ValueError, TypeError):
1567            pass
1568
1569        # we are *not* going to infer from strings
1570        # for timedelta as too much ambiguity
1571
1572    return 'mixed'
1573
1574
1575cdef inline bint is_timedelta(object o):
1576    return PyDelta_Check(o) or util.is_timedelta64_object(o)
1577
1578
1579cdef class Validator:
1580
1581    cdef:
1582        Py_ssize_t n
1583        dtype dtype
1584        bint skipna
1585
1586    def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_),
1587                  bint skipna=False):
1588        self.n = n
1589        self.dtype = dtype
1590        self.skipna = skipna
1591
1592    cdef bint validate(self, ndarray values) except -1:
1593        if not self.n:
1594            return False
1595
1596        if self.is_array_typed():
1597            return True
1598        elif self.dtype.type_num == NPY_OBJECT:
1599            if self.skipna:
1600                return self._validate_skipna(values)
1601            else:
1602                return self._validate(values)
1603        else:
1604            return False
1605
1606    @cython.wraparound(False)
1607    @cython.boundscheck(False)
1608    cdef bint _validate(self, ndarray values) except -1:
1609        cdef:
1610            Py_ssize_t i
1611            Py_ssize_t n = self.n
1612
1613        for i in range(n):
1614            if not self.is_valid(values[i]):
1615                return False
1616
1617        return self.finalize_validate()
1618
1619    @cython.wraparound(False)
1620    @cython.boundscheck(False)
1621    cdef bint _validate_skipna(self, ndarray values) except -1:
1622        cdef:
1623            Py_ssize_t i
1624            Py_ssize_t n = self.n
1625
1626        for i in range(n):
1627            if not self.is_valid_skipna(values[i]):
1628                return False
1629
1630        return self.finalize_validate_skipna()
1631
1632    cdef bint is_valid(self, object value) except -1:
1633        return self.is_value_typed(value)
1634
1635    cdef bint is_valid_skipna(self, object value) except -1:
1636        return self.is_valid(value) or self.is_valid_null(value)
1637
1638    cdef bint is_value_typed(self, object value) except -1:
1639        raise NotImplementedError(f"{type(self).__name__} child class "
1640                                  "must define is_value_typed")
1641
1642    cdef bint is_valid_null(self, object value) except -1:
1643        return value is None or value is C_NA or util.is_nan(value)
1644
1645    cdef bint is_array_typed(self) except -1:
1646        return False
1647
1648    cdef inline bint finalize_validate(self):
1649        return True
1650
1651    cdef bint finalize_validate_skipna(self):
1652        # TODO(phillipc): Remove the existing validate methods and replace them
1653        # with the skipna versions upon full deprecation of skipna=False
1654        return True
1655
1656
1657cdef class BoolValidator(Validator):
1658    cdef inline bint is_value_typed(self, object value) except -1:
1659        return util.is_bool_object(value)
1660
1661    cdef inline bint is_array_typed(self) except -1:
1662        return issubclass(self.dtype.type, np.bool_)
1663
1664
1665cpdef bint is_bool_array(ndarray values, bint skipna=False):
1666    cdef:
1667        BoolValidator validator = BoolValidator(len(values),
1668                                                values.dtype,
1669                                                skipna=skipna)
1670    return validator.validate(values)
1671
1672
1673cdef class IntegerValidator(Validator):
1674    cdef inline bint is_value_typed(self, object value) except -1:
1675        return util.is_integer_object(value)
1676
1677    cdef inline bint is_array_typed(self) except -1:
1678        return issubclass(self.dtype.type, np.integer)
1679
1680
1681cpdef bint is_integer_array(ndarray values):
1682    cdef:
1683        IntegerValidator validator = IntegerValidator(len(values),
1684                                                      values.dtype)
1685    return validator.validate(values)
1686
1687
1688cdef class IntegerNaValidator(Validator):
1689    cdef inline bint is_value_typed(self, object value) except -1:
1690        return (util.is_integer_object(value)
1691                or (util.is_nan(value) and util.is_float_object(value)))
1692
1693
1694cdef bint is_integer_na_array(ndarray values):
1695    cdef:
1696        IntegerNaValidator validator = IntegerNaValidator(len(values),
1697                                                          values.dtype)
1698    return validator.validate(values)
1699
1700
1701cdef class IntegerFloatValidator(Validator):
1702    cdef inline bint is_value_typed(self, object value) except -1:
1703        return util.is_integer_object(value) or util.is_float_object(value)
1704
1705    cdef inline bint is_array_typed(self) except -1:
1706        return issubclass(self.dtype.type, np.integer)
1707
1708
1709cdef bint is_integer_float_array(ndarray values):
1710    cdef:
1711        IntegerFloatValidator validator = IntegerFloatValidator(len(values),
1712                                                                values.dtype)
1713    return validator.validate(values)
1714
1715
1716cdef class FloatValidator(Validator):
1717    cdef inline bint is_value_typed(self, object value) except -1:
1718        return util.is_float_object(value)
1719
1720    cdef inline bint is_array_typed(self) except -1:
1721        return issubclass(self.dtype.type, np.floating)
1722
1723
1724cpdef bint is_float_array(ndarray values):
1725    cdef:
1726        FloatValidator validator = FloatValidator(len(values), values.dtype)
1727    return validator.validate(values)
1728
1729
1730cdef class ComplexValidator(Validator):
1731    cdef inline bint is_value_typed(self, object value) except -1:
1732        return (
1733            util.is_complex_object(value)
1734            or (util.is_float_object(value) and is_nan(value))
1735        )
1736
1737    cdef inline bint is_array_typed(self) except -1:
1738        return issubclass(self.dtype.type, np.complexfloating)
1739
1740
1741cdef bint is_complex_array(ndarray values):
1742    cdef:
1743        ComplexValidator validator = ComplexValidator(len(values), values.dtype)
1744    return validator.validate(values)
1745
1746
1747cdef class DecimalValidator(Validator):
1748    cdef inline bint is_value_typed(self, object value) except -1:
1749        return is_decimal(value)
1750
1751
1752cdef bint is_decimal_array(ndarray values):
1753    cdef:
1754        DecimalValidator validator = DecimalValidator(len(values), values.dtype)
1755    return validator.validate(values)
1756
1757
1758cdef class StringValidator(Validator):
1759    cdef inline bint is_value_typed(self, object value) except -1:
1760        return isinstance(value, str)
1761
1762    cdef inline bint is_array_typed(self) except -1:
1763        return issubclass(self.dtype.type, np.str_)
1764
1765    cdef bint is_valid_null(self, object value) except -1:
1766        # We deliberately exclude None / NaN here since StringArray uses NA
1767        return value is C_NA
1768
1769
1770cpdef bint is_string_array(ndarray values, bint skipna=False):
1771    cdef:
1772        StringValidator validator = StringValidator(len(values),
1773                                                    values.dtype,
1774                                                    skipna=skipna)
1775    return validator.validate(values)
1776
1777
1778cdef class BytesValidator(Validator):
1779    cdef inline bint is_value_typed(self, object value) except -1:
1780        return isinstance(value, bytes)
1781
1782    cdef inline bint is_array_typed(self) except -1:
1783        return issubclass(self.dtype.type, np.bytes_)
1784
1785
1786cdef bint is_bytes_array(ndarray values, bint skipna=False):
1787    cdef:
1788        BytesValidator validator = BytesValidator(len(values), values.dtype,
1789                                                  skipna=skipna)
1790    return validator.validate(values)
1791
1792
1793cdef class TemporalValidator(Validator):
1794    cdef:
1795        Py_ssize_t generic_null_count
1796
1797    def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_),
1798                  bint skipna=False):
1799        self.n = n
1800        self.dtype = dtype
1801        self.skipna = skipna
1802        self.generic_null_count = 0
1803
1804    cdef inline bint is_valid(self, object value) except -1:
1805        return self.is_value_typed(value) or self.is_valid_null(value)
1806
1807    cdef bint is_valid_null(self, object value) except -1:
1808        raise NotImplementedError(f"{type(self).__name__} child class "
1809                                  "must define is_valid_null")
1810
1811    cdef inline bint is_valid_skipna(self, object value) except -1:
1812        cdef:
1813            bint is_typed_null = self.is_valid_null(value)
1814            bint is_generic_null = value is None or util.is_nan(value)
1815        self.generic_null_count += is_typed_null and is_generic_null
1816        return self.is_value_typed(value) or is_typed_null or is_generic_null
1817
1818    cdef inline bint finalize_validate_skipna(self):
1819        return self.generic_null_count != self.n
1820
1821
1822cdef class DatetimeValidator(TemporalValidator):
1823    cdef bint is_value_typed(self, object value) except -1:
1824        return PyDateTime_Check(value)
1825
1826    cdef inline bint is_valid_null(self, object value) except -1:
1827        return is_null_datetime64(value)
1828
1829
1830cpdef bint is_datetime_array(ndarray values, bint skipna=True):
1831    cdef:
1832        DatetimeValidator validator = DatetimeValidator(len(values),
1833                                                        skipna=skipna)
1834    return validator.validate(values)
1835
1836
1837cdef class Datetime64Validator(DatetimeValidator):
1838    cdef inline bint is_value_typed(self, object value) except -1:
1839        return util.is_datetime64_object(value)
1840
1841
1842cpdef bint is_datetime64_array(ndarray values):
1843    cdef:
1844        Datetime64Validator validator = Datetime64Validator(len(values),
1845                                                            skipna=True)
1846    return validator.validate(values)
1847
1848
1849# TODO: only non-here use is in test
1850def is_datetime_with_singletz_array(values: ndarray) -> bool:
1851    """
1852    Check values have the same tzinfo attribute.
1853    Doesn't check values are datetime-like types.
1854    """
1855    cdef:
1856        Py_ssize_t i = 0, j, n = len(values)
1857        object base_val, base_tz, val, tz
1858
1859    if n == 0:
1860        return False
1861    # Get a reference timezone to compare with the rest of the tzs in the array
1862    for i in range(n):
1863        base_val = values[i]
1864        if base_val is not NaT:
1865            base_tz = getattr(base_val, 'tzinfo', None)
1866            break
1867
1868    for j in range(i, n):
1869        # Compare val's timezone with the reference timezone
1870        # NaT can coexist with tz-aware datetimes, so skip if encountered
1871        val = values[j]
1872        if val is not NaT:
1873            tz = getattr(val, 'tzinfo', None)
1874            if not tz_compare(base_tz, tz):
1875                return False
1876
1877    return True
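
# A hedged usage sketch (not part of the test suite); it assumes this module
# is compiled and importable as ``pandas._libs.lib``:
#
#     >>> import numpy as np
#     >>> from datetime import datetime, timezone
#     >>> from pandas._libs import lib
#     >>> vals = np.array([datetime(2020, 1, 1, tzinfo=timezone.utc),
#     ...                  datetime(2020, 1, 2, tzinfo=timezone.utc)],
#     ...                 dtype=object)
#     >>> lib.is_datetime_with_singletz_array(vals)
#     True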
1878
1879
1880cdef class TimedeltaValidator(TemporalValidator):
1881    cdef bint is_value_typed(self, object value) except -1:
1882        return PyDelta_Check(value)
1883
1884    cdef inline bint is_valid_null(self, object value) except -1:
1885        return is_null_timedelta64(value)
1886
1887
1888cdef class AnyTimedeltaValidator(TimedeltaValidator):
1889    cdef inline bint is_value_typed(self, object value) except -1:
1890        return is_timedelta(value)
1891
1892
# TODO: the only use outside this module is in tests
1894cpdef bint is_timedelta_or_timedelta64_array(ndarray values):
1895    """
    Check whether all values are timedelta or timedelta64, allowing NaT/None.
1897    """
1898    cdef:
1899        AnyTimedeltaValidator validator = AnyTimedeltaValidator(len(values),
1900                                                                skipna=True)
1901    return validator.validate(values)
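
# Hedged usage sketch (same ``pandas._libs.lib`` import assumption as above):
#
#     >>> import numpy as np
#     >>> from datetime import timedelta
#     >>> from pandas._libs import lib
#     >>> vals = np.array([timedelta(days=1), np.timedelta64(2, "h"), None],
#     ...                 dtype=object)
#     >>> lib.is_timedelta_or_timedelta64_array(vals)
#     True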
1902
1903
1904cdef class DateValidator(Validator):
1905    cdef inline bint is_value_typed(self, object value) except -1:
1906        return PyDate_Check(value)
1907
1908
1909cpdef bint is_date_array(ndarray values, bint skipna=False):
1910    cdef:
1911        DateValidator validator = DateValidator(len(values), skipna=skipna)
1912    return validator.validate(values)
1913
1914
1915cdef class TimeValidator(Validator):
1916    cdef inline bint is_value_typed(self, object value) except -1:
1917        return PyTime_Check(value)
1918
1919
1920cpdef bint is_time_array(ndarray values, bint skipna=False):
1921    cdef:
1922        TimeValidator validator = TimeValidator(len(values), skipna=skipna)
1923    return validator.validate(values)
1924
1925
1926cdef class PeriodValidator(TemporalValidator):
1927    cdef inline bint is_value_typed(self, object value) except -1:
1928        return is_period_object(value)
1929
1930    cdef inline bint is_valid_null(self, object value) except -1:
1931        return checknull_with_nat(value)
1932
1933
1934cpdef bint is_period_array(ndarray values):
1935    cdef:
1936        PeriodValidator validator = PeriodValidator(len(values), skipna=True)
1937    return validator.validate(values)
1938
1939
1940cdef class IntervalValidator(Validator):
1941    cdef inline bint is_value_typed(self, object value) except -1:
1942        return is_interval(value)
1943
1944
1945cpdef bint is_interval_array(ndarray values):
1946    cdef:
1947        IntervalValidator validator = IntervalValidator(len(values),
1948                                                        skipna=True)
1949    return validator.validate(values)
1950
1951
1952@cython.boundscheck(False)
1953@cython.wraparound(False)
1954def maybe_convert_numeric(ndarray[object] values, set na_values,
1955                          bint convert_empty=True, bint coerce_numeric=False):
1956    """
1957    Convert object array to a numeric array if possible.
1958
1959    Parameters
1960    ----------
1961    values : ndarray
1962        Array of object elements to convert.
1963    na_values : set
1964        Set of values that should be interpreted as NaN.
1965    convert_empty : bool, default True
1966        If an empty array-like object is encountered, whether to interpret
1967        that element as NaN or not. If set to False, a ValueError will be
1968        raised if such an element is encountered and 'coerce_numeric' is False.
1969    coerce_numeric : bool, default False
1970        If initial attempts to convert to numeric have failed, whether to
1971        force conversion to numeric via alternative methods or by setting the
1972        element to NaN. Otherwise, an Exception will be raised when such an
1973        element is encountered.
1974
        This boolean also has an impact on how conversion behaves when no
        single numerical dtype can hold all of the converted values (e.g. a
        mix of uint64 values and negative integers). If set to False, the
        original object array will be returned. Otherwise, a ValueError will
        be raised.
1979
1980    Returns
1981    -------
    Numeric array of the converted values, or the original object array if
    no single numeric dtype can safely hold all of them.
1983    """
1984    if len(values) == 0:
1985        return np.array([], dtype='i8')
1986
1987    # fastpath for ints - try to convert all based on first value
1988    cdef:
1989        object val = values[0]
1990
1991    if util.is_integer_object(val):
1992        try:
1993            maybe_ints = values.astype('i8')
1994            if (maybe_ints == values).all():
1995                return maybe_ints
1996        except (ValueError, OverflowError, TypeError):
1997            pass
1998
1999    # Otherwise, iterate and do full inference.
2000    cdef:
2001        int status, maybe_int
2002        Py_ssize_t i, n = values.size
2003        Seen seen = Seen(coerce_numeric)
2004        ndarray[float64_t] floats = np.empty(n, dtype='f8')
2005        ndarray[complex128_t] complexes = np.empty(n, dtype='c16')
2006        ndarray[int64_t] ints = np.empty(n, dtype='i8')
2007        ndarray[uint64_t] uints = np.empty(n, dtype='u8')
2008        ndarray[uint8_t] bools = np.empty(n, dtype='u1')
2009        float64_t fval
2010
2011    for i in range(n):
2012        val = values[i]
2013
2014        if val.__hash__ is not None and val in na_values:
2015            seen.saw_null()
2016            floats[i] = complexes[i] = NaN
2017        elif util.is_float_object(val):
2018            fval = val
2019            if fval != fval:
2020                seen.null_ = True
2021
2022            floats[i] = complexes[i] = fval
2023            seen.float_ = True
2024        elif util.is_integer_object(val):
2025            floats[i] = complexes[i] = val
2026
2027            val = int(val)
2028            seen.saw_int(val)
2029
2030            if val >= 0:
2031                if val <= oUINT64_MAX:
2032                    uints[i] = val
2033                else:
2034                    seen.float_ = True
2035
2036            if oINT64_MIN <= val <= oINT64_MAX:
2037                ints[i] = val
2038
2039            if val < oINT64_MIN or (seen.sint_ and seen.uint_):
2040                seen.float_ = True
2041
2042        elif util.is_bool_object(val):
2043            floats[i] = uints[i] = ints[i] = bools[i] = val
2044            seen.bool_ = True
2045        elif val is None or val is C_NA:
2046            seen.saw_null()
2047            floats[i] = complexes[i] = NaN
2048        elif hasattr(val, '__len__') and len(val) == 0:
2049            if convert_empty or seen.coerce_numeric:
2050                seen.saw_null()
2051                floats[i] = complexes[i] = NaN
2052            else:
2053                raise ValueError("Empty string encountered")
2054        elif util.is_complex_object(val):
2055            complexes[i] = val
2056            seen.complex_ = True
2057        elif is_decimal(val):
2058            floats[i] = complexes[i] = val
2059            seen.float_ = True
2060        else:
2061            try:
2062                status = floatify(val, &fval, &maybe_int)
2063
2064                if fval in na_values:
2065                    seen.saw_null()
2066                    floats[i] = complexes[i] = NaN
2067                else:
2068                    if fval != fval:
2069                        seen.null_ = True
2070
2071                    floats[i] = fval
2072
2073                if maybe_int:
2074                    as_int = int(val)
2075
2076                    if as_int in na_values:
2077                        seen.saw_null()
2078                    else:
2079                        seen.saw_int(as_int)
2080
2081                    if as_int not in na_values:
2082                        if as_int < oINT64_MIN or as_int > oUINT64_MAX:
2083                            if seen.coerce_numeric:
2084                                seen.float_ = True
2085                            else:
2086                                raise ValueError("Integer out of range.")
2087                        else:
2088                            if as_int >= 0:
2089                                uints[i] = as_int
2090
2091                            if as_int <= oINT64_MAX:
2092                                ints[i] = as_int
2093
2094                    seen.float_ = seen.float_ or (seen.uint_ and seen.sint_)
2095                else:
2096                    seen.float_ = True
2097            except (TypeError, ValueError) as err:
2098                if not seen.coerce_numeric:
2099                    raise type(err)(f"{err} at position {i}")
2100
2101                seen.saw_null()
2102                floats[i] = NaN
2103
2104    if seen.check_uint64_conflict():
2105        return values
2106
2107    if seen.complex_:
2108        return complexes
2109    elif seen.float_:
2110        return floats
2111    elif seen.int_:
2112        if seen.uint_:
2113            return uints
2114        else:
2115            return ints
2116    elif seen.bool_:
2117        return bools.view(np.bool_)
2118    elif seen.uint_:
2119        return uints
2120    return ints
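
# Hedged usage sketch (same ``pandas._libs.lib`` import assumption as above);
# "N/A" is only an arbitrary user-supplied NA marker:
#
#     >>> import numpy as np
#     >>> from pandas._libs import lib
#     >>> lib.maybe_convert_numeric(
#     ...     np.array(["1.5", "2", "N/A"], dtype=object), na_values={"N/A"})
#     array([1.5, 2. , nan])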
2121
2122
2123@cython.boundscheck(False)
2124@cython.wraparound(False)
2125def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
2126                          bint safe=False, bint convert_datetime=False,
2127                          bint convert_timedelta=False,
2128                          bint convert_to_nullable_integer=False):
2129    """
    Type inference function: convert an object array to a proper dtype.
2131
2132    Parameters
2133    ----------
    objects : ndarray[object]
        Array of object elements to convert.
    try_float : bool, default False
        If an array-like object that contains only float or NaN values is
        encountered, whether to convert it and return an array of float
        dtype.
    safe : bool, default False
        Whether to avoid upcasting numeric types (e.g. int cast to float).
        If set to True, no upcasting will be performed.
    convert_datetime : bool, default False
        If an array-like object that contains only datetime values or NaT is
        encountered, whether to convert it and return an array of M8[ns]
        dtype.
    convert_timedelta : bool, default False
        If an array-like object that contains only timedelta values or NaT
        is encountered, whether to convert it and return an array of m8[ns]
        dtype.
    convert_to_nullable_integer : bool, default False
        If an array-like object that contains only integer values (and NaN)
        is encountered, whether to convert it and return an IntegerArray.
2151
2152    Returns
2153    -------
    Array of the input values converted to a more specific dtype, if
    possible; otherwise the original object array.
2155    """
2156    cdef:
2157        Py_ssize_t i, n
2158        ndarray[float64_t] floats
2159        ndarray[complex128_t] complexes
2160        ndarray[int64_t] ints
2161        ndarray[uint64_t] uints
2162        ndarray[uint8_t] bools
        int64_t[:] idatetimes
2164        int64_t[:] itimedeltas
2165        Seen seen = Seen()
2166        object val
2167        float64_t fval, fnan
2168
2169    n = len(objects)
2170
2171    floats = np.empty(n, dtype='f8')
2172    complexes = np.empty(n, dtype='c16')
2173    ints = np.empty(n, dtype='i8')
2174    uints = np.empty(n, dtype='u8')
2175    bools = np.empty(n, dtype=np.uint8)
2176    mask = np.full(n, False)
2177
2178    if convert_datetime:
2179        datetimes = np.empty(n, dtype='M8[ns]')
2180        idatetimes = datetimes.view(np.int64)
2181
2182    if convert_timedelta:
2183        timedeltas = np.empty(n, dtype='m8[ns]')
2184        itimedeltas = timedeltas.view(np.int64)
2185
2186    fnan = np.nan
2187
2188    for i in range(n):
2189        val = objects[i]
2190
2191        if val is None:
2192            seen.null_ = True
2193            floats[i] = complexes[i] = fnan
2194            mask[i] = True
2195        elif val is NaT:
2196            seen.nat_ = True
2197            if convert_datetime:
2198                idatetimes[i] = NPY_NAT
2199            if convert_timedelta:
2200                itimedeltas[i] = NPY_NAT
2201            if not (convert_datetime or convert_timedelta):
2202                seen.object_ = True
2203                break
2204        elif val is np.nan:
2205            seen.nan_ = True
2206            mask[i] = True
2207            floats[i] = complexes[i] = val
2208        elif util.is_bool_object(val):
2209            seen.bool_ = True
2210            bools[i] = val
2211        elif util.is_float_object(val):
2212            floats[i] = complexes[i] = val
2213            seen.float_ = True
2214        elif util.is_datetime64_object(val):
2215            if convert_datetime:
2216                idatetimes[i] = convert_to_tsobject(
2217                    val, None, None, 0, 0).value
2218                seen.datetime_ = True
2219            else:
2220                seen.object_ = True
2221                break
2222        elif is_timedelta(val):
2223            if convert_timedelta:
2224                itimedeltas[i] = convert_to_timedelta64(val, 'ns')
2225                seen.timedelta_ = True
2226            else:
2227                seen.object_ = True
2228                break
2229        elif util.is_integer_object(val):
2230            seen.int_ = True
2231            floats[i] = <float64_t>val
2232            complexes[i] = <double complex>val
2233            if not seen.null_:
2234                val = int(val)
2235                seen.saw_int(val)
2236
2237                if ((seen.uint_ and seen.sint_) or
2238                        val > oUINT64_MAX or val < oINT64_MIN):
2239                    seen.object_ = True
2240                    break
2241
2242                if seen.uint_:
2243                    uints[i] = val
2244                elif seen.sint_:
2245                    ints[i] = val
2246                else:
2247                    uints[i] = val
2248                    ints[i] = val
2249
2250        elif util.is_complex_object(val):
2251            complexes[i] = val
2252            seen.complex_ = True
2253        elif PyDateTime_Check(val) or util.is_datetime64_object(val):
2254
            # if we have a tz attached then return the objects
2256            if convert_datetime:
2257                if getattr(val, 'tzinfo', None) is not None:
2258                    seen.datetimetz_ = True
2259                    break
2260                else:
2261                    seen.datetime_ = True
2262                    idatetimes[i] = convert_to_tsobject(
2263                        val, None, None, 0, 0).value
2264            else:
2265                seen.object_ = True
2266                break
2267        elif try_float and not isinstance(val, str):
2268            # this will convert Decimal objects
2269            try:
2270                floats[i] = float(val)
2271                complexes[i] = complex(val)
2272                seen.float_ = True
2273            except (ValueError, TypeError):
2274                seen.object_ = True
2275                break
2276        else:
2277            seen.object_ = True
2278            break
2279
    # we try to coerce datetimes w/tz, but they must all share the same tz
2281    if seen.datetimetz_:
2282        if is_datetime_with_singletz_array(objects):
2283            from pandas import DatetimeIndex
2284            return DatetimeIndex(objects)
2285        seen.object_ = True
2286
2287    if not seen.object_:
2288        if not safe:
2289            if seen.null_ or seen.nan_:
2290                if seen.is_float_or_complex:
2291                    if seen.complex_:
2292                        return complexes
2293                    elif seen.float_:
2294                        return floats
2295                    elif seen.int_:
2296                        if convert_to_nullable_integer:
2297                            from pandas.core.arrays import IntegerArray
2298                            return IntegerArray(ints, mask)
2299                        else:
2300                            return floats
2301                    elif seen.nan_:
2302                        return floats
2303            else:
2304                if not seen.bool_:
2305                    if seen.datetime_:
2306                        if not seen.numeric_ and not seen.timedelta_:
2307                            return datetimes
2308                    elif seen.timedelta_:
2309                        if not seen.numeric_:
2310                            return timedeltas
2311                    elif seen.nat_:
2312                        if not seen.numeric_:
2313                            if convert_datetime and convert_timedelta:
                                # TODO: resolve ambiguity for an all-NaT array
2315                                pass
2316                            elif convert_datetime:
2317                                return datetimes
2318                            elif convert_timedelta:
2319                                return timedeltas
2320                    else:
2321                        if seen.complex_:
2322                            return complexes
2323                        elif seen.float_:
2324                            return floats
2325                        elif seen.int_:
2326                            if seen.uint_:
2327                                return uints
2328                            else:
2329                                return ints
2330                elif seen.is_bool:
2331                    return bools.view(np.bool_)
2332
2333        else:
2334            # don't cast int to float, etc.
2335            if seen.null_:
2336                if seen.is_float_or_complex:
2337                    if seen.complex_:
2338                        if not seen.int_:
2339                            return complexes
2340                    elif seen.float_ or seen.nan_:
2341                        if not seen.int_:
2342                            return floats
2343            else:
2344                if not seen.bool_:
2345                    if seen.datetime_:
2346                        if not seen.numeric_ and not seen.timedelta_:
2347                            return datetimes
2348                    elif seen.timedelta_:
2349                        if not seen.numeric_:
2350                            return timedeltas
2351                    elif seen.nat_:
2352                        if not seen.numeric_:
2353                            if convert_datetime and convert_timedelta:
                                # TODO: resolve ambiguity for an all-NaT array
2355                                pass
2356                            elif convert_datetime:
2357                                return datetimes
2358                            elif convert_timedelta:
2359                                return timedeltas
2360                    else:
2361                        if seen.complex_:
2362                            if not seen.int_:
2363                                return complexes
2364                        elif seen.float_ or seen.nan_:
2365                            if not seen.int_:
2366                                return floats
2367                        elif seen.int_:
2368                            if seen.uint_:
2369                                return uints
2370                            else:
2371                                return ints
2372                elif seen.is_bool and not seen.nan_:
2373                    return bools.view(np.bool_)
2374
2375    return objects
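
# Hedged usage sketch (same ``pandas._libs.lib`` import assumption as above):
#
#     >>> import numpy as np
#     >>> from pandas._libs import lib
#     >>> lib.maybe_convert_objects(np.array([1, 2, 3], dtype=object))
#     array([1, 2, 3])
#     >>> lib.maybe_convert_objects(np.array([1.0, None, 3.0], dtype=object))
#     array([ 1., nan,  3.])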
2376
2377
2378# Note: no_default is exported to the public API in pandas.api.extensions
2379no_default = object()  #: Sentinel indicating the default value.
2380
2381
2382@cython.boundscheck(False)
2383@cython.wraparound(False)
2384def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
2385                   object na_value=no_default, object dtype=object):
2386    """
2387    Substitute for np.vectorize with pandas-friendly dtype inference.
2388
2389    Parameters
2390    ----------
2391    arr : ndarray
2392    f : function
2393    mask : ndarray
2394        uint8 dtype ndarray indicating values not to apply `f` to.
2395    convert : bool, default True
        Whether to call `maybe_convert_objects` on the resulting ndarray.
    na_value : Any, optional
        The result value to use for masked values. By default, the
        input value is used.
2400    dtype : numpy.dtype
2401        The numpy dtype to use for the result ndarray.
2402
2403    Returns
2404    -------
2405    ndarray
2406    """
2407    cdef:
2408        Py_ssize_t i, n
2409        ndarray result
2410        object val
2411
2412    n = len(arr)
2413    result = np.empty(n, dtype=dtype)
2414    for i in range(n):
2415        if mask[i]:
2416            if na_value is no_default:
2417                val = arr[i]
2418            else:
2419                val = na_value
2420        else:
2421            val = f(arr[i])
2422
2423            if cnp.PyArray_IsZeroDim(val):
2424                # unbox 0-dim arrays, GH#690
2425                val = val.item()
2426
2427        result[i] = val
2428
2429    if convert:
2430        return maybe_convert_objects(result,
2431                                     try_float=False,
2432                                     convert_datetime=False,
2433                                     convert_timedelta=False)
2434
2435    return result
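
# Hedged usage sketch (same ``pandas._libs.lib`` import assumption as above);
# ``len`` stands in for an arbitrary elementwise function:
#
#     >>> import numpy as np
#     >>> from pandas._libs import lib
#     >>> arr = np.array(["a", "bb", None], dtype=object)
#     >>> mask = np.array([0, 0, 1], dtype=np.uint8)
#     >>> lib.map_infer_mask(arr, len, mask, na_value=np.nan)
#     array([ 1.,  2., nan])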
2436
2437
2438@cython.boundscheck(False)
2439@cython.wraparound(False)
2440def map_infer(ndarray arr, object f, bint convert=True, bint ignore_na=False):
2441    """
2442    Substitute for np.vectorize with pandas-friendly dtype inference.
2443
2444    Parameters
2445    ----------
2446    arr : ndarray
2447    f : function
    convert : bool, default True
        Whether to call `maybe_convert_objects` on the resulting ndarray.
    ignore_na : bool, default False
        If True, NA values will not have `f` applied.
2451
2452    Returns
2453    -------
2454    ndarray
2455    """
2456    cdef:
2457        Py_ssize_t i, n
2458        ndarray[object] result
2459        object val
2460
2461    n = len(arr)
2462    result = np.empty(n, dtype=object)
2463    for i in range(n):
2464        if ignore_na and checknull(arr[i]):
2465            result[i] = arr[i]
2466            continue
2467        val = f(arr[i])
2468
2469        if cnp.PyArray_IsZeroDim(val):
2470            # unbox 0-dim arrays, GH#690
2471            val = val.item()
2472
2473        result[i] = val
2474
2475    if convert:
2476        return maybe_convert_objects(result,
2477                                     try_float=False,
2478                                     convert_datetime=False,
2479                                     convert_timedelta=False)
2480
2481    return result
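
# Hedged usage sketch (same ``pandas._libs.lib`` import assumption as above):
#
#     >>> import numpy as np
#     >>> from pandas._libs import lib
#     >>> lib.map_infer(np.array(["a", "bb", "ccc"], dtype=object), len)
#     array([1, 2, 3])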
2482
2483
2484def to_object_array(rows: object, int min_width=0):
2485    """
2486    Convert a list of lists into an object array.
2487
2488    Parameters
2489    ----------
    rows : list of lists
        List of lists to be converted into an array.
    min_width : int, default 0
        Minimum width (number of columns) of the object array. If a list
        in `rows` contains fewer than `min_width` elements, the remaining
        elements in the corresponding row will all be None.
2497
2498    Returns
2499    -------
2500    numpy array of the object dtype.
2501    """
2502    cdef:
2503        Py_ssize_t i, j, n, k, tmp
2504        ndarray[object, ndim=2] result
2505        list row
2506
2507    rows = list(rows)
2508    n = len(rows)
2509
2510    k = min_width
2511    for i in range(n):
2512        tmp = len(rows[i])
2513        if tmp > k:
2514            k = tmp
2515
2516    result = np.empty((n, k), dtype=object)
2517
2518    for i in range(n):
2519        row = list(rows[i])
2520
2521        for j in range(len(row)):
2522            result[i, j] = row[j]
2523
2524    return result
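
# Hedged usage sketch (same ``pandas._libs.lib`` import assumption as above);
# note how the short row is padded with None up to the widest row:
#
#     >>> from pandas._libs import lib
#     >>> lib.to_object_array([[1, 2], [3]])
#     array([[1, 2],
#            [3, None]], dtype=object)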
2525
2526
2527def tuples_to_object_array(ndarray[object] tuples):
2528    cdef:
2529        Py_ssize_t i, j, n, k, tmp
2530        ndarray[object, ndim=2] result
2531        tuple tup
2532
2533    n = len(tuples)
2534    k = len(tuples[0])
2535    result = np.empty((n, k), dtype=object)
2536    for i in range(n):
2537        tup = tuples[i]
2538        for j in range(k):
2539            result[i, j] = tup[j]
2540
2541    return result
2542
2543
2544def to_object_array_tuples(rows: object):
2545    """
2546    Convert a list of tuples into an object array. Any subclass of
    tuple in `rows` will be cast to tuple.
2548
2549    Parameters
2550    ----------
    rows : list of tuples
2552        List of tuples to be converted into an array.
2553
2554    Returns
2555    -------
2556    numpy array of the object dtype.
2557    """
2558    cdef:
2559        Py_ssize_t i, j, n, k, tmp
2560        ndarray[object, ndim=2] result
2561        tuple row
2562
2563    rows = list(rows)
2564    n = len(rows)
2565
2566    k = 0
2567    for i in range(n):
2568        tmp = 1 if checknull(rows[i]) else len(rows[i])
2569        if tmp > k:
2570            k = tmp
2571
2572    result = np.empty((n, k), dtype=object)
2573
2574    try:
2575        for i in range(n):
2576            row = rows[i]
2577            for j in range(len(row)):
2578                result[i, j] = row[j]
2579    except TypeError:
2580        # e.g. "Expected tuple, got list"
2581        # upcast any subclasses to tuple
2582        for i in range(n):
2583            row = (rows[i],) if checknull(rows[i]) else tuple(rows[i])
2584            for j in range(len(row)):
2585                result[i, j] = row[j]
2586
2587    return result
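
# Hedged usage sketch (same ``pandas._libs.lib`` import assumption as above):
#
#     >>> from pandas._libs import lib
#     >>> lib.to_object_array_tuples([(1, 2), (3, 4, 5)])
#     array([[1, 2, None],
#            [3, 4, 5]], dtype=object)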
2588
2589
2590@cython.wraparound(False)
2591@cython.boundscheck(False)
2592def fast_multiget(dict mapping, ndarray keys, default=np.nan):
2593    cdef:
2594        Py_ssize_t i, n = len(keys)
2595        object val
2596        ndarray[object] output = np.empty(n, dtype='O')
2597
2598    if n == 0:
2599        # kludge, for Series
2600        return np.empty(0, dtype='f8')
2601
2602    for i in range(n):
2603        val = keys[i]
2604        if val in mapping:
2605            output[i] = mapping[val]
2606        else:
2607            output[i] = default
2608
2609    return maybe_convert_objects(output)
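
# fast_multiget looks up each element of ``keys`` in ``mapping``, filling in
# ``default`` for missing keys, then runs dtype inference on the result.
# Hedged usage sketch (same ``pandas._libs.lib`` import assumption as above):
#
#     >>> import numpy as np
#     >>> from pandas._libs import lib
#     >>> lib.fast_multiget({"a": 1, "b": 2},
#     ...                   np.array(["a", "b", "c"], dtype=object))
#     array([ 1.,  2., nan])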
2610