1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18import os
19import warnings
20
21
22cdef _sequence_to_array(object sequence, object mask, object size,
23                        DataType type, CMemoryPool* pool, c_bool from_pandas):
24    cdef:
25        int64_t c_size
26        PyConversionOptions options
27        shared_ptr[CChunkedArray] chunked
28
29    if type is not None:
30        options.type = type.sp_type
31
32    if size is not None:
33        options.size = size
34
35    options.from_pandas = from_pandas
36    options.ignore_timezone = os.environ.get('PYARROW_IGNORE_TIMEZONE', False)
37
38    with nogil:
39        chunked = GetResultValue(
40            ConvertPySequence(sequence, mask, options, pool)
41        )
42
43    if chunked.get().num_chunks() == 1:
44        return pyarrow_wrap_array(chunked.get().chunk(0))
45    else:
46        return pyarrow_wrap_chunked_array(chunked)
47
48
49cdef inline _is_array_like(obj):
50    if isinstance(obj, np.ndarray):
51        return True
52    return pandas_api._have_pandas_internal() and pandas_api.is_array_like(obj)
53
54
55def _ndarray_to_arrow_type(object values, DataType type):
56    return pyarrow_wrap_data_type(_ndarray_to_type(values, type))
57
58
59cdef shared_ptr[CDataType] _ndarray_to_type(object values,
60                                            DataType type) except *:
61    cdef shared_ptr[CDataType] c_type
62
63    dtype = values.dtype
64
65    if type is None and dtype != object:
66        with nogil:
67            check_status(NumPyDtypeToArrow(dtype, &c_type))
68
69    if type is not None:
70        c_type = type.sp_type
71
72    return c_type
73
74
75cdef _ndarray_to_array(object values, object mask, DataType type,
76                       c_bool from_pandas, c_bool safe, CMemoryPool* pool):
77    cdef:
78        shared_ptr[CChunkedArray] chunked_out
79        shared_ptr[CDataType] c_type = _ndarray_to_type(values, type)
80        CCastOptions cast_options = CCastOptions(safe)
81
82    with nogil:
83        check_status(NdarrayToArrow(pool, values, mask, from_pandas,
84                                    c_type, cast_options, &chunked_out))
85
86    if chunked_out.get().num_chunks() > 1:
87        return pyarrow_wrap_chunked_array(chunked_out)
88    else:
89        return pyarrow_wrap_array(chunked_out.get().chunk(0))
90
91
92cdef _codes_to_indices(object codes, object mask, DataType type,
93                       MemoryPool memory_pool):
94    """
95    Convert the codes of a pandas Categorical to indices for a pyarrow
96    DictionaryArray, taking into account missing values + mask
97    """
98    if mask is None:
99        mask = codes == -1
100    else:
101        mask = mask | (codes == -1)
102    return array(codes, mask=mask, type=type, memory_pool=memory_pool)
103
104
105def _handle_arrow_array_protocol(obj, type, mask, size):
106    if mask is not None or size is not None:
107        raise ValueError(
108            "Cannot specify a mask or a size when passing an object that is "
109            "converted with the __arrow_array__ protocol.")
110    res = obj.__arrow_array__(type=type)
111    if not isinstance(res, (Array, ChunkedArray)):
112        raise TypeError("The object's __arrow_array__ method does not "
113                        "return a pyarrow Array or ChunkedArray.")
114    return res
115
116
117def array(object obj, type=None, mask=None, size=None, from_pandas=None,
118          bint safe=True, MemoryPool memory_pool=None):
119    """
120    Create pyarrow.Array instance from a Python object.
121
122    Parameters
123    ----------
124    obj : sequence, iterable, ndarray or Series
125        If both type and size are specified may be a single use iterable. If
126        not strongly-typed, Arrow type will be inferred for resulting array.
127    type : pyarrow.DataType
128        Explicit type to attempt to coerce to, otherwise will be inferred from
129        the data.
130    mask : array[bool], optional
131        Indicate which values are null (True) or not null (False).
132    size : int64, optional
133        Size of the elements. If the input is larger than size bail at this
134        length. For iterators, if size is larger than the input iterator this
135        will be treated as a "max size", but will involve an initial allocation
136        of size followed by a resize to the actual size (so if you know the
137        exact size specifying it correctly will give you better performance).
138    from_pandas : bool, default None
139        Use pandas's semantics for inferring nulls from values in
140        ndarray-like data. If passed, the mask tasks precedence, but
141        if a value is unmasked (not-null), but still null according to
142        pandas semantics, then it is null. Defaults to False if not
143        passed explicitly by user, or True if a pandas object is
144        passed in.
145    safe : bool, default True
146        Check for overflows or other unsafe conversions.
147    memory_pool : pyarrow.MemoryPool, optional
148        If not passed, will allocate memory from the currently-set default
149        memory pool.
150
151    Returns
152    -------
153    array : pyarrow.Array or pyarrow.ChunkedArray
154        A ChunkedArray instead of an Array is returned if:
155
156        - the object data overflowed binary storage.
157        - the object's ``__arrow_array__`` protocol method returned a chunked
158          array.
159
160    Notes
161    -----
162    Localized timestamps will currently be returned as UTC (pandas's native
163    representation). Timezone-naive data will be implicitly interpreted as
164    UTC.
165
166    Pandas's DateOffsets and dateutil.relativedelta.relativedelta are by
167    default converted as MonthDayNanoIntervalArray. relativedelta leapdays
168    are ignored as are all absolute fields on both objects. datetime.timedelta
169    can also be converted to MonthDayNanoIntervalArray but this requires
170    passing MonthDayNanoIntervalType explicitly.
171
172    Converting to dictionary array will promote to a wider integer type for
173    indices if the number of distinct values cannot be represented, even if
174    the index type was explicitly set. This means that if there are more than
175    127 values the returned dictionary array's index type will be at least
176    pa.int16() even if pa.int8() was passed to the function. Note that an
177    explicit index type will not be demoted even if it is wider than required.
178
179    Examples
180    --------
181    >>> import pandas as pd
182    >>> import pyarrow as pa
183    >>> pa.array(pd.Series([1, 2]))
184    <pyarrow.lib.Int64Array object at 0x7f674e4c0e10>
185    [
186      1,
187      2
188    ]
189
190    >>> pa.array(["a", "b", "a"], type=pa.dictionary(pa.int8(), pa.string()))
191    <pyarrow.lib.DictionaryArray object at 0x7feb288d9040>
192    -- dictionary:
193    [
194      "a",
195      "b"
196    ]
197    -- indices:
198    [
199      0,
200      1,
201      0
202    ]
203
204    >>> import numpy as np
205    >>> pa.array(pd.Series([1, 2]), mask=np.array([0, 1], dtype=bool))
206    <pyarrow.lib.Int64Array object at 0x7f9019e11208>
207    [
208      1,
209      null
210    ]
211
212    >>> arr = pa.array(range(1024), type=pa.dictionary(pa.int8(), pa.int64()))
213    >>> arr.type.index_type
214    DataType(int16)
215    """
216    cdef:
217        CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
218        bint is_pandas_object = False
219        bint c_from_pandas
220
221    type = ensure_type(type, allow_none=True)
222
223    if from_pandas is None:
224        c_from_pandas = False
225    else:
226        c_from_pandas = from_pandas
227
228    if hasattr(obj, '__arrow_array__'):
229        return _handle_arrow_array_protocol(obj, type, mask, size)
230    elif _is_array_like(obj):
231        if mask is not None:
232            if _is_array_like(mask):
233                mask = get_values(mask, &is_pandas_object)
234            else:
235                raise TypeError("Mask must be a numpy array "
236                                "when converting numpy arrays")
237
238        values = get_values(obj, &is_pandas_object)
239        if is_pandas_object and from_pandas is None:
240            c_from_pandas = True
241
242        if isinstance(values, np.ma.MaskedArray):
243            if mask is not None:
244                raise ValueError("Cannot pass a numpy masked array and "
245                                 "specify a mask at the same time")
246            else:
247                # don't use shrunken masks
248                mask = None if values.mask is np.ma.nomask else values.mask
249                values = values.data
250
251        if mask is not None:
252            if mask.dtype != np.bool_:
253                raise TypeError("Mask must be boolean dtype")
254            if mask.ndim != 1:
255                raise ValueError("Mask must be 1D array")
256            if len(values) != len(mask):
257                raise ValueError(
258                    "Mask is a different length from sequence being converted")
259
260        if hasattr(values, '__arrow_array__'):
261            return _handle_arrow_array_protocol(values, type, mask, size)
262        elif pandas_api.is_categorical(values):
263            if type is not None:
264                if type.id != Type_DICTIONARY:
265                    return _ndarray_to_array(
266                        np.asarray(values), mask, type, c_from_pandas, safe,
267                        pool)
268                index_type = type.index_type
269                value_type = type.value_type
270                if values.ordered != type.ordered:
271                    warnings.warn(
272                        "The 'ordered' flag of the passed categorical values "
273                        "does not match the 'ordered' of the specified type. "
274                        "Using the flag of the values, but in the future this "
275                        "mismatch will raise a ValueError.",
276                        FutureWarning, stacklevel=2)
277            else:
278                index_type = None
279                value_type = None
280
281            indices = _codes_to_indices(
282                values.codes, mask, index_type, memory_pool)
283            try:
284                dictionary = array(
285                    values.categories.values, type=value_type,
286                    memory_pool=memory_pool)
287            except TypeError:
288                # TODO when removing the deprecation warning, this whole
289                # try/except can be removed (to bubble the TypeError of
290                # the first array(..) call)
291                if value_type is not None:
292                    warnings.warn(
293                        "The dtype of the 'categories' of the passed "
294                        "categorical values ({0}) does not match the "
295                        "specified type ({1}). For now ignoring the specified "
296                        "type, but in the future this mismatch will raise a "
297                        "TypeError".format(
298                            values.categories.dtype, value_type),
299                        FutureWarning, stacklevel=2)
300                    dictionary = array(
301                        values.categories.values, memory_pool=memory_pool)
302                else:
303                    raise
304
305            return DictionaryArray.from_arrays(
306                indices, dictionary, ordered=values.ordered, safe=safe)
307        else:
308            if pandas_api.have_pandas:
309                values, type = pandas_api.compat.get_datetimetz_type(
310                    values, obj.dtype, type)
311            return _ndarray_to_array(values, mask, type, c_from_pandas, safe,
312                                     pool)
313    else:
314        # ConvertPySequence does strict conversion if type is explicitly passed
315        return _sequence_to_array(obj, mask, size, type, pool, c_from_pandas)
316
317
318def asarray(values, type=None):
319    """
320    Convert to pyarrow.Array, inferring type if not provided.
321
322    Parameters
323    ----------
324    values : array-like
325        This can be a sequence, numpy.ndarray, pyarrow.Array or
326        pyarrow.ChunkedArray. If a ChunkedArray is passed, the output will be
327        a ChunkedArray, otherwise the output will be a Array.
328    type : string or DataType
329        Explicitly construct the array with this type. Attempt to cast if
330        indicated type is different.
331
332    Returns
333    -------
334    arr : Array or ChunkedArray
335    """
336    if isinstance(values, (Array, ChunkedArray)):
337        if type is not None and not values.type.equals(type):
338            values = values.cast(type)
339        return values
340    else:
341        return array(values, type=type)
342
343
344def nulls(size, type=None, MemoryPool memory_pool=None):
345    """
346    Create a strongly-typed Array instance with all elements null.
347
348    Parameters
349    ----------
350    size : int
351        Array length.
352    type : pyarrow.DataType, default None
353        Explicit type for the array. By default use NullType.
354    memory_pool : MemoryPool, default None
355        Arrow MemoryPool to use for allocations. Uses the default memory
356        pool is not passed.
357
358    Returns
359    -------
360    arr : Array
361
362    Examples
363    --------
364    >>> import pyarrow as pa
365    >>> pa.nulls(10)
366    <pyarrow.lib.NullArray object at 0x7ffaf04c2e50>
367    10 nulls
368
369    >>> pa.nulls(3, pa.uint32())
370    <pyarrow.lib.UInt32Array object at 0x7ffaf04c2e50>
371    [
372      null,
373      null,
374      null
375    ]
376    """
377    cdef:
378        CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
379        int64_t length = size
380        shared_ptr[CDataType] ty
381        shared_ptr[CArray] arr
382
383    type = ensure_type(type, allow_none=True)
384    if type is None:
385        type = null()
386
387    ty = pyarrow_unwrap_data_type(type)
388    with nogil:
389        arr = GetResultValue(MakeArrayOfNull(ty, length, pool))
390
391    return pyarrow_wrap_array(arr)
392
393
394def repeat(value, size, MemoryPool memory_pool=None):
395    """
396    Create an Array instance whose slots are the given scalar.
397
398    Parameters
399    ----------
400    value : Scalar-like object
401        Either a pyarrow.Scalar or any python object coercible to a Scalar.
402    size : int
403        Number of times to repeat the scalar in the output Array.
404    memory_pool : MemoryPool, default None
405        Arrow MemoryPool to use for allocations. Uses the default memory
406        pool is not passed.
407
408    Returns
409    -------
410    arr : Array
411
412    Examples
413    --------
414    >>> import pyarrow as pa
415    >>> pa.repeat(10, 3)
416    <pyarrow.lib.Int64Array object at 0x7ffac03a2750>
417    [
418      10,
419      10,
420      10
421    ]
422
423    >>> pa.repeat([1, 2], 2)
424    <pyarrow.lib.ListArray object at 0x7ffaf04c2e50>
425    [
426      [
427        1,
428        2
429      ],
430      [
431        1,
432        2
433      ]
434    ]
435
436    >>> pa.repeat("string", 3)
437    <pyarrow.lib.StringArray object at 0x7ffac03a2750>
438    [
439      "string",
440      "string",
441      "string"
442    ]
443
444    >>> pa.repeat(pa.scalar({'a': 1, 'b': [1, 2]}), 2)
445    <pyarrow.lib.StructArray object at 0x7ffac03a2750>
446    -- is_valid: all not null
447    -- child 0 type: int64
448      [
449        1,
450        1
451      ]
452    -- child 1 type: list<item: int64>
453      [
454        [
455          1,
456          2
457        ],
458        [
459          1,
460          2
461        ]
462      ]
463    """
464    cdef:
465        CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
466        int64_t length = size
467        shared_ptr[CArray] c_array
468        shared_ptr[CScalar] c_scalar
469
470    if not isinstance(value, Scalar):
471        value = scalar(value, memory_pool=memory_pool)
472
473    c_scalar = (<Scalar> value).unwrap()
474    with nogil:
475        c_array = GetResultValue(
476            MakeArrayFromScalar(deref(c_scalar), length, pool)
477        )
478
479    return pyarrow_wrap_array(c_array)
480
481
482def infer_type(values, mask=None, from_pandas=False):
483    """
484    Attempt to infer Arrow data type that can hold the passed Python
485    sequence type in an Array object
486
487    Parameters
488    ----------
489    values : array-like
490        Sequence to infer type from.
491    mask : ndarray (bool type), optional
492        Optional exclusion mask where True marks null, False non-null.
493    from_pandas : bool, default False
494        Use pandas's NA/null sentinel values for type inference.
495
496    Returns
497    -------
498    type : DataType
499    """
500    cdef:
501        shared_ptr[CDataType] out
502        c_bool use_pandas_sentinels = from_pandas
503
504    if mask is not None and not isinstance(mask, np.ndarray):
505        mask = np.array(mask, dtype=bool)
506
507    out = GetResultValue(InferArrowType(values, mask, use_pandas_sentinels))
508    return pyarrow_wrap_data_type(out)
509
510
511def _normalize_slice(object arrow_obj, slice key):
512    """
513    Slices with step not equal to 1 (or None) will produce a copy
514    rather than a zero-copy view
515    """
516    cdef:
517        Py_ssize_t start, stop, step
518        Py_ssize_t n = len(arrow_obj)
519
520    start = key.start or 0
521    if start < 0:
522        start += n
523        if start < 0:
524            start = 0
525    elif start >= n:
526        start = n
527
528    stop = key.stop if key.stop is not None else n
529    if stop < 0:
530        stop += n
531        if stop < 0:
532            stop = 0
533    elif stop >= n:
534        stop = n
535
536    step = key.step or 1
537    if step != 1:
538        if step < 0:
539            # Negative steps require some special handling
540            if key.start is None:
541                start = n - 1
542
543            if key.stop is None:
544                stop = -1
545
546        indices = np.arange(start, stop, step)
547        return arrow_obj.take(indices)
548    else:
549        length = max(stop - start, 0)
550        return arrow_obj.slice(start, length)
551
552
553cdef Py_ssize_t _normalize_index(Py_ssize_t index,
554                                 Py_ssize_t length) except -1:
555    if index < 0:
556        index += length
557        if index < 0:
558            raise IndexError("index out of bounds")
559    elif index >= length:
560        raise IndexError("index out of bounds")
561    return index
562
563
564cdef wrap_datum(const CDatum& datum):
565    if datum.kind() == DatumType_ARRAY:
566        return pyarrow_wrap_array(MakeArray(datum.array()))
567    elif datum.kind() == DatumType_CHUNKED_ARRAY:
568        return pyarrow_wrap_chunked_array(datum.chunked_array())
569    elif datum.kind() == DatumType_RECORD_BATCH:
570        return pyarrow_wrap_batch(datum.record_batch())
571    elif datum.kind() == DatumType_TABLE:
572        return pyarrow_wrap_table(datum.table())
573    elif datum.kind() == DatumType_SCALAR:
574        return pyarrow_wrap_scalar(datum.scalar())
575    else:
576        raise ValueError("Unable to wrap Datum in a Python object")
577
578
579cdef _append_array_buffers(const CArrayData* ad, list res):
580    """
581    Recursively append Buffer wrappers from *ad* and its children.
582    """
583    cdef size_t i, n
584    assert ad != NULL
585    n = ad.buffers.size()
586    for i in range(n):
587        buf = ad.buffers[i]
588        res.append(pyarrow_wrap_buffer(buf)
589                   if buf.get() != NULL else None)
590    n = ad.child_data.size()
591    for i in range(n):
592        _append_array_buffers(ad.child_data[i].get(), res)
593
594
595cdef _reduce_array_data(const CArrayData* ad):
596    """
597    Recursively dissect ArrayData to (pickable) tuples.
598    """
599    cdef size_t i, n
600    assert ad != NULL
601
602    n = ad.buffers.size()
603    buffers = []
604    for i in range(n):
605        buf = ad.buffers[i]
606        buffers.append(pyarrow_wrap_buffer(buf)
607                       if buf.get() != NULL else None)
608
609    children = []
610    n = ad.child_data.size()
611    for i in range(n):
612        children.append(_reduce_array_data(ad.child_data[i].get()))
613
614    if ad.dictionary.get() != NULL:
615        dictionary = _reduce_array_data(ad.dictionary.get())
616    else:
617        dictionary = None
618
619    return pyarrow_wrap_data_type(ad.type), ad.length, ad.null_count, \
620        ad.offset, buffers, children, dictionary
621
622
623cdef shared_ptr[CArrayData] _reconstruct_array_data(data):
624    """
625    Reconstruct CArrayData objects from the tuple structure generated
626    by _reduce_array_data.
627    """
628    cdef:
629        int64_t length, null_count, offset, i
630        DataType dtype
631        Buffer buf
632        vector[shared_ptr[CBuffer]] c_buffers
633        vector[shared_ptr[CArrayData]] c_children
634        shared_ptr[CArrayData] c_dictionary
635
636    dtype, length, null_count, offset, buffers, children, dictionary = data
637
638    for i in range(len(buffers)):
639        buf = buffers[i]
640        if buf is None:
641            c_buffers.push_back(shared_ptr[CBuffer]())
642        else:
643            c_buffers.push_back(buf.buffer)
644
645    for i in range(len(children)):
646        c_children.push_back(_reconstruct_array_data(children[i]))
647
648    if dictionary is not None:
649        c_dictionary = _reconstruct_array_data(dictionary)
650
651    return CArrayData.MakeWithChildrenAndDictionary(
652        dtype.sp_type,
653        length,
654        c_buffers,
655        c_children,
656        c_dictionary,
657        null_count,
658        offset)
659
660
661def _restore_array(data):
662    """
663    Reconstruct an Array from pickled ArrayData.
664    """
665    cdef shared_ptr[CArrayData] ad = _reconstruct_array_data(data)
666    return pyarrow_wrap_array(MakeArray(ad))
667
668
669cdef class _PandasConvertible(_Weakrefable):
670
671    def to_pandas(
672            self,
673            memory_pool=None,
674            categories=None,
675            bint strings_to_categorical=False,
676            bint zero_copy_only=False,
677            bint integer_object_nulls=False,
678            bint date_as_object=True,
679            bint timestamp_as_object=False,
680            bint use_threads=True,
681            bint deduplicate_objects=True,
682            bint ignore_metadata=False,
683            bint safe=True,
684            bint split_blocks=False,
685            bint self_destruct=False,
686            types_mapper=None
687    ):
688        """
689        Convert to a pandas-compatible NumPy array or DataFrame, as appropriate
690
691        Parameters
692        ----------
693        memory_pool : MemoryPool, default None
694            Arrow MemoryPool to use for allocations. Uses the default memory
695            pool is not passed.
696        strings_to_categorical : bool, default False
697            Encode string (UTF8) and binary types to pandas.Categorical.
698        categories: list, default empty
699            List of fields that should be returned as pandas.Categorical. Only
700            applies to table-like data structures.
701        zero_copy_only : bool, default False
702            Raise an ArrowException if this function call would require copying
703            the underlying data.
704        integer_object_nulls : bool, default False
705            Cast integers with nulls to objects
706        date_as_object : bool, default True
707            Cast dates to objects. If False, convert to datetime64[ns] dtype.
708        timestamp_as_object : bool, default False
709            Cast non-nanosecond timestamps (np.datetime64) to objects. This is
710            useful if you have timestamps that don't fit in the normal date
711            range of nanosecond timestamps (1678 CE-2262 CE).
712            If False, all timestamps are converted to datetime64[ns] dtype.
713        use_threads: bool, default True
714            Whether to parallelize the conversion using multiple threads.
715        deduplicate_objects : bool, default False
716            Do not create multiple copies Python objects when created, to save
717            on memory use. Conversion will be slower.
718        ignore_metadata : bool, default False
719            If True, do not use the 'pandas' metadata to reconstruct the
720            DataFrame index, if present
721        safe : bool, default True
722            For certain data types, a cast is needed in order to store the
723            data in a pandas DataFrame or Series (e.g. timestamps are always
724            stored as nanoseconds in pandas). This option controls whether it
725            is a safe cast or not.
726        split_blocks : bool, default False
727            If True, generate one internal "block" for each column when
728            creating a pandas.DataFrame from a RecordBatch or Table. While this
729            can temporarily reduce memory note that various pandas operations
730            can trigger "consolidation" which may balloon memory use.
731        self_destruct : bool, default False
732            EXPERIMENTAL: If True, attempt to deallocate the originating Arrow
733            memory while converting the Arrow object to pandas. If you use the
734            object after calling to_pandas with this option it will crash your
735            program.
736
737            Note that you may not see always memory usage improvements. For
738            example, if multiple columns share an underlying allocation,
739            memory can't be freed until all columns are converted.
740        types_mapper : function, default None
741            A function mapping a pyarrow DataType to a pandas ExtensionDtype.
742            This can be used to override the default pandas type for conversion
743            of built-in pyarrow types or in absence of pandas_metadata in the
744            Table schema. The function receives a pyarrow DataType and is
745            expected to return a pandas ExtensionDtype or ``None`` if the
746            default conversion should be used for that type. If you have
747            a dictionary mapping, you can pass ``dict.get`` as function.
748
749        Returns
750        -------
751        pandas.Series or pandas.DataFrame depending on type of object
752        """
753        options = dict(
754            pool=memory_pool,
755            strings_to_categorical=strings_to_categorical,
756            zero_copy_only=zero_copy_only,
757            integer_object_nulls=integer_object_nulls,
758            date_as_object=date_as_object,
759            timestamp_as_object=timestamp_as_object,
760            use_threads=use_threads,
761            deduplicate_objects=deduplicate_objects,
762            safe=safe,
763            split_blocks=split_blocks,
764            self_destruct=self_destruct
765        )
766        return self._to_pandas(options, categories=categories,
767                               ignore_metadata=ignore_metadata,
768                               types_mapper=types_mapper)
769
770
771cdef PandasOptions _convert_pandas_options(dict options):
772    cdef PandasOptions result
773    result.pool = maybe_unbox_memory_pool(options['pool'])
774    result.strings_to_categorical = options['strings_to_categorical']
775    result.zero_copy_only = options['zero_copy_only']
776    result.integer_object_nulls = options['integer_object_nulls']
777    result.date_as_object = options['date_as_object']
778    result.timestamp_as_object = options['timestamp_as_object']
779    result.use_threads = options['use_threads']
780    result.deduplicate_objects = options['deduplicate_objects']
781    result.safe_cast = options['safe']
782    result.split_blocks = options['split_blocks']
783    result.self_destruct = options['self_destruct']
784    result.ignore_timezone = os.environ.get('PYARROW_IGNORE_TIMEZONE', False)
785    return result
786
787
788cdef class Array(_PandasConvertible):
789    """
790    The base class for all Arrow arrays.
791    """
792
793    def __init__(self):
794        raise TypeError("Do not call {}'s constructor directly, use one of "
795                        "the `pyarrow.Array.from_*` functions instead."
796                        .format(self.__class__.__name__))
797
798    cdef void init(self, const shared_ptr[CArray]& sp_array) except *:
799        self.sp_array = sp_array
800        self.ap = sp_array.get()
801        self.type = pyarrow_wrap_data_type(self.sp_array.get().type())
802
803    def _debug_print(self):
804        with nogil:
805            check_status(DebugPrint(deref(self.ap), 0))
806
807    def diff(self, Array other):
808        """
809        Compare contents of this array against another one.
810
811        Return string containing the result of arrow::Diff comparing contents
812        of this array against the other array.
813        """
814        cdef c_string result
815        with nogil:
816            result = self.ap.Diff(deref(other.ap))
817        return frombytes(result, safe=True)
818
819    def cast(self, object target_type, safe=True):
820        """
821        Cast array values to another data type
822
823        See pyarrow.compute.cast for usage
824        """
825        return _pc().cast(self, target_type, safe=safe)
826
827    def view(self, object target_type):
828        """
829        Return zero-copy "view" of array as another data type.
830
831        The data types must have compatible columnar buffer layouts
832
833        Parameters
834        ----------
835        target_type : DataType
836            Type to construct view as.
837
838        Returns
839        -------
840        view : Array
841        """
842        cdef DataType type = ensure_type(target_type)
843        cdef shared_ptr[CArray] result
844        with nogil:
845            result = GetResultValue(self.ap.View(type.sp_type))
846        return pyarrow_wrap_array(result)
847
848    def sum(self, **kwargs):
849        """
850        Sum the values in a numerical array.
851        """
852        options = _pc().ScalarAggregateOptions(**kwargs)
853        return _pc().call_function('sum', [self], options)
854
855    def unique(self):
856        """
857        Compute distinct elements in array.
858        """
859        return _pc().call_function('unique', [self])
860
861    def dictionary_encode(self, null_encoding='mask'):
862        """
863        Compute dictionary-encoded representation of array.
864        """
865        options = _pc().DictionaryEncodeOptions(null_encoding)
866        return _pc().call_function('dictionary_encode', [self], options)
867
868    def value_counts(self):
869        """
870        Compute counts of unique elements in array.
871
872        Returns
873        -------
874        An array of  <input type "Values", int64_t "Counts"> structs
875        """
876        return _pc().call_function('value_counts', [self])
877
878    @staticmethod
879    def from_pandas(obj, mask=None, type=None, bint safe=True,
880                    MemoryPool memory_pool=None):
881        """
882        Convert pandas.Series to an Arrow Array.
883
884        This method uses Pandas semantics about what values indicate
885        nulls. See pyarrow.array for more general conversion from arrays or
886        sequences to Arrow arrays.
887
888        Parameters
889        ----------
890        obj : ndarray, pandas.Series, array-like
891        mask : array (boolean), optional
892            Indicate which values are null (True) or not null (False).
893        type : pyarrow.DataType
894            Explicit type to attempt to coerce to, otherwise will be inferred
895            from the data.
896        safe : bool, default True
897            Check for overflows or other unsafe conversions.
898        memory_pool : pyarrow.MemoryPool, optional
899            If not passed, will allocate memory from the currently-set default
900            memory pool.
901
902        Notes
903        -----
904        Localized timestamps will currently be returned as UTC (pandas's native
905        representation). Timezone-naive data will be implicitly interpreted as
906        UTC.
907
908        Returns
909        -------
910        array : pyarrow.Array or pyarrow.ChunkedArray
911            ChunkedArray is returned if object data overflows binary buffer.
912        """
913        return array(obj, mask=mask, type=type, safe=safe, from_pandas=True,
914                     memory_pool=memory_pool)
915
916    def __reduce__(self):
917        return _restore_array, \
918            (_reduce_array_data(self.sp_array.get().data().get()),)
919
920    @staticmethod
921    def from_buffers(DataType type, length, buffers, null_count=-1, offset=0,
922                     children=None):
923        """
924        Construct an Array from a sequence of buffers.
925
926        The concrete type returned depends on the datatype.
927
928        Parameters
929        ----------
930        type : DataType
931            The value type of the array.
932        length : int
933            The number of values in the array.
934        buffers : List[Buffer]
935            The buffers backing this array.
936        null_count : int, default -1
937            The number of null entries in the array. Negative value means that
938            the null count is not known.
939        offset : int, default 0
940            The array's logical offset (in values, not in bytes) from the
941            start of each buffer.
942        children : List[Array], default None
943            Nested type children with length matching type.num_fields.
944
945        Returns
946        -------
947        array : Array
948        """
949        cdef:
950            Buffer buf
951            Array child
952            vector[shared_ptr[CBuffer]] c_buffers
953            vector[shared_ptr[CArrayData]] c_child_data
954            shared_ptr[CArrayData] array_data
955
956        children = children or []
957
958        if type.num_fields != len(children):
959            raise ValueError("Type's expected number of children "
960                             "({0}) did not match the passed number "
961                             "({1}).".format(type.num_fields, len(children)))
962
963        if type.num_buffers != len(buffers):
964            raise ValueError("Type's expected number of buffers "
965                             "({0}) did not match the passed number "
966                             "({1}).".format(type.num_buffers, len(buffers)))
967
968        for buf in buffers:
969            # None will produce a null buffer pointer
970            c_buffers.push_back(pyarrow_unwrap_buffer(buf))
971
972        for child in children:
973            c_child_data.push_back(child.ap.data())
974
975        array_data = CArrayData.MakeWithChildren(type.sp_type, length,
976                                                 c_buffers, c_child_data,
977                                                 null_count, offset)
978        cdef Array result = pyarrow_wrap_array(MakeArray(array_data))
979        result.validate()
980        return result
981
982    @property
983    def null_count(self):
984        return self.sp_array.get().null_count()
985
986    @property
987    def nbytes(self):
988        """
989        Total number of bytes consumed by the elements of the array.
990        """
991        size = 0
992        for buf in self.buffers():
993            if buf is not None:
994                size += buf.size
995        return size
996
997    def __sizeof__(self):
998        return super(Array, self).__sizeof__() + self.nbytes
999
1000    def __iter__(self):
1001        for i in range(len(self)):
1002            yield self.getitem(i)
1003
1004    def __repr__(self):
1005        type_format = object.__repr__(self)
1006        return '{0}\n{1}'.format(type_format, str(self))
1007
1008    def to_string(self, *, int indent=0, int window=10,
1009                  c_bool skip_new_lines=False):
1010        """
1011        Render a "pretty-printed" string representation of the Array.
1012
1013        Parameters
1014        ----------
1015        indent : int
1016            How much to indent right the content of the array,
1017            by default ``0``.
1018        window : int
1019            How many items to preview at the begin and end
1020            of the array when the arrays is bigger than the window.
1021            The other elements will be ellipsed.
1022        skip_new_lines : bool
1023            If the array should be rendered as a single line of text
1024            or if each element should be on its own line.
1025        """
1026        cdef:
1027            c_string result
1028            PrettyPrintOptions options
1029
1030        with nogil:
1031            options = PrettyPrintOptions(indent, window)
1032            options.skip_new_lines = skip_new_lines
1033            check_status(
1034                PrettyPrint(
1035                    deref(self.ap),
1036                    options,
1037                    &result
1038                )
1039            )
1040
1041        return frombytes(result, safe=True)
1042
1043    def format(self, **kwargs):
1044        import warnings
1045        warnings.warn('Array.format is deprecated, use Array.to_string')
1046        return self.to_string(**kwargs)
1047
1048    def __str__(self):
1049        return self.to_string()
1050
1051    def __eq__(self, other):
1052        try:
1053            return self.equals(other)
1054        except TypeError:
1055            # This also handles comparing with None
1056            # as Array.equals(None) raises a TypeError.
1057            return NotImplemented
1058
1059    def equals(Array self, Array other not None):
1060        return self.ap.Equals(deref(other.ap))
1061
1062    def __len__(self):
1063        return self.length()
1064
1065    cdef int64_t length(self):
1066        if self.sp_array.get():
1067            return self.sp_array.get().length()
1068        else:
1069            return 0
1070
1071    def is_null(self, *, nan_is_null=False):
1072        """
1073        Return BooleanArray indicating the null values.
1074
1075        Parameters
1076        ----------
1077        nan_is_null : bool (optional, default False)
1078            Whether floating-point NaN values should also be considered null.
1079
1080        Returns
1081        -------
1082        array : boolean Array
1083        """
1084        options = _pc().NullOptions(nan_is_null=nan_is_null)
1085        return _pc().call_function('is_null', [self], options)
1086
1087    def is_valid(self):
1088        """
1089        Return BooleanArray indicating the non-null values.
1090        """
1091        return _pc().is_valid(self)
1092
1093    def fill_null(self, fill_value):
1094        """
1095        See pyarrow.compute.fill_null for usage.
1096        """
1097        return _pc().fill_null(self, fill_value)
1098
1099    def __getitem__(self, key):
1100        """
1101        Slice or return value at given index
1102
1103        Parameters
1104        ----------
1105        key : integer or slice
1106            Slices with step not equal to 1 (or None) will produce a copy
1107            rather than a zero-copy view
1108
1109        Returns
1110        -------
1111        value : Scalar (index) or Array (slice)
1112        """
1113        if PySlice_Check(key):
1114            return _normalize_slice(self, key)
1115
1116        return self.getitem(_normalize_index(key, self.length()))
1117
1118    cdef getitem(self, int64_t i):
1119        return Scalar.wrap(GetResultValue(self.ap.GetScalar(i)))
1120
1121    def slice(self, offset=0, length=None):
1122        """
1123        Compute zero-copy slice of this array.
1124
1125        Parameters
1126        ----------
1127        offset : int, default 0
1128            Offset from start of array to slice.
1129        length : int, default None
1130            Length of slice (default is until end of Array starting from
1131            offset).
1132
1133        Returns
1134        -------
1135        sliced : RecordBatch
1136        """
1137        cdef:
1138            shared_ptr[CArray] result
1139
1140        if offset < 0:
1141            raise IndexError('Offset must be non-negative')
1142
1143        offset = min(len(self), offset)
1144        if length is None:
1145            result = self.ap.Slice(offset)
1146        else:
1147            if length < 0:
1148                raise ValueError('Length must be non-negative')
1149            result = self.ap.Slice(offset, length)
1150
1151        return pyarrow_wrap_array(result)
1152
1153    def take(self, object indices):
1154        """
1155        Select values from an array. See pyarrow.compute.take for full usage.
1156        """
1157        return _pc().take(self, indices)
1158
1159    def drop_null(self):
1160        """
1161        Remove missing values from an array.
1162        """
1163        return _pc().drop_null(self)
1164
1165    def filter(self, Array mask, *, null_selection_behavior='drop'):
1166        """
1167        Select values from an array. See pyarrow.compute.filter for full usage.
1168        """
1169        return _pc().filter(self, mask,
1170                            null_selection_behavior=null_selection_behavior)
1171
1172    def index(self, value, start=None, end=None, *, memory_pool=None):
1173        """
1174        Find the first index of a value.
1175
1176        See pyarrow.compute.index for full usage.
1177        """
1178        return _pc().index(self, value, start, end, memory_pool=memory_pool)
1179
1180    def _to_pandas(self, options, **kwargs):
1181        return _array_like_to_pandas(self, options)
1182
1183    def __array__(self, dtype=None):
1184        values = self.to_numpy(zero_copy_only=False)
1185        if dtype is None:
1186            return values
1187        return values.astype(dtype)
1188
1189    def to_numpy(self, zero_copy_only=True, writable=False):
1190        """
1191        Return a NumPy view or copy of this array (experimental).
1192
1193        By default, tries to return a view of this array. This is only
1194        supported for primitive arrays with the same memory layout as NumPy
1195        (i.e. integers, floating point, ..) and without any nulls.
1196
1197        Parameters
1198        ----------
1199        zero_copy_only : bool, default True
1200            If True, an exception will be raised if the conversion to a numpy
1201            array would require copying the underlying data (e.g. in presence
1202            of nulls, or for non-primitive types).
1203        writable : bool, default False
1204            For numpy arrays created with zero copy (view on the Arrow data),
1205            the resulting array is not writable (Arrow data is immutable).
1206            By setting this to True, a copy of the array is made to ensure
1207            it is writable.
1208
1209        Returns
1210        -------
1211        array : numpy.ndarray
1212        """
1213        cdef:
1214            PyObject* out
1215            PandasOptions c_options
1216            object values
1217
1218        if zero_copy_only and writable:
1219            raise ValueError(
1220                "Cannot return a writable array if asking for zero-copy")
1221
1222        # If there are nulls and the array is a DictionaryArray
1223        # decoding the dictionary will make sure nulls are correctly handled.
1224        # Decoding a dictionary does imply a copy by the way,
1225        # so it can't be done if the user requested a zero_copy.
1226        c_options.decode_dictionaries = not zero_copy_only
1227        c_options.zero_copy_only = zero_copy_only
1228
1229        with nogil:
1230            check_status(ConvertArrayToPandas(c_options, self.sp_array,
1231                                              self, &out))
1232
1233        # wrap_array_output uses pandas to convert to Categorical, here
1234        # always convert to numpy array without pandas dependency
1235        array = PyObject_to_object(out)
1236
1237        if isinstance(array, dict):
1238            array = np.take(array['dictionary'], array['indices'])
1239
1240        if writable and not array.flags.writeable:
1241            # if the conversion already needed to a copy, writeable is True
1242            array = array.copy()
1243        return array
1244
1245    def to_pylist(self):
1246        """
1247        Convert to a list of native Python objects.
1248
1249        Returns
1250        -------
1251        lst : list
1252        """
1253        return [x.as_py() for x in self]
1254
1255    def tolist(self):
1256        """
1257        Alias of to_pylist for compatibility with NumPy.
1258        """
1259        return self.to_pylist()
1260
1261    def validate(self, *, full=False):
1262        """
1263        Perform validation checks.  An exception is raised if validation fails.
1264
1265        By default only cheap validation checks are run.  Pass `full=True`
1266        for thorough validation checks (potentially O(n)).
1267
1268        Parameters
1269        ----------
1270        full: bool, default False
1271            If True, run expensive checks, otherwise cheap checks only.
1272
1273        Raises
1274        ------
1275        ArrowInvalid
1276        """
1277        if full:
1278            with nogil:
1279                check_status(self.ap.ValidateFull())
1280        else:
1281            with nogil:
1282                check_status(self.ap.Validate())
1283
1284    @property
1285    def offset(self):
1286        """
1287        A relative position into another array's data.
1288
1289        The purpose is to enable zero-copy slicing. This value defaults to zero
1290        but must be applied on all operations with the physical storage
1291        buffers.
1292        """
1293        return self.sp_array.get().offset()
1294
1295    def buffers(self):
1296        """
1297        Return a list of Buffer objects pointing to this array's physical
1298        storage.
1299
1300        To correctly interpret these buffers, you need to also apply the offset
1301        multiplied with the size of the stored data type.
1302        """
1303        res = []
1304        _append_array_buffers(self.sp_array.get().data().get(), res)
1305        return res
1306
1307    def _export_to_c(self, uintptr_t out_ptr, uintptr_t out_schema_ptr=0):
1308        """
1309        Export to a C ArrowArray struct, given its pointer.
1310
1311        If a C ArrowSchema struct pointer is also given, the array type
1312        is exported to it at the same time.
1313
1314        Parameters
1315        ----------
1316        out_ptr: int
1317            The raw pointer to a C ArrowArray struct.
1318        out_schema_ptr: int (optional)
1319            The raw pointer to a C ArrowSchema struct.
1320
1321        Be careful: if you don't pass the ArrowArray struct to a consumer,
1322        array memory will leak.  This is a low-level function intended for
1323        expert users.
1324        """
1325        with nogil:
1326            check_status(ExportArray(deref(self.sp_array),
1327                                     <ArrowArray*> out_ptr,
1328                                     <ArrowSchema*> out_schema_ptr))
1329
1330    @staticmethod
1331    def _import_from_c(uintptr_t in_ptr, type):
1332        """
1333        Import Array from a C ArrowArray struct, given its pointer
1334        and the imported array type.
1335
1336        Parameters
1337        ----------
1338        in_ptr: int
1339            The raw pointer to a C ArrowArray struct.
1340        type: DataType or int
1341            Either a DataType object, or the raw pointer to a C ArrowSchema
1342            struct.
1343
1344        This is a low-level function intended for expert users.
1345        """
1346        cdef:
1347            shared_ptr[CArray] c_array
1348
1349        c_type = pyarrow_unwrap_data_type(type)
1350        if c_type == nullptr:
1351            # Not a DataType object, perhaps a raw ArrowSchema pointer
1352            type_ptr = <uintptr_t> type
1353            with nogil:
1354                c_array = GetResultValue(ImportArray(<ArrowArray*> in_ptr,
1355                                                     <ArrowSchema*> type_ptr))
1356        else:
1357            with nogil:
1358                c_array = GetResultValue(ImportArray(<ArrowArray*> in_ptr,
1359                                                     c_type))
1360        return pyarrow_wrap_array(c_array)
1361
1362
1363cdef _array_like_to_pandas(obj, options):
1364    cdef:
1365        PyObject* out
1366        PandasOptions c_options = _convert_pandas_options(options)
1367
1368    original_type = obj.type
1369    name = obj._name
1370
1371    # ARROW-3789(wesm): Convert date/timestamp types to datetime64[ns]
1372    c_options.coerce_temporal_nanoseconds = True
1373
1374    if isinstance(obj, Array):
1375        with nogil:
1376            check_status(ConvertArrayToPandas(c_options,
1377                                              (<Array> obj).sp_array,
1378                                              obj, &out))
1379    elif isinstance(obj, ChunkedArray):
1380        with nogil:
1381            check_status(libarrow.ConvertChunkedArrayToPandas(
1382                c_options,
1383                (<ChunkedArray> obj).sp_chunked_array,
1384                obj, &out))
1385
1386    arr = wrap_array_output(out)
1387
1388    if (isinstance(original_type, TimestampType) and
1389            options["timestamp_as_object"]):
1390        # ARROW-5359 - need to specify object dtype to avoid pandas to
1391        # coerce back to ns resolution
1392        dtype = "object"
1393    else:
1394        dtype = None
1395
1396    result = pandas_api.series(arr, dtype=dtype, name=name)
1397
1398    if (isinstance(original_type, TimestampType) and
1399            original_type.tz is not None and
1400            # can be object dtype for non-ns and timestamp_as_object=True
1401            result.dtype.kind == "M"):
1402        from pyarrow.pandas_compat import make_tz_aware
1403        result = make_tz_aware(result, original_type.tz)
1404
1405    return result
1406
1407
1408cdef wrap_array_output(PyObject* output):
1409    cdef object obj = PyObject_to_object(output)
1410
1411    if isinstance(obj, dict):
1412        return pandas_api.categorical_type(obj['indices'],
1413                                           categories=obj['dictionary'],
1414                                           ordered=obj['ordered'],
1415                                           fastpath=True)
1416    else:
1417        return obj
1418
1419
1420cdef class NullArray(Array):
1421    """
1422    Concrete class for Arrow arrays of null data type.
1423    """
1424
1425
1426cdef class BooleanArray(Array):
1427    """
1428    Concrete class for Arrow arrays of boolean data type.
1429    """
1430    @property
1431    def false_count(self):
1432        return (<CBooleanArray*> self.ap).false_count()
1433
1434    @property
1435    def true_count(self):
1436        return (<CBooleanArray*> self.ap).true_count()
1437
1438
1439cdef class NumericArray(Array):
1440    """
1441    A base class for Arrow numeric arrays.
1442    """
1443
1444
1445cdef class IntegerArray(NumericArray):
1446    """
1447    A base class for Arrow integer arrays.
1448    """
1449
1450
1451cdef class FloatingPointArray(NumericArray):
1452    """
1453    A base class for Arrow floating-point arrays.
1454    """
1455
1456
1457cdef class Int8Array(IntegerArray):
1458    """
1459    Concrete class for Arrow arrays of int8 data type.
1460    """
1461
1462
1463cdef class UInt8Array(IntegerArray):
1464    """
1465    Concrete class for Arrow arrays of uint8 data type.
1466    """
1467
1468
1469cdef class Int16Array(IntegerArray):
1470    """
1471    Concrete class for Arrow arrays of int16 data type.
1472    """
1473
1474
1475cdef class UInt16Array(IntegerArray):
1476    """
1477    Concrete class for Arrow arrays of uint16 data type.
1478    """
1479
1480
1481cdef class Int32Array(IntegerArray):
1482    """
1483    Concrete class for Arrow arrays of int32 data type.
1484    """
1485
1486
1487cdef class UInt32Array(IntegerArray):
1488    """
1489    Concrete class for Arrow arrays of uint32 data type.
1490    """
1491
1492
1493cdef class Int64Array(IntegerArray):
1494    """
1495    Concrete class for Arrow arrays of int64 data type.
1496    """
1497
1498
1499cdef class UInt64Array(IntegerArray):
1500    """
1501    Concrete class for Arrow arrays of uint64 data type.
1502    """
1503
1504
1505cdef class Date32Array(NumericArray):
1506    """
1507    Concrete class for Arrow arrays of date32 data type.
1508    """
1509
1510
1511cdef class Date64Array(NumericArray):
1512    """
1513    Concrete class for Arrow arrays of date64 data type.
1514    """
1515
1516
1517cdef class TimestampArray(NumericArray):
1518    """
1519    Concrete class for Arrow arrays of timestamp data type.
1520    """
1521
1522
1523cdef class Time32Array(NumericArray):
1524    """
1525    Concrete class for Arrow arrays of time32 data type.
1526    """
1527
1528
1529cdef class Time64Array(NumericArray):
1530    """
1531    Concrete class for Arrow arrays of time64 data type.
1532    """
1533
1534
1535cdef class DurationArray(NumericArray):
1536    """
1537    Concrete class for Arrow arrays of duration data type.
1538    """
1539
1540
1541cdef class MonthDayNanoIntervalArray(Array):
1542    """
1543    Concrete class for Arrow arrays of interval[MonthDayNano] type.
1544    """
1545
1546    def to_pylist(self):
1547        """
1548        Convert to a list of native Python objects.
1549
1550        pyarrow.MonthDayNano is used as the native representation.
1551
1552        Returns
1553        -------
1554        lst : list
1555        """
1556        cdef:
1557            CResult[PyObject*] maybe_py_list
1558            PyObject* py_list
1559            CMonthDayNanoIntervalArray* array
1560        array = <CMonthDayNanoIntervalArray*>self.sp_array.get()
1561        maybe_py_list = MonthDayNanoIntervalArrayToPyList(deref(array))
1562        py_list = GetResultValue(maybe_py_list)
1563        return PyObject_to_object(py_list)
1564
1565
1566cdef class HalfFloatArray(FloatingPointArray):
1567    """
1568    Concrete class for Arrow arrays of float16 data type.
1569    """
1570
1571
1572cdef class FloatArray(FloatingPointArray):
1573    """
1574    Concrete class for Arrow arrays of float32 data type.
1575    """
1576
1577
1578cdef class DoubleArray(FloatingPointArray):
1579    """
1580    Concrete class for Arrow arrays of float64 data type.
1581    """
1582
1583
1584cdef class FixedSizeBinaryArray(Array):
1585    """
1586    Concrete class for Arrow arrays of a fixed-size binary data type.
1587    """
1588
1589
1590cdef class Decimal128Array(FixedSizeBinaryArray):
1591    """
1592    Concrete class for Arrow arrays of decimal128 data type.
1593    """
1594
1595
1596cdef class Decimal256Array(FixedSizeBinaryArray):
1597    """
1598    Concrete class for Arrow arrays of decimal256 data type.
1599    """
1600
1601cdef class BaseListArray(Array):
1602
1603    def flatten(self):
1604        """
1605        Unnest this ListArray/LargeListArray by one level.
1606
1607        The returned Array is logically a concatenation of all the sub-lists
1608        in this Array.
1609
1610        Note that this method is different from ``self.values()`` in that
1611        it takes care of the slicing offset as well as null elements backed
1612        by non-empty sub-lists.
1613
1614        Returns
1615        -------
1616        result : Array
1617        """
1618        return _pc().list_flatten(self)
1619
1620    def value_parent_indices(self):
1621        """
1622        Return array of same length as list child values array where each
1623        output value is the index of the parent list array slot containing each
1624        child value.
1625
1626        Examples
1627        --------
1628        >>> arr = pa.array([[1, 2, 3], [], None, [4]],
1629        ...                type=pa.list_(pa.int32()))
1630        >>> arr.value_parent_indices()
1631        <pyarrow.lib.Int32Array object at 0x7efc5db958a0>
1632        [
1633          0,
1634          0,
1635          0,
1636          3
1637        ]
1638        """
1639        return _pc().list_parent_indices(self)
1640
1641    def value_lengths(self):
1642        """
1643        Return integers array with values equal to the respective length of
1644        each list element. Null list values are null in the output.
1645
1646        Examples
1647        --------
1648        >>> arr = pa.array([[1, 2, 3], [], None, [4]],
1649        ...                type=pa.list_(pa.int32()))
1650        >>> arr.value_lengths()
1651        <pyarrow.lib.Int32Array object at 0x7efc5db95910>
1652        [
1653          3,
1654          0,
1655          null,
1656          1
1657        ]
1658        """
1659        return _pc().list_value_length(self)
1660
1661
1662cdef class ListArray(BaseListArray):
1663    """
1664    Concrete class for Arrow arrays of a list data type.
1665    """
1666
1667    @staticmethod
1668    def from_arrays(offsets, values, MemoryPool pool=None):
1669        """
1670        Construct ListArray from arrays of int32 offsets and values.
1671
1672        Parameters
1673        ----------
1674        offsets : Array (int32 type)
1675        values : Array (any type)
1676        pool : MemoryPool
1677
1678        Returns
1679        -------
1680        list_array : ListArray
1681
1682        Examples
1683        --------
1684        >>> values = pa.array([1, 2, 3, 4])
1685        >>> offsets = pa.array([0, 2, 4])
1686        >>> pa.ListArray.from_arrays(offsets, values)
1687        <pyarrow.lib.ListArray object at 0x7fbde226bf40>
1688        [
1689          [
1690            0,
1691            1
1692          ],
1693          [
1694            2,
1695            3
1696          ]
1697        ]
1698        # nulls in the offsets array become null lists
1699        >>> offsets = pa.array([0, None, 2, 4])
1700        >>> pa.ListArray.from_arrays(offsets, values)
1701        <pyarrow.lib.ListArray object at 0x7fbde226bf40>
1702        [
1703          [
1704            0,
1705            1
1706          ],
1707          null,
1708          [
1709            2,
1710            3
1711          ]
1712        ]
1713        """
1714        cdef:
1715            Array _offsets, _values
1716            shared_ptr[CArray] out
1717        cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
1718
1719        _offsets = asarray(offsets, type='int32')
1720        _values = asarray(values)
1721
1722        with nogil:
1723            out = GetResultValue(
1724                CListArray.FromArrays(_offsets.ap[0], _values.ap[0], cpool))
1725        cdef Array result = pyarrow_wrap_array(out)
1726        result.validate()
1727        return result
1728
1729    @property
1730    def values(self):
1731        cdef CListArray* arr = <CListArray*> self.ap
1732        return pyarrow_wrap_array(arr.values())
1733
1734    @property
1735    def offsets(self):
1736        """
1737        Return the offsets as an int32 array.
1738        """
1739        return pyarrow_wrap_array((<CListArray*> self.ap).offsets())
1740
1741
1742cdef class LargeListArray(BaseListArray):
1743    """
1744    Concrete class for Arrow arrays of a large list data type.
1745
1746    Identical to ListArray, but 64-bit offsets.
1747    """
1748
1749    @staticmethod
1750    def from_arrays(offsets, values, MemoryPool pool=None):
1751        """
1752        Construct LargeListArray from arrays of int64 offsets and values.
1753
1754        Parameters
1755        ----------
1756        offsets : Array (int64 type)
1757        values : Array (any type)
1758        pool : MemoryPool
1759
1760        Returns
1761        -------
1762        list_array : LargeListArray
1763        """
1764        cdef:
1765            Array _offsets, _values
1766            shared_ptr[CArray] out
1767        cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
1768
1769        _offsets = asarray(offsets, type='int64')
1770        _values = asarray(values)
1771
1772        with nogil:
1773            out = GetResultValue(
1774                CLargeListArray.FromArrays(_offsets.ap[0], _values.ap[0],
1775                                           cpool))
1776        cdef Array result = pyarrow_wrap_array(out)
1777        result.validate()
1778        return result
1779
1780    @property
1781    def values(self):
1782        cdef CLargeListArray* arr = <CLargeListArray*> self.ap
1783        return pyarrow_wrap_array(arr.values())
1784
1785    @property
1786    def offsets(self):
1787        """
1788        Return the offsets as an int64 array.
1789        """
1790        return pyarrow_wrap_array((<CLargeListArray*> self.ap).offsets())
1791
1792
1793cdef class MapArray(Array):
1794    """
1795    Concrete class for Arrow arrays of a map data type.
1796    """
1797
1798    @staticmethod
1799    def from_arrays(offsets, keys, items, MemoryPool pool=None):
1800        """
1801        Construct MapArray from arrays of int32 offsets and key, item arrays.
1802
1803        Parameters
1804        ----------
1805        offsets : array-like or sequence (int32 type)
1806        keys : array-like or sequence (any type)
1807        items : array-like or sequence (any type)
1808        pool : MemoryPool
1809
1810        Returns
1811        -------
1812        map_array : MapArray
1813        """
1814        cdef:
1815            Array _offsets, _keys, _items
1816            shared_ptr[CArray] out
1817        cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)
1818
1819        _offsets = asarray(offsets, type='int32')
1820        _keys = asarray(keys)
1821        _items = asarray(items)
1822
1823        with nogil:
1824            out = GetResultValue(
1825                CMapArray.FromArrays(_offsets.sp_array,
1826                                     _keys.sp_array,
1827                                     _items.sp_array, cpool))
1828        cdef Array result = pyarrow_wrap_array(out)
1829        result.validate()
1830        return result
1831
1832    @property
1833    def keys(self):
1834        return pyarrow_wrap_array((<CMapArray*> self.ap).keys())
1835
1836    @property
1837    def items(self):
1838        return pyarrow_wrap_array((<CMapArray*> self.ap).items())
1839
1840
1841cdef class FixedSizeListArray(Array):
1842    """
1843    Concrete class for Arrow arrays of a fixed size list data type.
1844    """
1845
1846    @staticmethod
1847    def from_arrays(values, int32_t list_size):
1848        """
1849        Construct FixedSizeListArray from array of values and a list length.
1850
1851        Parameters
1852        ----------
1853        values : Array (any type)
1854        list_size : int
1855            The fixed length of the lists.
1856
1857        Returns
1858        -------
1859        FixedSizeListArray
1860        """
1861        cdef:
1862            Array _values
1863            CResult[shared_ptr[CArray]] c_result
1864
1865        _values = asarray(values)
1866
1867        with nogil:
1868            c_result = CFixedSizeListArray.FromArrays(
1869                _values.sp_array, list_size)
1870        cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
1871        result.validate()
1872        return result
1873
1874    @property
1875    def values(self):
1876        return self.flatten()
1877
1878    def flatten(self):
1879        """
1880        Unnest this FixedSizeListArray by one level.
1881
1882        Returns
1883        -------
1884        result : Array
1885        """
1886        cdef CFixedSizeListArray* arr = <CFixedSizeListArray*> self.ap
1887        return pyarrow_wrap_array(arr.values())
1888
1889
1890cdef class UnionArray(Array):
1891    """
1892    Concrete class for Arrow arrays of a Union data type.
1893    """
1894
1895    def child(self, int pos):
1896        import warnings
1897        warnings.warn("child is deprecated, use field", FutureWarning)
1898        return self.field(pos)
1899
1900    def field(self, int pos):
1901        """
1902        Return the given child field as an individual array.
1903
1904        For sparse unions, the returned array has its offset, length,
1905        and null count adjusted.
1906
1907        For dense unions, the returned array is unchanged.
1908        """
1909        cdef shared_ptr[CArray] result
1910        result = (<CUnionArray*> self.ap).field(pos)
1911        if result != NULL:
1912            return pyarrow_wrap_array(result)
1913        raise KeyError("UnionArray does not have child {}".format(pos))
1914
1915    @property
1916    def type_codes(self):
1917        """Get the type codes array."""
1918        buf = pyarrow_wrap_buffer((<CUnionArray*> self.ap).type_codes())
1919        return Array.from_buffers(int8(), len(self), [None, buf])
1920
1921    @property
1922    def offsets(self):
1923        """
1924        Get the value offsets array (dense arrays only).
1925
1926        Does not account for any slice offset.
1927        """
1928        if self.type.mode != "dense":
1929            raise ArrowTypeError("Can only get value offsets for dense arrays")
1930        cdef CDenseUnionArray* dense = <CDenseUnionArray*> self.ap
1931        buf = pyarrow_wrap_buffer(dense.value_offsets())
1932        return Array.from_buffers(int32(), len(self), [None, buf])
1933
1934    @staticmethod
1935    def from_dense(Array types, Array value_offsets, list children,
1936                   list field_names=None, list type_codes=None):
1937        """
1938        Construct dense UnionArray from arrays of int8 types, int32 offsets and
1939        children arrays
1940
1941        Parameters
1942        ----------
1943        types : Array (int8 type)
1944        value_offsets : Array (int32 type)
1945        children : list
1946        field_names : list
1947        type_codes : list
1948
1949        Returns
1950        -------
1951        union_array : UnionArray
1952        """
1953        cdef:
1954            shared_ptr[CArray] out
1955            vector[shared_ptr[CArray]] c
1956            Array child
1957            vector[c_string] c_field_names
1958            vector[int8_t] c_type_codes
1959
1960        for child in children:
1961            c.push_back(child.sp_array)
1962        if field_names is not None:
1963            for x in field_names:
1964                c_field_names.push_back(tobytes(x))
1965        if type_codes is not None:
1966            for x in type_codes:
1967                c_type_codes.push_back(x)
1968
1969        with nogil:
1970            out = GetResultValue(CDenseUnionArray.Make(
1971                deref(types.ap), deref(value_offsets.ap), c, c_field_names,
1972                c_type_codes))
1973
1974        cdef Array result = pyarrow_wrap_array(out)
1975        result.validate()
1976        return result
1977
1978    @staticmethod
1979    def from_sparse(Array types, list children, list field_names=None,
1980                    list type_codes=None):
1981        """
1982        Construct sparse UnionArray from arrays of int8 types and children
1983        arrays
1984
1985        Parameters
1986        ----------
1987        types : Array (int8 type)
1988        children : list
1989        field_names : list
1990        type_codes : list
1991
1992        Returns
1993        -------
1994        union_array : UnionArray
1995        """
1996        cdef:
1997            shared_ptr[CArray] out
1998            vector[shared_ptr[CArray]] c
1999            Array child
2000            vector[c_string] c_field_names
2001            vector[int8_t] c_type_codes
2002
2003        for child in children:
2004            c.push_back(child.sp_array)
2005        if field_names is not None:
2006            for x in field_names:
2007                c_field_names.push_back(tobytes(x))
2008        if type_codes is not None:
2009            for x in type_codes:
2010                c_type_codes.push_back(x)
2011
2012        with nogil:
2013            out = GetResultValue(CSparseUnionArray.Make(
2014                deref(types.ap), c, c_field_names, c_type_codes))
2015
2016        cdef Array result = pyarrow_wrap_array(out)
2017        result.validate()
2018        return result
2019
2020
2021cdef class StringArray(Array):
2022    """
2023    Concrete class for Arrow arrays of string (or utf8) data type.
2024    """
2025
2026    @staticmethod
2027    def from_buffers(int length, Buffer value_offsets, Buffer data,
2028                     Buffer null_bitmap=None, int null_count=-1,
2029                     int offset=0):
2030        """
2031        Construct a StringArray from value_offsets and data buffers.
2032        If there are nulls in the data, also a null_bitmap and the matching
2033        null_count must be passed.
2034
2035        Parameters
2036        ----------
2037        length : int
2038        value_offsets : Buffer
2039        data : Buffer
2040        null_bitmap : Buffer, optional
2041        null_count : int, default 0
2042        offset : int, default 0
2043
2044        Returns
2045        -------
2046        string_array : StringArray
2047        """
2048        return Array.from_buffers(utf8(), length,
2049                                  [null_bitmap, value_offsets, data],
2050                                  null_count, offset)
2051
2052
2053cdef class LargeStringArray(Array):
2054    """
2055    Concrete class for Arrow arrays of large string (or utf8) data type.
2056    """
2057
2058    @staticmethod
2059    def from_buffers(int length, Buffer value_offsets, Buffer data,
2060                     Buffer null_bitmap=None, int null_count=-1,
2061                     int offset=0):
2062        """
2063        Construct a LargeStringArray from value_offsets and data buffers.
2064        If there are nulls in the data, also a null_bitmap and the matching
2065        null_count must be passed.
2066
2067        Parameters
2068        ----------
2069        length : int
2070        value_offsets : Buffer
2071        data : Buffer
2072        null_bitmap : Buffer, optional
2073        null_count : int, default 0
2074        offset : int, default 0
2075
2076        Returns
2077        -------
2078        string_array : StringArray
2079        """
2080        return Array.from_buffers(large_utf8(), length,
2081                                  [null_bitmap, value_offsets, data],
2082                                  null_count, offset)
2083
2084
2085cdef class BinaryArray(Array):
2086    """
2087    Concrete class for Arrow arrays of variable-sized binary data type.
2088    """
2089    @property
2090    def total_values_length(self):
2091        """
2092        The number of bytes from beginning to end of the data buffer addressed
2093        by the offsets of this BinaryArray.
2094        """
2095        return (<CBinaryArray*> self.ap).total_values_length()
2096
2097
2098cdef class LargeBinaryArray(Array):
2099    """
2100    Concrete class for Arrow arrays of large variable-sized binary data type.
2101    """
2102    @property
2103    def total_values_length(self):
2104        """
2105        The number of bytes from beginning to end of the data buffer addressed
2106        by the offsets of this LargeBinaryArray.
2107        """
2108        return (<CLargeBinaryArray*> self.ap).total_values_length()
2109
2110
2111cdef class DictionaryArray(Array):
2112    """
2113    Concrete class for dictionary-encoded Arrow arrays.
2114    """
2115
2116    def dictionary_encode(self):
2117        return self
2118
2119    def dictionary_decode(self):
2120        """
2121        Decodes the DictionaryArray to an Array.
2122        """
2123        return self.dictionary.take(self.indices)
2124
2125    @property
2126    def dictionary(self):
2127        cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)
2128
2129        if self._dictionary is None:
2130            self._dictionary = pyarrow_wrap_array(darr.dictionary())
2131
2132        return self._dictionary
2133
2134    @property
2135    def indices(self):
2136        cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)
2137
2138        if self._indices is None:
2139            self._indices = pyarrow_wrap_array(darr.indices())
2140
2141        return self._indices
2142
2143    @staticmethod
2144    def from_arrays(indices, dictionary, mask=None, bint ordered=False,
2145                    bint from_pandas=False, bint safe=True,
2146                    MemoryPool memory_pool=None):
2147        """
2148        Construct a DictionaryArray from indices and values.
2149
2150        Parameters
2151        ----------
2152        indices : pyarrow.Array, numpy.ndarray or pandas.Series, int type
2153            Non-negative integers referencing the dictionary values by zero
2154            based index.
2155        dictionary : pyarrow.Array, ndarray or pandas.Series
2156            The array of values referenced by the indices.
2157        mask : ndarray or pandas.Series, bool type
2158            True values indicate that indices are actually null.
2159        from_pandas : bool, default False
2160            If True, the indices should be treated as though they originated in
2161            a pandas.Categorical (null encoded as -1).
2162        ordered : bool, default False
2163            Set to True if the category values are ordered.
2164        safe : bool, default True
2165            If True, check that the dictionary indices are in range.
2166        memory_pool : MemoryPool, default None
2167            For memory allocations, if required, otherwise uses default pool.
2168
2169        Returns
2170        -------
2171        dict_array : DictionaryArray
2172        """
2173        cdef:
2174            Array _indices, _dictionary
2175            shared_ptr[CDataType] c_type
2176            shared_ptr[CArray] c_result
2177
2178        if isinstance(indices, Array):
2179            if mask is not None:
2180                raise NotImplementedError(
2181                    "mask not implemented with Arrow array inputs yet")
2182            _indices = indices
2183        else:
2184            if from_pandas:
2185                _indices = _codes_to_indices(indices, mask, None, memory_pool)
2186            else:
2187                _indices = array(indices, mask=mask, memory_pool=memory_pool)
2188
2189        if isinstance(dictionary, Array):
2190            _dictionary = dictionary
2191        else:
2192            _dictionary = array(dictionary, memory_pool=memory_pool)
2193
2194        if not isinstance(_indices, IntegerArray):
2195            raise ValueError('Indices must be integer type')
2196
2197        cdef c_bool c_ordered = ordered
2198
2199        c_type.reset(new CDictionaryType(_indices.type.sp_type,
2200                                         _dictionary.sp_array.get().type(),
2201                                         c_ordered))
2202
2203        if safe:
2204            with nogil:
2205                c_result = GetResultValue(
2206                    CDictionaryArray.FromArrays(c_type, _indices.sp_array,
2207                                                _dictionary.sp_array))
2208        else:
2209            c_result.reset(new CDictionaryArray(c_type, _indices.sp_array,
2210                                                _dictionary.sp_array))
2211
2212        cdef Array result = pyarrow_wrap_array(c_result)
2213        result.validate()
2214        return result
2215
2216
2217cdef class StructArray(Array):
2218    """
2219    Concrete class for Arrow arrays of a struct data type.
2220    """
2221
2222    def field(self, index):
2223        """
2224        Retrieves the child array belonging to field.
2225
2226        Parameters
2227        ----------
2228        index : Union[int, str]
2229            Index / position or name of the field.
2230
2231        Returns
2232        -------
2233        result : Array
2234        """
2235        cdef:
2236            CStructArray* arr = <CStructArray*> self.ap
2237            shared_ptr[CArray] child
2238
2239        if isinstance(index, (bytes, str)):
2240            child = arr.GetFieldByName(tobytes(index))
2241            if child == nullptr:
2242                raise KeyError(index)
2243        elif isinstance(index, int):
2244            child = arr.field(
2245                <int>_normalize_index(index, self.ap.num_fields()))
2246        else:
2247            raise TypeError('Expected integer or string index')
2248
2249        return pyarrow_wrap_array(child)
2250
2251    def flatten(self, MemoryPool memory_pool=None):
2252        """
2253        Return one individual array for each field in the struct.
2254
2255        Parameters
2256        ----------
2257        memory_pool : MemoryPool, default None
2258            For memory allocations, if required, otherwise use default pool.
2259
2260        Returns
2261        -------
2262        result : List[Array]
2263        """
2264        cdef:
2265            vector[shared_ptr[CArray]] arrays
2266            CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
2267            CStructArray* sarr = <CStructArray*> self.ap
2268
2269        with nogil:
2270            arrays = GetResultValue(sarr.Flatten(pool))
2271
2272        return [pyarrow_wrap_array(arr) for arr in arrays]
2273
2274    @staticmethod
2275    def from_arrays(arrays, names=None, fields=None, mask=None,
2276                    memory_pool=None):
2277        """
2278        Construct StructArray from collection of arrays representing
2279        each field in the struct.
2280
2281        Either field names or field instances must be passed.
2282
2283        Parameters
2284        ----------
2285        arrays : sequence of Array
2286        names : List[str] (optional)
2287            Field names for each struct child.
2288        fields : List[Field] (optional)
2289            Field instances for each struct child.
2290        mask : pyarrow.Array[bool] (optional)
2291            Indicate which values are null (True) or not null (False).
2292        memory_pool : MemoryPool (optional)
2293            For memory allocations, if required, otherwise uses default pool.
2294
2295        Returns
2296        -------
2297        result : StructArray
2298        """
2299        cdef:
2300            shared_ptr[CArray] c_array
2301            shared_ptr[CBuffer] c_mask
2302            vector[shared_ptr[CArray]] c_arrays
2303            vector[c_string] c_names
2304            vector[shared_ptr[CField]] c_fields
2305            CResult[shared_ptr[CArray]] c_result
2306            ssize_t num_arrays
2307            ssize_t length
2308            ssize_t i
2309            Field py_field
2310            DataType struct_type
2311
2312        if names is None and fields is None:
2313            raise ValueError('Must pass either names or fields')
2314        if names is not None and fields is not None:
2315            raise ValueError('Must pass either names or fields, not both')
2316
2317        if mask is None:
2318            c_mask = shared_ptr[CBuffer]()
2319        elif isinstance(mask, Array):
2320            if mask.type.id != Type_BOOL:
2321                raise ValueError('Mask must be a pyarrow.Array of type bool')
2322            if mask.null_count != 0:
2323                raise ValueError('Mask must not contain nulls')
2324            inverted_mask = _pc().invert(mask, memory_pool=memory_pool)
2325            c_mask = pyarrow_unwrap_buffer(inverted_mask.buffers()[1])
2326        else:
2327            raise ValueError('Mask must be a pyarrow.Array of type bool')
2328
2329        arrays = [asarray(x) for x in arrays]
2330        for arr in arrays:
2331            c_array = pyarrow_unwrap_array(arr)
2332            if c_array == nullptr:
2333                raise TypeError(f"Expected Array, got {arr.__class__}")
2334            c_arrays.push_back(c_array)
2335        if names is not None:
2336            for name in names:
2337                c_names.push_back(tobytes(name))
2338        else:
2339            for item in fields:
2340                if isinstance(item, tuple):
2341                    py_field = field(*item)
2342                else:
2343                    py_field = item
2344                c_fields.push_back(py_field.sp_field)
2345
2346        if (c_arrays.size() == 0 and c_names.size() == 0 and
2347                c_fields.size() == 0):
2348            # The C++ side doesn't allow this
2349            return array([], struct([]))
2350
2351        if names is not None:
2352            # XXX Cannot pass "nullptr" for a shared_ptr<T> argument:
2353            # https://github.com/cython/cython/issues/3020
2354            c_result = CStructArray.MakeFromFieldNames(
2355                c_arrays, c_names, c_mask, -1, 0)
2356        else:
2357            c_result = CStructArray.MakeFromFields(
2358                c_arrays, c_fields, c_mask, -1, 0)
2359        cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
2360        result.validate()
2361        return result
2362
2363
2364cdef class ExtensionArray(Array):
2365    """
2366    Concrete class for Arrow extension arrays.
2367    """
2368
2369    @property
2370    def storage(self):
2371        cdef:
2372            CExtensionArray* ext_array = <CExtensionArray*>(self.ap)
2373
2374        return pyarrow_wrap_array(ext_array.storage())
2375
2376    @staticmethod
2377    def from_storage(BaseExtensionType typ, Array storage):
2378        """
2379        Construct ExtensionArray from type and storage array.
2380
2381        Parameters
2382        ----------
2383        typ : DataType
2384            The extension type for the result array.
2385        storage : Array
2386            The underlying storage for the result array.
2387
2388        Returns
2389        -------
2390        ext_array : ExtensionArray
2391        """
2392        cdef:
2393            shared_ptr[CExtensionArray] ext_array
2394
2395        if storage.type != typ.storage_type:
2396            raise TypeError("Incompatible storage type {0} "
2397                            "for extension type {1}".format(storage.type, typ))
2398
2399        ext_array = make_shared[CExtensionArray](typ.sp_type, storage.sp_array)
2400        cdef Array result = pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
2401        result.validate()
2402        return result
2403
2404    def _to_pandas(self, options, **kwargs):
2405        pandas_dtype = None
2406        try:
2407            pandas_dtype = self.type.to_pandas_dtype()
2408        except NotImplementedError:
2409            pass
2410
2411        # pandas ExtensionDtype that implements conversion from pyarrow
2412        if hasattr(pandas_dtype, '__from_arrow__'):
2413            arr = pandas_dtype.__from_arrow__(self)
2414            return pandas_api.series(arr)
2415
2416        # otherwise convert the storage array with the base implementation
2417        return Array._to_pandas(self.storage, options, **kwargs)
2418
2419    def to_numpy(self, **kwargs):
2420        """
2421        Convert extension array to a numpy ndarray.
2422
2423        See Also
2424        --------
2425        Array.to_numpy
2426        """
2427        return self.storage.to_numpy(**kwargs)
2428
2429
2430cdef dict _array_classes = {
2431    _Type_NA: NullArray,
2432    _Type_BOOL: BooleanArray,
2433    _Type_UINT8: UInt8Array,
2434    _Type_UINT16: UInt16Array,
2435    _Type_UINT32: UInt32Array,
2436    _Type_UINT64: UInt64Array,
2437    _Type_INT8: Int8Array,
2438    _Type_INT16: Int16Array,
2439    _Type_INT32: Int32Array,
2440    _Type_INT64: Int64Array,
2441    _Type_DATE32: Date32Array,
2442    _Type_DATE64: Date64Array,
2443    _Type_TIMESTAMP: TimestampArray,
2444    _Type_TIME32: Time32Array,
2445    _Type_TIME64: Time64Array,
2446    _Type_DURATION: DurationArray,
2447    _Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalArray,
2448    _Type_HALF_FLOAT: HalfFloatArray,
2449    _Type_FLOAT: FloatArray,
2450    _Type_DOUBLE: DoubleArray,
2451    _Type_LIST: ListArray,
2452    _Type_LARGE_LIST: LargeListArray,
2453    _Type_MAP: MapArray,
2454    _Type_FIXED_SIZE_LIST: FixedSizeListArray,
2455    _Type_SPARSE_UNION: UnionArray,
2456    _Type_DENSE_UNION: UnionArray,
2457    _Type_BINARY: BinaryArray,
2458    _Type_STRING: StringArray,
2459    _Type_LARGE_BINARY: LargeBinaryArray,
2460    _Type_LARGE_STRING: LargeStringArray,
2461    _Type_DICTIONARY: DictionaryArray,
2462    _Type_FIXED_SIZE_BINARY: FixedSizeBinaryArray,
2463    _Type_DECIMAL128: Decimal128Array,
2464    _Type_DECIMAL256: Decimal256Array,
2465    _Type_STRUCT: StructArray,
2466    _Type_EXTENSION: ExtensionArray,
2467}
2468
2469
2470cdef object get_array_class_from_type(
2471        const shared_ptr[CDataType]& sp_data_type):
2472    cdef CDataType* data_type = sp_data_type.get()
2473    if data_type == NULL:
2474        raise ValueError('Array data type was NULL')
2475
2476    if data_type.id() == _Type_EXTENSION:
2477        py_ext_data_type = pyarrow_wrap_data_type(sp_data_type)
2478        return py_ext_data_type.__arrow_ext_class__()
2479    else:
2480        return _array_classes[data_type.id()]
2481
2482
2483cdef object get_values(object obj, bint* is_series):
2484    if pandas_api.is_series(obj) or pandas_api.is_index(obj):
2485        result = pandas_api.get_values(obj)
2486        is_series[0] = True
2487    elif isinstance(obj, np.ndarray):
2488        result = obj
2489        is_series[0] = False
2490    else:
2491        result = pandas_api.series(obj).values
2492        is_series[0] = False
2493
2494    return result
2495
2496
2497def concat_arrays(arrays, MemoryPool memory_pool=None):
2498    """
2499    Concatenate the given arrays.
2500
2501    The contents of the input arrays are copied into the returned array.
2502
2503    Raises
2504    ------
2505    ArrowInvalid : if not all of the arrays have the same type.
2506
2507    Parameters
2508    ----------
2509    arrays : iterable of pyarrow.Array
2510        Arrays to concatenate, must be identically typed.
2511    memory_pool : MemoryPool, default None
2512        For memory allocations. If None, the default pool is used.
2513    """
2514    cdef:
2515        vector[shared_ptr[CArray]] c_arrays
2516        shared_ptr[CArray] c_concatenated
2517        CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
2518
2519    for array in arrays:
2520        if not isinstance(array, Array):
2521            raise TypeError("Iterable should contain Array objects, "
2522                            "got {0} instead".format(type(array)))
2523        c_arrays.push_back(pyarrow_unwrap_array(array))
2524
2525    with nogil:
2526        c_concatenated = GetResultValue(Concatenate(c_arrays, pool))
2527
2528    return pyarrow_wrap_array(c_concatenated)
2529
2530
2531def _empty_array(DataType type):
2532    """
2533    Create empty array of the given type.
2534    """
2535    if type.id == Type_DICTIONARY:
2536        arr = DictionaryArray.from_arrays(
2537            _empty_array(type.index_type), _empty_array(type.value_type),
2538            ordered=type.ordered)
2539    else:
2540        arr = array([], type=type)
2541    return arr
2542