1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18# cython: language_level = 3
19
20from cpython cimport PyObject
21from libcpp cimport nullptr
22from libcpp.cast cimport dynamic_cast
23from pyarrow.includes.common cimport *
24from pyarrow.includes.libarrow cimport *
25
26
# Expose PySlice_Check from the CPython C API header ("Python.h") so that
# Cython code cimporting this file can call it. It takes a Python object
# and returns a C int.
27cdef extern from "Python.h":
28    int PySlice_Check(object)
29
30
# Prototype only; the implementation is not in this file. Takes a CStatus
# by const reference, may be called without the GIL (`nogil`), and uses -1
# as the return value that signals a raised Python exception (`except -1`).
31cdef int check_status(const CStatus& status) nogil except -1
32
33
# Common base class for the extension types declared in this file.
# Declaring the special `__weakref__` attribute is Cython's way of making
# an extension type (and its subclasses) weak-referenceable.
34cdef class _Weakrefable:
35    cdef object __weakref__
36
37
# Extension type that stores a CIpcWriteOptions by value in `c_options`.
38cdef class IpcWriteOptions(_Weakrefable):
39    cdef:
40        CIpcWriteOptions c_options
41
42
# Extension type that holds a CMessage through a unique_ptr, i.e. the
# wrapper is the sole owner of whatever `message` points to.
43cdef class Message(_Weakrefable):
44    cdef:
45        unique_ptr[CMessage] message
46
47
# Extension type holding a raw CMemoryPool pointer.
# NOTE(review): the ownership/lifetime of `pool` is not visible from this
# declaration -- confirm in the implementation.
48cdef class MemoryPool(_Weakrefable):
49    cdef:
50        CMemoryPool* pool
51
    # Prototype only; presumably stores the given pointer in `pool`
    # (confirm in the implementation).
52    cdef void init(self, CMemoryPool* pool)
53
54
# Prototype only: converts a MemoryPool wrapper into a raw CMemoryPool*.
# NOTE(review): the "maybe_" prefix suggests a fallback when `memory_pool`
# is None -- confirm against the implementation.
55cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool)
56
57
# Base extension type for data types; the *Type classes below derive
# from it.
58cdef class DataType(_Weakrefable):
59    cdef:
        # Shared-ownership handle to a CDataType, plus a raw pointer of the
        # same pointee type (presumably aliasing sp_type -- confirm).
60        shared_ptr[CDataType] sp_type
61        CDataType* type
        # NOTE(review): presumably a PEP 3118 buffer-format string -- confirm.
62        bytes pep3118_format
63
    # `except *`: returns void, so the caller checks for a pending Python
    # exception after every call.
64    cdef void init(self, const shared_ptr[CDataType]& type) except *
    # Prototype only: returns a Field for the integer index `i`.
65    cdef Field field(self, int i)
66
67
# DataType subclass that additionally keeps a const CListType* pointer
# (presumably a typed view of the inherited type pointer -- confirm).
68cdef class ListType(DataType):
69    cdef:
70        const CListType* list_type
71
72
# DataType subclass that additionally keeps a const CLargeListType*
# pointer. Note the attribute shares the name `list_type` with ListType
# but has a different pointee type.
73cdef class LargeListType(DataType):
74    cdef:
75        const CLargeListType* list_type
76
77
# DataType subclass that additionally keeps a const CMapType* pointer.
78cdef class MapType(DataType):
79    cdef:
80        const CMapType* map_type
81
82
# DataType subclass that additionally keeps a const CFixedSizeListType*
# pointer.
83cdef class FixedSizeListType(DataType):
84    cdef:
85        const CFixedSizeListType* list_type
86
87
# DataType subclass that additionally keeps a const CStructType* pointer.
88cdef class StructType(DataType):
89    cdef:
90        const CStructType* struct_type
91
    # Prototype only: looks up a Field given `name` (an untyped Python
    # object in this signature).
92    cdef Field field_by_name(self, name)
93
94
# Extension type holding a CDictionaryMemo via shared_ptr, plus a raw
# pointer of the same pointee type (presumably aliasing sp_memo -- confirm).
95cdef class DictionaryMemo(_Weakrefable):
96    cdef:
97        # Even though the CDictionaryMemo instance is private, we allocate
98        # it on the heap so as to avoid C++ ABI issues with Python wheels.
99        shared_ptr[CDictionaryMemo] sp_memo
100        CDictionaryMemo* memo
101
102
# DataType subclass that additionally keeps a const CDictionaryType*
# pointer.
103cdef class DictionaryType(DataType):
104    cdef:
105        const CDictionaryType* dict_type
106
107
# DataType subclass that additionally keeps a const CTimestampType*
# pointer.
108cdef class TimestampType(DataType):
109    cdef:
110        const CTimestampType* ts_type
111
112
# DataType subclass that additionally keeps a const CTime32Type* pointer.
113cdef class Time32Type(DataType):
114    cdef:
115        const CTime32Type* time_type
116
117
# DataType subclass that additionally keeps a const CTime64Type* pointer.
# The attribute shares the name `time_type` with Time32Type but has a
# different pointee type.
118cdef class Time64Type(DataType):
119    cdef:
120        const CTime64Type* time_type
121
122
# DataType subclass that additionally keeps a const CDurationType*
# pointer.
123cdef class DurationType(DataType):
124    cdef:
125        const CDurationType* duration_type
126
127
# DataType subclass that additionally keeps a const CFixedSizeBinaryType*
# pointer. It is also the declared base of the two decimal types below.
128cdef class FixedSizeBinaryType(DataType):
129    cdef:
130        const CFixedSizeBinaryType* fixed_size_binary_type
131
132
# FixedSizeBinaryType subclass that additionally keeps a
# const CDecimal128Type* pointer.
133cdef class Decimal128Type(FixedSizeBinaryType):
134    cdef:
135        const CDecimal128Type* decimal128_type
136
137
# FixedSizeBinaryType subclass that additionally keeps a
# const CDecimal256Type* pointer.
138cdef class Decimal256Type(FixedSizeBinaryType):
139    cdef:
140        const CDecimal256Type* decimal256_type
141
142
# DataType subclass that additionally keeps a const CExtensionType*
# pointer; declared base of ExtensionType.
143cdef class BaseExtensionType(DataType):
144    cdef:
145        const CExtensionType* ext_type
146
147
# BaseExtensionType subclass that additionally keeps a
# const CPyExtensionType* pointer.
148cdef class ExtensionType(BaseExtensionType):
149    cdef:
150        const CPyExtensionType* cpy_ext_type
151
152
# ExtensionType subclass; declares no additional C-level members here.
153cdef class PyExtensionType(ExtensionType):
154    pass
155
156
# Member-less extension-type base used as the first parent of
# KeyValueMetadata.
157cdef class _Metadata(_Weakrefable):
158    # required because KeyValueMetadata also extends collections.abc.Mapping
159    # and the first parent class must be an extension type
160    pass
161
162
# Extension type wrapping a shared_ptr to a const CKeyValueMetadata.
163cdef class KeyValueMetadata(_Metadata):
164    cdef:
        # Shared-ownership handle and a raw const pointer of the same
        # pointee type (presumably aliasing `wrapped` -- confirm).
165        shared_ptr[const CKeyValueMetadata] wrapped
166        const CKeyValueMetadata* metadata
167
    # Prototype only; takes the shared_ptr to wrap.
168    cdef void init(self, const shared_ptr[const CKeyValueMetadata]& wrapped)
169
    # Static C-level factory: takes a shared_ptr and returns a Python
    # object (untyped return).
170    @staticmethod
171    cdef wrap(const shared_ptr[const CKeyValueMetadata]& sp)
    # Returns a shared_ptr by value; callable without the GIL.
172    cdef inline shared_ptr[const CKeyValueMetadata] unwrap(self) nogil
173
174
# Extension type wrapping a CField.
175cdef class Field(_Weakrefable):
176    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_field -- confirm).
177        shared_ptr[CField] sp_field
178        CField* field
179
    # `readonly`: readable but not assignable from Python code.
180    cdef readonly:
181        DataType type
182
    # Prototype only; takes the shared_ptr to wrap.
183    cdef void init(self, const shared_ptr[CField]& field)
184
185
# Extension type wrapping a CSchema.
186cdef class Schema(_Weakrefable):
187    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_schema -- confirm).
188        shared_ptr[CSchema] sp_schema
189        CSchema* schema
190
    # Two initialisers are declared: one from a vector of CField
    # shared_ptrs, one from an existing CSchema shared_ptr.
191    cdef void init(self, const vector[shared_ptr[CField]]& fields)
192    cdef void init_schema(self, const shared_ptr[CSchema]& schema)
193
194
# Extension type wrapping a shared_ptr[CScalar].
195cdef class Scalar(_Weakrefable):
196    cdef:
197        shared_ptr[CScalar] wrapped
198
    # Prototype only; takes the shared_ptr to wrap.
199    cdef void init(self, const shared_ptr[CScalar]& wrapped)
200
    # Static C-level factory: takes a shared_ptr and returns a Python
    # object (untyped return).
201    @staticmethod
202    cdef wrap(const shared_ptr[CScalar]& wrapped)
203
    # Returns a shared_ptr[CScalar] by value; callable without the GIL.
204    cdef inline shared_ptr[CScalar] unwrap(self) nogil
205
206
# Member-less base class; in this file Array, ChunkedArray, Table and
# RecordBatch derive from it.
207cdef class _PandasConvertible(_Weakrefable):
208    pass
209
210
# Base extension type for arrays; the *Array classes below derive from it.
211cdef class Array(_PandasConvertible):
212    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_array -- confirm).
213        shared_ptr[CArray] sp_array
214        CArray* ap
215
    # `readonly`: readable but not assignable from Python code.
216    cdef readonly:
217        DataType type
218        # To allow Table to propagate metadata to pandas.Series
219        object _name
220
    # `except *`: returns void, so the caller checks for a pending Python
    # exception after every call.
221    cdef void init(self, const shared_ptr[CArray]& sp_array) except *
    # Prototypes only: element access by 64-bit index, and a 64-bit length.
222    cdef getitem(self, int64_t i)
223    cdef int64_t length(self)
224
225
# Extension type wrapping a CTensor.
226cdef class Tensor(_Weakrefable):
227    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_tensor -- confirm).
228        shared_ptr[CTensor] sp_tensor
229        CTensor* tp
230
    # `readonly`: readable but not assignable from Python code.
231    cdef readonly:
232        DataType type
233
    # Prototype only; takes the shared_ptr to wrap.
234    cdef void init(self, const shared_ptr[CTensor]& sp_tensor)
235
236
# Extension type wrapping a CSparseCSRMatrix. Its layout mirrors the other
# Sparse* classes in this file: shared_ptr, raw pointer, readonly `type`.
237cdef class SparseCSRMatrix(_Weakrefable):
238    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_sparse_tensor -- confirm).
239        shared_ptr[CSparseCSRMatrix] sp_sparse_tensor
240        CSparseCSRMatrix* stp
241
    # `readonly`: readable but not assignable from Python code.
242    cdef readonly:
243        DataType type
244
    # Prototype only; takes the shared_ptr to wrap.
245    cdef void init(self, const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
246
247
# Extension type wrapping a CSparseCSCMatrix. Its layout mirrors the other
# Sparse* classes in this file: shared_ptr, raw pointer, readonly `type`.
248cdef class SparseCSCMatrix(_Weakrefable):
249    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_sparse_tensor -- confirm).
250        shared_ptr[CSparseCSCMatrix] sp_sparse_tensor
251        CSparseCSCMatrix* stp
252
    # `readonly`: readable but not assignable from Python code.
253    cdef readonly:
254        DataType type
255
    # Prototype only; takes the shared_ptr to wrap.
256    cdef void init(self, const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
257
258
# Extension type wrapping a CSparseCOOTensor. Its layout mirrors the other
# Sparse* classes in this file: shared_ptr, raw pointer, readonly `type`.
259cdef class SparseCOOTensor(_Weakrefable):
260    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_sparse_tensor -- confirm).
261        shared_ptr[CSparseCOOTensor] sp_sparse_tensor
262        CSparseCOOTensor* stp
263
    # `readonly`: readable but not assignable from Python code.
264    cdef readonly:
265        DataType type
266
    # Prototype only; takes the shared_ptr to wrap.
267    cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
268
269
# Extension type wrapping a CSparseCSFTensor. Its layout mirrors the other
# Sparse* classes in this file: shared_ptr, raw pointer, readonly `type`.
270cdef class SparseCSFTensor(_Weakrefable):
271    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_sparse_tensor -- confirm).
272        shared_ptr[CSparseCSFTensor] sp_sparse_tensor
273        CSparseCSFTensor* stp
274
    # `readonly`: readable but not assignable from Python code.
275    cdef readonly:
276        DataType type
277
    # Prototype only; takes the shared_ptr to wrap.
278    cdef void init(self, const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
279
280
# Array subclass; declares no additional C-level members here.
281cdef class NullArray(Array):
282    pass
283
284
# Array subclass; declares no additional C-level members here.
285cdef class BooleanArray(Array):
286    pass
287
288
# Array subclass with no additional C-level members; declared base of
# IntegerArray and FloatingPointArray.
289cdef class NumericArray(Array):
290    pass
291
292
# NumericArray subclass with no additional C-level members; declared base
# of the Int*/UInt* array classes.
293cdef class IntegerArray(NumericArray):
294    pass
295
296
# NumericArray subclass with no additional C-level members; declared base
# of HalfFloatArray, FloatArray and DoubleArray.
297cdef class FloatingPointArray(NumericArray):
298    pass
299
300
# IntegerArray subclass; declares no additional C-level members here.
301cdef class Int8Array(IntegerArray):
302    pass
303
304
# IntegerArray subclass; declares no additional C-level members here.
305cdef class UInt8Array(IntegerArray):
306    pass
307
308
# IntegerArray subclass; declares no additional C-level members here.
309cdef class Int16Array(IntegerArray):
310    pass
311
312
# IntegerArray subclass; declares no additional C-level members here.
313cdef class UInt16Array(IntegerArray):
314    pass
315
316
# IntegerArray subclass; declares no additional C-level members here.
317cdef class Int32Array(IntegerArray):
318    pass
319
320
# IntegerArray subclass; declares no additional C-level members here.
321cdef class UInt32Array(IntegerArray):
322    pass
323
324
# IntegerArray subclass; declares no additional C-level members here.
325cdef class Int64Array(IntegerArray):
326    pass
327
328
# IntegerArray subclass; declares no additional C-level members here.
329cdef class UInt64Array(IntegerArray):
330    pass
331
332
# FloatingPointArray subclass; declares no additional C-level members here.
333cdef class HalfFloatArray(FloatingPointArray):
334    pass
335
336
# FloatingPointArray subclass; declares no additional C-level members here.
337cdef class FloatArray(FloatingPointArray):
338    pass
339
340
# FloatingPointArray subclass; declares no additional C-level members here.
341cdef class DoubleArray(FloatingPointArray):
342    pass
343
344
# Array subclass with no additional C-level members; declared base of the
# two decimal array classes.
345cdef class FixedSizeBinaryArray(Array):
346    pass
347
348
# FixedSizeBinaryArray subclass; declares no additional C-level members
# here.
349cdef class Decimal128Array(FixedSizeBinaryArray):
350    pass
351
352
# FixedSizeBinaryArray subclass; declares no additional C-level members
# here.
353cdef class Decimal256Array(FixedSizeBinaryArray):
354    pass
355
356
# Array subclass; declares no additional C-level members here.
357cdef class StructArray(Array):
358    pass
359
360
# Array subclass with no additional C-level members; declared base of
# ListArray and LargeListArray.
361cdef class BaseListArray(Array):
362    pass
363
364
# BaseListArray subclass; declares no additional C-level members here.
365cdef class ListArray(BaseListArray):
366    pass
367
368
# BaseListArray subclass; declares no additional C-level members here.
369cdef class LargeListArray(BaseListArray):
370    pass
371
372
# Array subclass; declares no additional C-level members here. Note that
# it derives directly from Array, not from BaseListArray.
373cdef class MapArray(Array):
374    pass
375
376
# Array subclass; declares no additional C-level members here. Note that
# it derives directly from Array, not from BaseListArray.
377cdef class FixedSizeListArray(Array):
378    pass
379
380
# Array subclass; declares no additional C-level members here.
381cdef class UnionArray(Array):
382    pass
383
384
# Array subclass; declares no additional C-level members here.
385cdef class StringArray(Array):
386    pass
387
388
# Array subclass; declares no additional C-level members here.
389cdef class BinaryArray(Array):
390    pass
391
392
# Array subclass that adds two Python-object attributes.
393cdef class DictionaryArray(Array):
394    cdef:
        # NOTE(review): presumably cached wrappers for the indices and
        # dictionary arrays -- confirm in the implementation.
395        object _indices, _dictionary
396
397
# Array subclass; declares no additional C-level members here.
398cdef class ExtensionArray(Array):
399    pass
400
401
# Prototypes only; both return a Python object (untyped return).
# wrap_array_output takes a raw PyObject*.
# NOTE(review): the reference-ownership convention for `output` is not
# visible here -- confirm in the implementation.
402cdef wrap_array_output(PyObject* output)
# wrap_datum takes a CDatum by const reference.
403cdef wrap_datum(const CDatum& datum)
404
405
# Extension type wrapping a CChunkedArray.
406cdef class ChunkedArray(_PandasConvertible):
407    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_chunked_array -- confirm).
408        shared_ptr[CChunkedArray] sp_chunked_array
409        CChunkedArray* chunked_array
410
    # `readonly`: readable but not assignable from Python code.
411    cdef readonly:
412        # To allow Table to propagate metadata to pandas.Series
413        object _name
414
    # Prototypes only: initialise from a shared_ptr; element access by
    # 64-bit index.
415    cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array)
416    cdef getitem(self, int64_t i)
417
418
# Extension type wrapping a CTable.
419cdef class Table(_PandasConvertible):
420    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_table -- confirm).
421        shared_ptr[CTable] sp_table
422        CTable* table
423
    # Prototype only; takes the shared_ptr to wrap.
424    cdef void init(self, const shared_ptr[CTable]& table)
425
426
# Extension type wrapping a CRecordBatch.
427cdef class RecordBatch(_PandasConvertible):
428    cdef:
        # Shared-ownership handle and a raw pointer of the same pointee
        # type (presumably aliasing sp_batch -- confirm).
429        shared_ptr[CRecordBatch] sp_batch
430        CRecordBatch* batch
        # NOTE(review): presumably a cached Schema wrapper -- confirm.
431        Schema _schema
432
    # Prototype only. The parameter is named `table` although its type is
    # shared_ptr[CRecordBatch].
433    cdef void init(self, const shared_ptr[CRecordBatch]& table)
434
435
# Extension type wrapping a shared_ptr[CBuffer].
436cdef class Buffer(_Weakrefable):
437    cdef:
438        shared_ptr[CBuffer] buffer
        # One-element Py_ssize_t arrays stored inline in the object.
        # NOTE(review): presumably backing storage for the shape/strides
        # fields of a Py_buffer view -- confirm in the implementation.
439        Py_ssize_t shape[1]
440        Py_ssize_t strides[1]
441
    # Prototypes only: initialise from a shared_ptr; element access by
    # 64-bit index.
442    cdef void init(self, const shared_ptr[CBuffer]& buffer)
443    cdef getitem(self, int64_t i)
444
445
# Buffer subclass that adds no attributes; it only declares a separate
# initialiser taking a shared_ptr[CResizableBuffer].
446cdef class ResizableBuffer(Buffer):
447
448    cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer)
449
450
# Extension type holding up to three C++ stream handles plus boolean
# state flags; base class of the stream wrappers declared below.
451cdef class NativeFile(_Weakrefable):
452    cdef:
        # One shared_ptr per stream interface.
        # NOTE(review): which of these are populated for a given file is
        # decided by the implementation -- not visible here.
453        shared_ptr[CInputStream] input_stream
454        shared_ptr[CRandomAccessFile] random_access
455        shared_ptr[COutputStream] output_stream
        # C booleans (`bint`).
        # NOTE(review): `own_file` presumably records whether this object
        # is responsible for closing the underlying file -- confirm.
456        bint is_readable
457        bint is_writable
458        bint is_seekable
459        bint own_file
460
461    # By implementing these "virtual" functions (all functions in Cython
462    # extension classes are technically virtual in the C++ sense) we can expose
463    # the arrow::io abstract file interfaces to other components throughout the
464    # suite of Arrow C++ libraries
    # Setters take the handle by value and return a Python object (untyped
    # return).
465    cdef set_random_access_file(self, shared_ptr[CRandomAccessFile] handle)
466    cdef set_input_stream(self, shared_ptr[CInputStream] handle)
467    cdef set_output_stream(self, shared_ptr[COutputStream] handle)
468
    # Getters return a shared_ptr by value; `except *` means the caller
    # checks for a pending Python exception after every call.
469    cdef shared_ptr[CRandomAccessFile] get_random_access_file(self) except *
470    cdef shared_ptr[CInputStream] get_input_stream(self) except *
471    cdef shared_ptr[COutputStream] get_output_stream(self) except *
472
473
# NativeFile subclass; declares no additional C-level members here.
474cdef class BufferedInputStream(NativeFile):
475    pass
476
477
# NativeFile subclass; declares no additional C-level members here.
478cdef class BufferedOutputStream(NativeFile):
479    pass
480
481
# NativeFile subclass; declares no additional C-level members here.
482cdef class CompressedInputStream(NativeFile):
483    pass
484
485
# NativeFile subclass; declares no additional C-level members here.
486cdef class CompressedOutputStream(NativeFile):
487    pass
488
489
# Extension type holding a shared_ptr[CRecordBatchWriter] in `writer`.
490cdef class _CRecordBatchWriter(_Weakrefable):
491    cdef:
492        shared_ptr[CRecordBatchWriter] writer
493
494
# Extension type holding a shared_ptr[CRecordBatchReader] in `reader`.
495cdef class RecordBatchReader(_Weakrefable):
496    cdef:
497        shared_ptr[CRecordBatchReader] reader
498
499
# Extension type that holds a CCodec through a unique_ptr, i.e. the
# wrapper is the sole owner of whatever `wrapped` points to.
500cdef class Codec(_Weakrefable):
501    cdef:
502        unique_ptr[CCodec] wrapped
503
    # Returns a raw CCodec*; callable without the GIL.
    # NOTE(review): presumably a pointer into `wrapped`, so valid only
    # while this object is alive -- confirm.
504    cdef inline CCodec* unwrap(self) nogil
505
506
# Prototypes only for module-level stream helpers. The first three take a
# pointer to a shared_ptr as their last argument.
# NOTE(review): presumably an out-parameter that receives the resulting
# stream -- confirm in the implementation.
507cdef get_input_stream(object source, c_bool use_memory_map,
508                      shared_ptr[CInputStream]* reader)
509cdef get_reader(object source, c_bool use_memory_map,
510                shared_ptr[CRandomAccessFile]* reader)
511cdef get_writer(object source, shared_ptr[COutputStream]* writer)
# Returns a NativeFile wrapper object instead of writing through a pointer.
512cdef NativeFile get_native_file(object source, c_bool use_memory_map)
513
# Prototype only: takes an input-stream handle plus two untyped encoding
# arguments and returns an input-stream handle. `except *` means the
# caller checks for a pending Python exception after every call.
# NOTE(review): presumably wraps `stream` so its bytes are transcoded from
# src_encoding to dest_encoding -- confirm in the implementation.
514cdef shared_ptr[CInputStream] native_transcoding_input_stream(
515    shared_ptr[CInputStream] stream, src_encoding,
516    dest_encoding) except *
517
# `cpdef`: callable from both Python and C-level Cython code. In a .pxd
# the `=*` marks `allow_none` as optional; the actual default value lives
# in the implementation.
518# Default is allow_none=False
519cpdef DataType ensure_type(object type, bint allow_none=*)
520
# Internal metadata conversion helpers (prototypes only).
521# Exceptions may be raised when converting dict values, so need to
522# check exception state on return
# Note: the return type is a shared_ptr to a non-const CKeyValueMetadata.
523cdef shared_ptr[CKeyValueMetadata] pyarrow_unwrap_metadata(object meta) \
524    except *
# Takes a shared_ptr to a const CKeyValueMetadata and returns a Python
# object.
525cdef object pyarrow_wrap_metadata(
526    const shared_ptr[const CKeyValueMetadata]& meta)
527
528#
529# Public Cython API for 3rd party code
530#
531
# Each pyarrow_wrap_* prototype takes a C++ shared_ptr by const reference
# and returns a Python object. `cdef public` asks Cython to export the
# declaration in the generated C header.
532cdef public object pyarrow_wrap_scalar(const shared_ptr[CScalar]& sp_scalar)
533cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
534cdef public object pyarrow_wrap_chunked_array(
535    const shared_ptr[CChunkedArray]& sp_array)
536cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
537cdef public object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf)
538cdef public object pyarrow_wrap_data_type(const shared_ptr[CDataType]& type)
539cdef public object pyarrow_wrap_field(const shared_ptr[CField]& field)
540cdef public object pyarrow_wrap_resizable_buffer(
541    const shared_ptr[CResizableBuffer]& buf)
# The parameter is named `type` although it carries a CSchema.
542cdef public object pyarrow_wrap_schema(const shared_ptr[CSchema]& type)
543cdef public object pyarrow_wrap_table(const shared_ptr[CTable]& ctable)
544cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
545cdef public object pyarrow_wrap_sparse_coo_tensor(
546    const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
547cdef public object pyarrow_wrap_sparse_csr_matrix(
548    const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
549cdef public object pyarrow_wrap_sparse_csc_matrix(
550    const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
551cdef public object pyarrow_wrap_sparse_csf_tensor(
552    const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
553
# Each pyarrow_unwrap_* prototype takes a Python object and returns a C++
# shared_ptr by value. `cdef public` asks Cython to export the declaration
# in the generated C header.
# NOTE(review): none of these declare an `except` clause, so a failure
# cannot be reported through the exception mechanism of the signature;
# how a wrong-typed argument is handled must be confirmed in the
# implementation.
554cdef public shared_ptr[CScalar] pyarrow_unwrap_scalar(object scalar)
555cdef public shared_ptr[CArray] pyarrow_unwrap_array(object array)
556cdef public shared_ptr[CChunkedArray] pyarrow_unwrap_chunked_array(
557    object array)
558cdef public shared_ptr[CRecordBatch] pyarrow_unwrap_batch(object batch)
559cdef public shared_ptr[CBuffer] pyarrow_unwrap_buffer(object buffer)
560cdef public shared_ptr[CDataType] pyarrow_unwrap_data_type(object data_type)
561cdef public shared_ptr[CField] pyarrow_unwrap_field(object field)
562cdef public shared_ptr[CSchema] pyarrow_unwrap_schema(object schema)
563cdef public shared_ptr[CTable] pyarrow_unwrap_table(object table)
564cdef public shared_ptr[CTensor] pyarrow_unwrap_tensor(object tensor)
565cdef public shared_ptr[CSparseCOOTensor] pyarrow_unwrap_sparse_coo_tensor(
566    object sparse_tensor)
567cdef public shared_ptr[CSparseCSRMatrix] pyarrow_unwrap_sparse_csr_matrix(
568    object sparse_tensor)
569cdef public shared_ptr[CSparseCSCMatrix] pyarrow_unwrap_sparse_csc_matrix(
570    object sparse_tensor)
571cdef public shared_ptr[CSparseCSFTensor] pyarrow_unwrap_sparse_csf_tensor(
572    object sparse_tensor)
573