# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
17
# distutils: language = c++

# Presumably supplies the shared names used throughout this file
# (c_string, c_bool, shared_ptr, vector, CStatus, CResult, ...) -- none of
# them are declared here.
from pyarrow.includes.common cimport *
21
# Declarations taken from arrow/util/key_value_metadata.h (namespace arrow).
# Everything in this block is declared `nogil`.
cdef extern from "arrow/util/key_value_metadata.h" namespace "arrow" nogil:
    # Cython-side name for arrow::KeyValueMetadata.
    cdef cppclass CKeyValueMetadata" arrow::KeyValueMetadata":
        # Constructors: empty, from an unordered_map, or from two parallel
        # vectors of keys and values.
        CKeyValueMetadata()
        CKeyValueMetadata(const unordered_map[c_string, c_string]&)
        CKeyValueMetadata(const vector[c_string]& keys,
                          const vector[c_string]& values)

        # Size / positional access.
        void reserve(int64_t n)
        int64_t size() const
        c_string key(int64_t i) const
        c_string value(int64_t i) const
        int FindKey(const c_string& key) const

        # Whole-object operations.
        shared_ptr[CKeyValueMetadata] Copy() const
        c_bool Equals(const CKeyValueMetadata& other)
        void Append(const c_string& key, const c_string& value)
        # Writes into the caller-provided map (pointer out-parameter).
        void ToUnorderedMap(unordered_map[c_string, c_string]*) const
        c_string ToString() const

        # Key-based access; Get/Delete/Set report failure through
        # CResult / CStatus rather than a plain return value.
        CResult[c_string] Get(const c_string& key) const
        CStatus Delete(const c_string& key)
        CStatus Set(const c_string& key, const c_string& value)
        c_bool Contains(const c_string& key) const
45
46
# arrow::Decimal128 from arrow/util/decimal.h; only the scale-aware
# string conversion is exposed here.
cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil:
    cdef cppclass CDecimal128" arrow::Decimal128":
        c_string ToString(int32_t scale) const
50
51
# arrow::Decimal256 from arrow/util/decimal.h; mirrors the CDecimal128
# declaration (scale-aware string conversion only).
cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil:
    cdef cppclass CDecimal256" arrow::Decimal256":
        c_string ToString(int32_t scale) const
55
56
# Build-time and run-time information structs from arrow/config.h.
cdef extern from "arrow/config.h" namespace "arrow" nogil:
    # Data members of arrow::BuildInfo (plain fields, not methods).
    cdef cppclass CBuildInfo "arrow::BuildInfo":
        # Version, as a single int and split into components.
        int version
        int version_major
        int version_minor
        int version_patch
        c_string version_string
        c_string so_version
        c_string full_so_version
        # Compiler used for the build.
        c_string compiler_id
        c_string compiler_version
        c_string compiler_flags
        # Source revision and packaging.
        c_string git_id
        c_string git_description
        c_string package_kind

    # Returns a const reference to the build info.
    const CBuildInfo& GetBuildInfo()

    # Data members of arrow::RuntimeInfo.
    cdef cppclass CRuntimeInfo "arrow::RuntimeInfo":
        c_string simd_level
        c_string detected_simd_level

    # Returns the runtime info by value.
    CRuntimeInfo GetRuntimeInfo()
80
81
82cdef extern from "arrow/api.h" namespace "arrow" nogil:
83    enum Type" arrow::Type::type":
84        _Type_NA" arrow::Type::NA"
85
86        _Type_BOOL" arrow::Type::BOOL"
87
88        _Type_UINT8" arrow::Type::UINT8"
89        _Type_INT8" arrow::Type::INT8"
90        _Type_UINT16" arrow::Type::UINT16"
91        _Type_INT16" arrow::Type::INT16"
92        _Type_UINT32" arrow::Type::UINT32"
93        _Type_INT32" arrow::Type::INT32"
94        _Type_UINT64" arrow::Type::UINT64"
95        _Type_INT64" arrow::Type::INT64"
96
97        _Type_HALF_FLOAT" arrow::Type::HALF_FLOAT"
98        _Type_FLOAT" arrow::Type::FLOAT"
99        _Type_DOUBLE" arrow::Type::DOUBLE"
100
101        _Type_DECIMAL128" arrow::Type::DECIMAL128"
102        _Type_DECIMAL256" arrow::Type::DECIMAL256"
103
104        _Type_DATE32" arrow::Type::DATE32"
105        _Type_DATE64" arrow::Type::DATE64"
106        _Type_TIMESTAMP" arrow::Type::TIMESTAMP"
107        _Type_TIME32" arrow::Type::TIME32"
108        _Type_TIME64" arrow::Type::TIME64"
109        _Type_DURATION" arrow::Type::DURATION"
110
111        _Type_BINARY" arrow::Type::BINARY"
112        _Type_STRING" arrow::Type::STRING"
113        _Type_LARGE_BINARY" arrow::Type::LARGE_BINARY"
114        _Type_LARGE_STRING" arrow::Type::LARGE_STRING"
115        _Type_FIXED_SIZE_BINARY" arrow::Type::FIXED_SIZE_BINARY"
116
117        _Type_LIST" arrow::Type::LIST"
118        _Type_LARGE_LIST" arrow::Type::LARGE_LIST"
119        _Type_FIXED_SIZE_LIST" arrow::Type::FIXED_SIZE_LIST"
120        _Type_STRUCT" arrow::Type::STRUCT"
121        _Type_SPARSE_UNION" arrow::Type::SPARSE_UNION"
122        _Type_DENSE_UNION" arrow::Type::DENSE_UNION"
123        _Type_DICTIONARY" arrow::Type::DICTIONARY"
124        _Type_MAP" arrow::Type::MAP"
125
126        _Type_EXTENSION" arrow::Type::EXTENSION"
127
128    enum UnionMode" arrow::UnionMode::type":
129        _UnionMode_SPARSE" arrow::UnionMode::SPARSE"
130        _UnionMode_DENSE" arrow::UnionMode::DENSE"
131
132    enum TimeUnit" arrow::TimeUnit::type":
133        TimeUnit_SECOND" arrow::TimeUnit::SECOND"
134        TimeUnit_MILLI" arrow::TimeUnit::MILLI"
135        TimeUnit_MICRO" arrow::TimeUnit::MICRO"
136        TimeUnit_NANO" arrow::TimeUnit::NANO"
137
138    cdef cppclass CBufferSpec" arrow::DataTypeLayout::BufferSpec":
139        pass
140
141    cdef cppclass CDataTypeLayout" arrow::DataTypeLayout":
142        vector[CBufferSpec] buffers
143        c_bool has_dictionary
144
145    cdef cppclass CDataType" arrow::DataType":
146        Type id()
147
148        c_bool Equals(const CDataType& other)
149
150        shared_ptr[CField] field(int i)
151        const vector[shared_ptr[CField]] fields()
152        int num_fields()
153        CDataTypeLayout layout()
154        c_string ToString()
155
156    c_bool is_primitive(Type type)
157
158    cdef cppclass CArrayData" arrow::ArrayData":
159        shared_ptr[CDataType] type
160        int64_t length
161        int64_t null_count
162        int64_t offset
163        vector[shared_ptr[CBuffer]] buffers
164        vector[shared_ptr[CArrayData]] child_data
165        shared_ptr[CArrayData] dictionary
166
167        @staticmethod
168        shared_ptr[CArrayData] Make(const shared_ptr[CDataType]& type,
169                                    int64_t length,
170                                    vector[shared_ptr[CBuffer]]& buffers,
171                                    int64_t null_count,
172                                    int64_t offset)
173
174        @staticmethod
175        shared_ptr[CArrayData] MakeWithChildren" Make"(
176            const shared_ptr[CDataType]& type,
177            int64_t length,
178            vector[shared_ptr[CBuffer]]& buffers,
179            vector[shared_ptr[CArrayData]]& child_data,
180            int64_t null_count,
181            int64_t offset)
182
183        @staticmethod
184        shared_ptr[CArrayData] MakeWithChildrenAndDictionary" Make"(
185            const shared_ptr[CDataType]& type,
186            int64_t length,
187            vector[shared_ptr[CBuffer]]& buffers,
188            vector[shared_ptr[CArrayData]]& child_data,
189            shared_ptr[CArrayData]& dictionary,
190            int64_t null_count,
191            int64_t offset)
192
193    cdef cppclass CArray" arrow::Array":
194        shared_ptr[CDataType] type()
195
196        int64_t length()
197        int64_t null_count()
198        int64_t offset()
199        Type type_id()
200
201        int num_fields()
202
203        CResult[shared_ptr[CScalar]] GetScalar(int64_t i) const
204
205        c_string Diff(const CArray& other)
206        c_bool Equals(const CArray& arr)
207        c_bool IsNull(int i)
208
209        shared_ptr[CArrayData] data()
210
211        shared_ptr[CArray] Slice(int64_t offset)
212        shared_ptr[CArray] Slice(int64_t offset, int64_t length)
213
214        CStatus Validate() const
215        CStatus ValidateFull() const
216        CResult[shared_ptr[CArray]] View(const shared_ptr[CDataType]& type)
217
218    shared_ptr[CArray] MakeArray(const shared_ptr[CArrayData]& data)
219    CResult[shared_ptr[CArray]] MakeArrayOfNull(
220        const shared_ptr[CDataType]& type, int64_t length, CMemoryPool* pool)
221
222    CResult[shared_ptr[CArray]] MakeArrayFromScalar(
223        const CScalar& scalar, int64_t length, CMemoryPool* pool)
224
225    CStatus DebugPrint(const CArray& arr, int indent)
226
227    cdef cppclass CFixedWidthType" arrow::FixedWidthType"(CDataType):
228        int bit_width()
229
230    cdef cppclass CNullArray" arrow::NullArray"(CArray):
231        CNullArray(int64_t length)
232
233    cdef cppclass CDictionaryArray" arrow::DictionaryArray"(CArray):
234        CDictionaryArray(const shared_ptr[CDataType]& type,
235                         const shared_ptr[CArray]& indices,
236                         const shared_ptr[CArray]& dictionary)
237
238        @staticmethod
239        CResult[shared_ptr[CArray]] FromArrays(
240            const shared_ptr[CDataType]& type,
241            const shared_ptr[CArray]& indices,
242            const shared_ptr[CArray]& dictionary)
243
244        shared_ptr[CArray] indices()
245        shared_ptr[CArray] dictionary()
246
247    cdef cppclass CDate32Type" arrow::Date32Type"(CFixedWidthType):
248        pass
249
250    cdef cppclass CDate64Type" arrow::Date64Type"(CFixedWidthType):
251        pass
252
253    cdef cppclass CTimestampType" arrow::TimestampType"(CFixedWidthType):
254        CTimestampType(TimeUnit unit)
255        TimeUnit unit()
256        const c_string& timezone()
257
258    cdef cppclass CTime32Type" arrow::Time32Type"(CFixedWidthType):
259        TimeUnit unit()
260
261    cdef cppclass CTime64Type" arrow::Time64Type"(CFixedWidthType):
262        TimeUnit unit()
263
264    shared_ptr[CDataType] ctime32" arrow::time32"(TimeUnit unit)
265    shared_ptr[CDataType] ctime64" arrow::time64"(TimeUnit unit)
266
267    cdef cppclass CDurationType" arrow::DurationType"(CFixedWidthType):
268        TimeUnit unit()
269
270    shared_ptr[CDataType] cduration" arrow::duration"(TimeUnit unit)
271
272    cdef cppclass CDictionaryType" arrow::DictionaryType"(CFixedWidthType):
273        CDictionaryType(const shared_ptr[CDataType]& index_type,
274                        const shared_ptr[CDataType]& value_type,
275                        c_bool ordered)
276
277        shared_ptr[CDataType] index_type()
278        shared_ptr[CDataType] value_type()
279        c_bool ordered()
280
281    shared_ptr[CDataType] ctimestamp" arrow::timestamp"(TimeUnit unit)
282    shared_ptr[CDataType] ctimestamp" arrow::timestamp"(
283        TimeUnit unit, const c_string& timezone)
284
285    cdef cppclass CMemoryPool" arrow::MemoryPool":
286        int64_t bytes_allocated()
287        int64_t max_memory()
288        c_string backend_name()
289
290    cdef cppclass CLoggingMemoryPool" arrow::LoggingMemoryPool"(CMemoryPool):
291        CLoggingMemoryPool(CMemoryPool*)
292
293    cdef cppclass CProxyMemoryPool" arrow::ProxyMemoryPool"(CMemoryPool):
294        CProxyMemoryPool(CMemoryPool*)
295
296    cdef cppclass CBuffer" arrow::Buffer":
297        CBuffer(const uint8_t* data, int64_t size)
298        const uint8_t* data()
299        uint8_t* mutable_data()
300        uintptr_t address()
301        uintptr_t mutable_address()
302        int64_t size()
303        shared_ptr[CBuffer] parent()
304        c_bool is_cpu() const
305        c_bool is_mutable() const
306        c_string ToHexString()
307        c_bool Equals(const CBuffer& other)
308
309    shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer,
310                                    int64_t offset, int64_t length)
311    shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer,
312                                    int64_t offset)
313
314    cdef cppclass CMutableBuffer" arrow::MutableBuffer"(CBuffer):
315        CMutableBuffer(const uint8_t* data, int64_t size)
316
317    cdef cppclass CResizableBuffer" arrow::ResizableBuffer"(CMutableBuffer):
318        CStatus Resize(const int64_t new_size, c_bool shrink_to_fit)
319        CStatus Reserve(const int64_t new_size)
320
321    CResult[unique_ptr[CBuffer]] AllocateBuffer(const int64_t size,
322                                                CMemoryPool* pool)
323
324    CResult[unique_ptr[CResizableBuffer]] AllocateResizableBuffer(
325        const int64_t size, CMemoryPool* pool)
326
327    cdef CMemoryPool* c_default_memory_pool" arrow::default_memory_pool"()
328    cdef CMemoryPool* c_system_memory_pool" arrow::system_memory_pool"()
329    cdef CStatus c_jemalloc_memory_pool" arrow::jemalloc_memory_pool"(
330        CMemoryPool** out)
331    cdef CStatus c_mimalloc_memory_pool" arrow::mimalloc_memory_pool"(
332        CMemoryPool** out)
333
334    CStatus c_jemalloc_set_decay_ms" arrow::jemalloc_set_decay_ms"(int ms)
335
336    cdef cppclass CListType" arrow::ListType"(CDataType):
337        CListType(const shared_ptr[CDataType]& value_type)
338        CListType(const shared_ptr[CField]& field)
339        shared_ptr[CDataType] value_type()
340        shared_ptr[CField] value_field()
341
342    cdef cppclass CLargeListType" arrow::LargeListType"(CDataType):
343        CLargeListType(const shared_ptr[CDataType]& value_type)
344        CLargeListType(const shared_ptr[CField]& field)
345        shared_ptr[CDataType] value_type()
346        shared_ptr[CField] value_field()
347
348    cdef cppclass CMapType" arrow::MapType"(CDataType):
349        CMapType(const shared_ptr[CDataType]& key_type,
350                 const shared_ptr[CDataType]& item_type, c_bool keys_sorted)
351        shared_ptr[CDataType] key_type()
352        shared_ptr[CDataType] item_type()
353        c_bool keys_sorted()
354
355    cdef cppclass CFixedSizeListType" arrow::FixedSizeListType"(CDataType):
356        CFixedSizeListType(const shared_ptr[CDataType]& value_type,
357                           int32_t list_size)
358        CFixedSizeListType(const shared_ptr[CField]& field, int32_t list_size)
359        shared_ptr[CDataType] value_type()
360        shared_ptr[CField] value_field()
361        int32_t list_size()
362
363    cdef cppclass CStringType" arrow::StringType"(CDataType):
364        pass
365
366    cdef cppclass CFixedSizeBinaryType \
367            " arrow::FixedSizeBinaryType"(CFixedWidthType):
368        CFixedSizeBinaryType(int byte_width)
369        int byte_width()
370        int bit_width()
371
372    cdef cppclass CDecimal128Type \
373            " arrow::Decimal128Type"(CFixedSizeBinaryType):
374        CDecimal128Type(int precision, int scale)
375        int precision()
376        int scale()
377
378    cdef cppclass CDecimal256Type \
379            " arrow::Decimal256Type"(CFixedSizeBinaryType):
380        CDecimal256Type(int precision, int scale)
381        int precision()
382        int scale()
383
384    cdef cppclass CField" arrow::Field":
385        cppclass CMergeOptions "arrow::Field::MergeOptions":
386            c_bool promote_nullability
387
388            @staticmethod
389            CMergeOptions Defaults()
390
391        const c_string& name()
392        shared_ptr[CDataType] type()
393        c_bool nullable()
394
395        c_string ToString()
396        c_bool Equals(const CField& other, c_bool check_metadata)
397
398        shared_ptr[const CKeyValueMetadata] metadata()
399
400        CField(const c_string& name, const shared_ptr[CDataType]& type,
401               c_bool nullable)
402
403        CField(const c_string& name, const shared_ptr[CDataType]& type,
404               c_bool nullable, const shared_ptr[CKeyValueMetadata]& metadata)
405
406        # Removed const in Cython so don't have to cast to get code to generate
407        shared_ptr[CField] AddMetadata(
408            const shared_ptr[CKeyValueMetadata]& metadata)
409        shared_ptr[CField] WithMetadata(
410            const shared_ptr[CKeyValueMetadata]& metadata)
411        shared_ptr[CField] RemoveMetadata()
412        shared_ptr[CField] WithType(const shared_ptr[CDataType]& type)
413        shared_ptr[CField] WithName(const c_string& name)
414        shared_ptr[CField] WithNullable(c_bool nullable)
415        vector[shared_ptr[CField]] Flatten()
416
417    cdef cppclass CFieldRef" arrow::FieldRef":
418        CFieldRef()
419        CFieldRef(c_string name)
420        CFieldRef(int index)
421        const c_string* name() const
422
423    cdef cppclass CFieldRefHash" arrow::FieldRef::Hash":
424        pass
425
426    cdef cppclass CStructType" arrow::StructType"(CDataType):
427        CStructType(const vector[shared_ptr[CField]]& fields)
428
429        shared_ptr[CField] GetFieldByName(const c_string& name)
430        vector[shared_ptr[CField]] GetAllFieldsByName(const c_string& name)
431        int GetFieldIndex(const c_string& name)
432        vector[int] GetAllFieldIndices(const c_string& name)
433
434    cdef cppclass CUnionType" arrow::UnionType"(CDataType):
435        UnionMode mode()
436        const vector[int8_t]& type_codes()
437        const vector[int]& child_ids()
438
439    cdef shared_ptr[CDataType] CMakeSparseUnionType" arrow::sparse_union"(
440        vector[shared_ptr[CField]] fields,
441        vector[int8_t] type_codes)
442
443    cdef shared_ptr[CDataType] CMakeDenseUnionType" arrow::dense_union"(
444        vector[shared_ptr[CField]] fields,
445        vector[int8_t] type_codes)
446
447    cdef cppclass CSchema" arrow::Schema":
448        CSchema(const vector[shared_ptr[CField]]& fields)
449        CSchema(const vector[shared_ptr[CField]]& fields,
450                const shared_ptr[const CKeyValueMetadata]& metadata)
451
452        # Does not actually exist, but gets Cython to not complain
453        CSchema(const vector[shared_ptr[CField]]& fields,
454                const shared_ptr[CKeyValueMetadata]& metadata)
455
456        c_bool Equals(const CSchema& other, c_bool check_metadata)
457
458        shared_ptr[CField] field(int i)
459        shared_ptr[const CKeyValueMetadata] metadata()
460        shared_ptr[CField] GetFieldByName(const c_string& name)
461        vector[shared_ptr[CField]] GetAllFieldsByName(const c_string& name)
462        int GetFieldIndex(const c_string& name)
463        vector[int] GetAllFieldIndices(const c_string& name)
464        int num_fields()
465        c_string ToString()
466
467        CResult[shared_ptr[CSchema]] AddField(int i,
468                                              const shared_ptr[CField]& field)
469        CResult[shared_ptr[CSchema]] RemoveField(int i)
470        CResult[shared_ptr[CSchema]] SetField(int i,
471                                              const shared_ptr[CField]& field)
472
473        # Removed const in Cython so don't have to cast to get code to generate
474        shared_ptr[CSchema] AddMetadata(
475            const shared_ptr[CKeyValueMetadata]& metadata)
476        shared_ptr[CSchema] WithMetadata(
477            const shared_ptr[CKeyValueMetadata]& metadata)
478        shared_ptr[CSchema] RemoveMetadata()
479
480    CResult[shared_ptr[CSchema]] UnifySchemas(
481        const vector[shared_ptr[CSchema]]& schemas)
482
483    cdef cppclass PrettyPrintOptions:
484        PrettyPrintOptions()
485        PrettyPrintOptions(int indent_arg)
486        PrettyPrintOptions(int indent_arg, int window_arg)
487        int indent
488        int indent_size
489        int window
490        c_string null_rep
491        c_bool skip_new_lines
492        c_bool truncate_metadata
493        c_bool show_field_metadata
494        c_bool show_schema_metadata
495
496        @staticmethod
497        PrettyPrintOptions Defaults()
498
499    CStatus PrettyPrint(const CArray& schema,
500                        const PrettyPrintOptions& options,
501                        c_string* result)
502    CStatus PrettyPrint(const CChunkedArray& schema,
503                        const PrettyPrintOptions& options,
504                        c_string* result)
505    CStatus PrettyPrint(const CSchema& schema,
506                        const PrettyPrintOptions& options,
507                        c_string* result)
508
509    cdef cppclass CBooleanArray" arrow::BooleanArray"(CArray):
510        c_bool Value(int i)
511        int64_t false_count()
512        int64_t true_count()
513
514    cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
515        uint8_t Value(int i)
516
517    cdef cppclass CInt8Array" arrow::Int8Array"(CArray):
518        int8_t Value(int i)
519
520    cdef cppclass CUInt16Array" arrow::UInt16Array"(CArray):
521        uint16_t Value(int i)
522
523    cdef cppclass CInt16Array" arrow::Int16Array"(CArray):
524        int16_t Value(int i)
525
526    cdef cppclass CUInt32Array" arrow::UInt32Array"(CArray):
527        uint32_t Value(int i)
528
529    cdef cppclass CInt32Array" arrow::Int32Array"(CArray):
530        int32_t Value(int i)
531
532    cdef cppclass CUInt64Array" arrow::UInt64Array"(CArray):
533        uint64_t Value(int i)
534
535    cdef cppclass CInt64Array" arrow::Int64Array"(CArray):
536        int64_t Value(int i)
537
538    cdef cppclass CDate32Array" arrow::Date32Array"(CArray):
539        int32_t Value(int i)
540
541    cdef cppclass CDate64Array" arrow::Date64Array"(CArray):
542        int64_t Value(int i)
543
544    cdef cppclass CTime32Array" arrow::Time32Array"(CArray):
545        int32_t Value(int i)
546
547    cdef cppclass CTime64Array" arrow::Time64Array"(CArray):
548        int64_t Value(int i)
549
550    cdef cppclass CTimestampArray" arrow::TimestampArray"(CArray):
551        int64_t Value(int i)
552
553    cdef cppclass CDurationArray" arrow::DurationArray"(CArray):
554        int64_t Value(int i)
555
556    cdef cppclass CHalfFloatArray" arrow::HalfFloatArray"(CArray):
557        uint16_t Value(int i)
558
559    cdef cppclass CFloatArray" arrow::FloatArray"(CArray):
560        float Value(int i)
561
562    cdef cppclass CDoubleArray" arrow::DoubleArray"(CArray):
563        double Value(int i)
564
565    cdef cppclass CFixedSizeBinaryArray" arrow::FixedSizeBinaryArray"(CArray):
566        const uint8_t* GetValue(int i)
567
568    cdef cppclass CDecimal128Array" arrow::Decimal128Array"(
569        CFixedSizeBinaryArray
570    ):
571        c_string FormatValue(int i)
572
573    cdef cppclass CDecimal256Array" arrow::Decimal256Array"(
574        CFixedSizeBinaryArray
575    ):
576        c_string FormatValue(int i)
577
578    cdef cppclass CListArray" arrow::ListArray"(CArray):
579        @staticmethod
580        CResult[shared_ptr[CArray]] FromArrays(
581            const CArray& offsets, const CArray& values, CMemoryPool* pool)
582
583        const int32_t* raw_value_offsets()
584        int32_t value_offset(int i)
585        int32_t value_length(int i)
586        shared_ptr[CArray] values()
587        shared_ptr[CArray] offsets()
588        shared_ptr[CDataType] value_type()
589
590    cdef cppclass CLargeListArray" arrow::LargeListArray"(CArray):
591        @staticmethod
592        CResult[shared_ptr[CArray]] FromArrays(
593            const CArray& offsets, const CArray& values, CMemoryPool* pool)
594
595        int64_t value_offset(int i)
596        int64_t value_length(int i)
597        shared_ptr[CArray] values()
598        shared_ptr[CArray] offsets()
599        shared_ptr[CDataType] value_type()
600
601    cdef cppclass CFixedSizeListArray" arrow::FixedSizeListArray"(CArray):
602        @staticmethod
603        CResult[shared_ptr[CArray]] FromArrays(
604            const shared_ptr[CArray]& values, int32_t list_size)
605
606        int64_t value_offset(int i)
607        int64_t value_length(int i)
608        shared_ptr[CArray] values()
609        shared_ptr[CDataType] value_type()
610
611    cdef cppclass CMapArray" arrow::MapArray"(CArray):
612        @staticmethod
613        CResult[shared_ptr[CArray]] FromArrays(
614            const shared_ptr[CArray]& offsets,
615            const shared_ptr[CArray]& keys,
616            const shared_ptr[CArray]& items,
617            CMemoryPool* pool)
618
619        shared_ptr[CArray] keys()
620        shared_ptr[CArray] items()
621        CMapType* map_type()
622        int64_t value_offset(int i)
623        int64_t value_length(int i)
624        shared_ptr[CArray] values()
625        shared_ptr[CDataType] value_type()
626
627    cdef cppclass CUnionArray" arrow::UnionArray"(CArray):
628        shared_ptr[CBuffer] type_codes()
629        int8_t* raw_type_codes()
630        int child_id(int64_t index)
631        shared_ptr[CArray] field(int pos)
632        const CArray* UnsafeField(int pos)
633        UnionMode mode()
634
635    cdef cppclass CSparseUnionArray" arrow::SparseUnionArray"(CUnionArray):
636        @staticmethod
637        CResult[shared_ptr[CArray]] Make(
638            const CArray& type_codes,
639            const vector[shared_ptr[CArray]]& children,
640            const vector[c_string]& field_names,
641            const vector[int8_t]& type_codes)
642
643    cdef cppclass CDenseUnionArray" arrow::DenseUnionArray"(CUnionArray):
644        @staticmethod
645        CResult[shared_ptr[CArray]] Make(
646            const CArray& type_codes,
647            const CArray& value_offsets,
648            const vector[shared_ptr[CArray]]& children,
649            const vector[c_string]& field_names,
650            const vector[int8_t]& type_codes)
651
652        int32_t value_offset(int i)
653        shared_ptr[CBuffer] value_offsets()
654
655    cdef cppclass CBinaryArray" arrow::BinaryArray"(CArray):
656        const uint8_t* GetValue(int i, int32_t* length)
657        shared_ptr[CBuffer] value_data()
658        int32_t value_offset(int64_t i)
659        int32_t value_length(int64_t i)
660        int32_t total_values_length()
661
662    cdef cppclass CLargeBinaryArray" arrow::LargeBinaryArray"(CArray):
663        const uint8_t* GetValue(int i, int64_t* length)
664        shared_ptr[CBuffer] value_data()
665        int64_t value_offset(int64_t i)
666        int64_t value_length(int64_t i)
667        int64_t total_values_length()
668
669    cdef cppclass CStringArray" arrow::StringArray"(CBinaryArray):
670        CStringArray(int64_t length, shared_ptr[CBuffer] value_offsets,
671                     shared_ptr[CBuffer] data,
672                     shared_ptr[CBuffer] null_bitmap,
673                     int64_t null_count,
674                     int64_t offset)
675
676        c_string GetString(int i)
677
678    cdef cppclass CLargeStringArray" arrow::LargeStringArray" \
679            (CLargeBinaryArray):
680        CLargeStringArray(int64_t length, shared_ptr[CBuffer] value_offsets,
681                          shared_ptr[CBuffer] data,
682                          shared_ptr[CBuffer] null_bitmap,
683                          int64_t null_count,
684                          int64_t offset)
685
686        c_string GetString(int i)
687
688    cdef cppclass CStructArray" arrow::StructArray"(CArray):
689        CStructArray(shared_ptr[CDataType] type, int64_t length,
690                     vector[shared_ptr[CArray]] children,
691                     shared_ptr[CBuffer] null_bitmap=nullptr,
692                     int64_t null_count=-1,
693                     int64_t offset=0)
694
695        # XXX Cython crashes if default argument values are declared here
696        # https://github.com/cython/cython/issues/2167
697        @staticmethod
698        CResult[shared_ptr[CArray]] MakeFromFieldNames "Make"(
699            vector[shared_ptr[CArray]] children,
700            vector[c_string] field_names,
701            shared_ptr[CBuffer] null_bitmap,
702            int64_t null_count,
703            int64_t offset)
704
705        @staticmethod
706        CResult[shared_ptr[CArray]] MakeFromFields "Make"(
707            vector[shared_ptr[CArray]] children,
708            vector[shared_ptr[CField]] fields,
709            shared_ptr[CBuffer] null_bitmap,
710            int64_t null_count,
711            int64_t offset)
712
713        shared_ptr[CArray] field(int pos)
714        shared_ptr[CArray] GetFieldByName(const c_string& name) const
715
716        CResult[vector[shared_ptr[CArray]]] Flatten(CMemoryPool* pool)
717
718    cdef cppclass CChunkedArray" arrow::ChunkedArray":
719        CChunkedArray(const vector[shared_ptr[CArray]]& arrays)
720        CChunkedArray(const vector[shared_ptr[CArray]]& arrays,
721                      const shared_ptr[CDataType]& type)
722        int64_t length()
723        int64_t null_count()
724        int num_chunks()
725        c_bool Equals(const CChunkedArray& other)
726
727        shared_ptr[CArray] chunk(int i)
728        shared_ptr[CDataType] type()
729        shared_ptr[CChunkedArray] Slice(int64_t offset, int64_t length) const
730        shared_ptr[CChunkedArray] Slice(int64_t offset) const
731
732        CResult[vector[shared_ptr[CChunkedArray]]] Flatten(CMemoryPool* pool)
733
734        CStatus Validate() const
735        CStatus ValidateFull() const
736
737    cdef cppclass CRecordBatch" arrow::RecordBatch":
738        @staticmethod
739        shared_ptr[CRecordBatch] Make(
740            const shared_ptr[CSchema]& schema, int64_t num_rows,
741            const vector[shared_ptr[CArray]]& columns)
742
743        @staticmethod
744        CResult[shared_ptr[CRecordBatch]] FromStructArray(
745            const shared_ptr[CArray]& array)
746
747        c_bool Equals(const CRecordBatch& other, c_bool check_metadata)
748
749        shared_ptr[CSchema] schema()
750        shared_ptr[CArray] column(int i)
751        const c_string& column_name(int i)
752
753        const vector[shared_ptr[CArray]]& columns()
754
755        int num_columns()
756        int64_t num_rows()
757
758        CStatus Validate() const
759        CStatus ValidateFull() const
760
761        shared_ptr[CRecordBatch] ReplaceSchemaMetadata(
762            const shared_ptr[CKeyValueMetadata]& metadata)
763
764        shared_ptr[CRecordBatch] Slice(int64_t offset)
765        shared_ptr[CRecordBatch] Slice(int64_t offset, int64_t length)
766
    # Cython-side declaration of the C++ class arrow::Table.
    cdef cppclass CTable" arrow::Table":
        # Constructor from a schema plus a vector of chunked-array columns.
        CTable(const shared_ptr[CSchema]& schema,
               const vector[shared_ptr[CChunkedArray]]& columns)

        # Static factory taking chunked-array columns.
        @staticmethod
        shared_ptr[CTable] Make(
            const shared_ptr[CSchema]& schema,
            const vector[shared_ptr[CChunkedArray]]& columns)

        # Same C++ name (Make) but taking plain arrays; it is given the
        # distinct Cython-side name MakeFromArrays.
        @staticmethod
        shared_ptr[CTable] MakeFromArrays" Make"(
            const shared_ptr[CSchema]& schema,
            const vector[shared_ptr[CArray]]& arrays)

        # Static factory taking record batches; the result is wrapped in
        # CResult.
        @staticmethod
        CResult[shared_ptr[CTable]] FromRecordBatches(
            const shared_ptr[CSchema]& schema,
            const vector[shared_ptr[CRecordBatch]]& batches)

        # Dimensions: column count is a C int, row count is int64_t.
        int num_columns()
        int64_t num_rows()

        c_bool Equals(const CTable& other, c_bool check_metadata)

        # Accessors; column() and field() take an int index.
        shared_ptr[CSchema] schema()
        shared_ptr[CChunkedArray] column(int i)
        shared_ptr[CField] field(int i)

        # Column add/remove/replace; each returns a CResult-wrapped table
        # pointer.
        CResult[shared_ptr[CTable]] AddColumn(
            int i, shared_ptr[CField] field, shared_ptr[CChunkedArray] column)
        CResult[shared_ptr[CTable]] RemoveColumn(int i)
        CResult[shared_ptr[CTable]] SetColumn(
            int i, shared_ptr[CField] field, shared_ptr[CChunkedArray] column)

        # Column names, plus renaming and selection by int indices.
        vector[c_string] ColumnNames()
        CResult[shared_ptr[CTable]] RenameColumns(const vector[c_string]&)
        CResult[shared_ptr[CTable]] SelectColumns(const vector[int]&)

        # Both of the following take the memory pool to use as a raw pointer.
        CResult[shared_ptr[CTable]] Flatten(CMemoryPool* pool)

        CResult[shared_ptr[CTable]] CombineChunks(CMemoryPool* pool)

        # Validation entry points returning a CStatus.
        CStatus Validate() const
        CStatus ValidateFull() const

        shared_ptr[CTable] ReplaceSchemaMetadata(
            const shared_ptr[CKeyValueMetadata]& metadata)

        # Slice overloads: offset only, or offset plus length.
        shared_ptr[CTable] Slice(int64_t offset)
        shared_ptr[CTable] Slice(int64_t offset, int64_t length)
817
    # Declaration of arrow::RecordBatchReader. ReadNext and ReadAll return a
    # CStatus and deliver their result through the pointer argument.
    cdef cppclass CRecordBatchReader" arrow::RecordBatchReader":
        shared_ptr[CSchema] schema()
        CStatus ReadNext(shared_ptr[CRecordBatch]* batch)
        CStatus ReadAll(shared_ptr[CTable]* out)
822
    # CRecordBatchReader subclass constructed from a CTable reference; no
    # explicit C++ name is given, so the Cython name is used as-is.
    cdef cppclass TableBatchReader(CRecordBatchReader):
        TableBatchReader(const CTable& table)
        void set_chunksize(int64_t chunksize)
826
    # Declaration of the C++ class arrow::Tensor.
    cdef cppclass CTensor" arrow::Tensor":
        # Element data type and the buffer holding the data.
        shared_ptr[CDataType] type()
        shared_ptr[CBuffer] data()

        # shape() and strides() are declared as returning const references
        # to int64_t vectors.
        const vector[int64_t]& shape()
        const vector[int64_t]& strides()
        int64_t size()

        # Number of dimensions and their names (all or by index).
        int ndim()
        const vector[c_string]& dim_names()
        const c_string& dim_name(int i)

        # Property queries, type id and equality against another CTensor.
        c_bool is_mutable()
        c_bool is_contiguous()
        Type type_id()
        c_bool Equals(const CTensor& other)
843
    # Declaration of arrow::SparseIndex with no members exposed; it is the
    # pointee type returned by CSparseCOOTensor.sparse_index().
    cdef cppclass CSparseIndex" arrow::SparseIndex":
        pass
846
    # Declaration of arrow::SparseCOOIndex; only is_canonical() is exposed.
    # Note it is not declared here as a subclass of CSparseIndex.
    cdef cppclass CSparseCOOIndex" arrow::SparseCOOIndex":
        c_bool is_canonical()
849
    # Declaration of arrow::SparseCOOTensor.
    cdef cppclass CSparseCOOTensor" arrow::SparseCOOTensor":
        # Data type, data buffer, and conversion to a CTensor (CResult-wrapped).
        shared_ptr[CDataType] type()
        shared_ptr[CBuffer] data()
        CResult[shared_ptr[CTensor]] ToTensor()

        # Only this sparse tensor class exposes its index in this file.
        shared_ptr[CSparseIndex] sparse_index()

        # Shape and element counts.
        const vector[int64_t]& shape()
        int64_t size()
        int64_t non_zero_length()

        # Number of dimensions and their names (all or by index).
        int ndim()
        const vector[c_string]& dim_names()
        const c_string& dim_name(int i)

        c_bool is_mutable()
        Type type_id()
        c_bool Equals(const CSparseCOOTensor& other)
868
    # Declaration of arrow::SparseCSRMatrix; same member set as the other
    # sparse classes here, minus sparse_index().
    cdef cppclass CSparseCSRMatrix" arrow::SparseCSRMatrix":
        # Data type, data buffer, and conversion to a CTensor (CResult-wrapped).
        shared_ptr[CDataType] type()
        shared_ptr[CBuffer] data()
        CResult[shared_ptr[CTensor]] ToTensor()

        # Shape and element counts.
        const vector[int64_t]& shape()
        int64_t size()
        int64_t non_zero_length()

        # Number of dimensions and their names (all or by index).
        int ndim()
        const vector[c_string]& dim_names()
        const c_string& dim_name(int i)

        c_bool is_mutable()
        Type type_id()
        c_bool Equals(const CSparseCSRMatrix& other)
885
    # Declaration of arrow::SparseCSCMatrix; member set parallels
    # CSparseCSRMatrix.
    cdef cppclass CSparseCSCMatrix" arrow::SparseCSCMatrix":
        # Data type, data buffer, and conversion to a CTensor (CResult-wrapped).
        shared_ptr[CDataType] type()
        shared_ptr[CBuffer] data()
        CResult[shared_ptr[CTensor]] ToTensor()

        # Shape and element counts.
        const vector[int64_t]& shape()
        int64_t size()
        int64_t non_zero_length()

        # Number of dimensions and their names (all or by index).
        int ndim()
        const vector[c_string]& dim_names()
        const c_string& dim_name(int i)

        c_bool is_mutable()
        Type type_id()
        c_bool Equals(const CSparseCSCMatrix& other)
902
    # Declaration of arrow::SparseCSFTensor; member set parallels
    # CSparseCSRMatrix.
    cdef cppclass CSparseCSFTensor" arrow::SparseCSFTensor":
        # Data type, data buffer, and conversion to a CTensor (CResult-wrapped).
        shared_ptr[CDataType] type()
        shared_ptr[CBuffer] data()
        CResult[shared_ptr[CTensor]] ToTensor()

        # Shape and element counts.
        const vector[int64_t]& shape()
        int64_t size()
        int64_t non_zero_length()

        # Number of dimensions and their names (all or by index).
        int ndim()
        const vector[c_string]& dim_names()
        const c_string& dim_name(int i)

        c_bool is_mutable()
        Type type_id()
        c_bool Equals(const CSparseCSFTensor& other)
919
    # Declaration of arrow::Scalar, the base of every C*Scalar class below.
    cdef cppclass CScalar" arrow::Scalar":
        # `type` and `is_valid` are declared as data members, not methods.
        shared_ptr[CDataType] type
        c_bool is_valid
        c_string ToString() const
        c_bool Equals(const CScalar& other) const
        # Cast to the data type `to`; the new scalar is CResult-wrapped.
        CResult[shared_ptr[CScalar]] CastTo(shared_ptr[CDataType] to) const
926
    # Declaration of the nested type arrow::Scalar::Hash: a callable taking a
    # shared_ptr[CScalar] and returning size_t.
    cdef cppclass CScalarHash" arrow::Scalar::Hash":
        size_t operator()(const shared_ptr[CScalar]& scalar) const
929
    # arrow::NullScalar: CScalar subclass with a no-argument constructor and
    # no `value` member.
    cdef cppclass CNullScalar" arrow::NullScalar"(CScalar):
        CNullScalar()
932
    # arrow::BooleanScalar: CScalar subclass with a c_bool `value` member.
    cdef cppclass CBooleanScalar" arrow::BooleanScalar"(CScalar):
        c_bool value
935
    # arrow::Int8Scalar: CScalar subclass with an int8_t `value` member.
    cdef cppclass CInt8Scalar" arrow::Int8Scalar"(CScalar):
        int8_t value
938
    # arrow::UInt8Scalar: CScalar subclass with a uint8_t `value` member.
    cdef cppclass CUInt8Scalar" arrow::UInt8Scalar"(CScalar):
        uint8_t value
941
    # arrow::Int16Scalar: CScalar subclass with an int16_t `value` member.
    cdef cppclass CInt16Scalar" arrow::Int16Scalar"(CScalar):
        int16_t value
944
    # arrow::UInt16Scalar: CScalar subclass with a uint16_t `value` member.
    cdef cppclass CUInt16Scalar" arrow::UInt16Scalar"(CScalar):
        uint16_t value
947
    # arrow::Int32Scalar: CScalar subclass with an int32_t `value` member.
    cdef cppclass CInt32Scalar" arrow::Int32Scalar"(CScalar):
        int32_t value
950
    # arrow::UInt32Scalar: CScalar subclass with a uint32_t `value` member.
    cdef cppclass CUInt32Scalar" arrow::UInt32Scalar"(CScalar):
        uint32_t value
953
    # arrow::Int64Scalar: CScalar subclass with an int64_t `value` member.
    cdef cppclass CInt64Scalar" arrow::Int64Scalar"(CScalar):
        int64_t value
956
    # arrow::UInt64Scalar: CScalar subclass with a uint64_t `value` member.
    cdef cppclass CUInt64Scalar" arrow::UInt64Scalar"(CScalar):
        uint64_t value
959
    # arrow::HalfFloatScalar: CScalar subclass whose `value` member is
    # declared with the npy_half type.
    cdef cppclass CHalfFloatScalar" arrow::HalfFloatScalar"(CScalar):
        npy_half value
962
    # arrow::FloatScalar: CScalar subclass with a C float `value` member.
    cdef cppclass CFloatScalar" arrow::FloatScalar"(CScalar):
        float value
965
    # arrow::DoubleScalar: CScalar subclass with a C double `value` member.
    cdef cppclass CDoubleScalar" arrow::DoubleScalar"(CScalar):
        double value
968
    # arrow::Decimal128Scalar: CScalar subclass with a CDecimal128 `value`
    # member.
    cdef cppclass CDecimal128Scalar" arrow::Decimal128Scalar"(CScalar):
        CDecimal128 value
971
    # arrow::Decimal256Scalar: CScalar subclass with a CDecimal256 `value`
    # member.
    cdef cppclass CDecimal256Scalar" arrow::Decimal256Scalar"(CScalar):
        CDecimal256 value
974
    # arrow::Date32Scalar: CScalar subclass with an int32_t `value` member.
    cdef cppclass CDate32Scalar" arrow::Date32Scalar"(CScalar):
        int32_t value
977
    # arrow::Date64Scalar: CScalar subclass with an int64_t `value` member.
    cdef cppclass CDate64Scalar" arrow::Date64Scalar"(CScalar):
        int64_t value
980
    # arrow::Time32Scalar: CScalar subclass with an int32_t `value` member.
    cdef cppclass CTime32Scalar" arrow::Time32Scalar"(CScalar):
        int32_t value
983
    # arrow::Time64Scalar: CScalar subclass with an int64_t `value` member.
    cdef cppclass CTime64Scalar" arrow::Time64Scalar"(CScalar):
        int64_t value
986
    # arrow::TimestampScalar: CScalar subclass with an int64_t `value` member.
    cdef cppclass CTimestampScalar" arrow::TimestampScalar"(CScalar):
        int64_t value
989
    # arrow::DurationScalar: CScalar subclass with an int64_t `value` member.
    cdef cppclass CDurationScalar" arrow::DurationScalar"(CScalar):
        int64_t value
992
    # arrow::BaseBinaryScalar: CScalar subclass whose `value` member is a
    # shared pointer to a CBuffer.
    cdef cppclass CBaseBinaryScalar" arrow::BaseBinaryScalar"(CScalar):
        shared_ptr[CBuffer] value
995
    # arrow::BaseListScalar: CScalar subclass whose `value` member is a
    # shared pointer to a CArray.
    cdef cppclass CBaseListScalar" arrow::BaseListScalar"(CScalar):
        shared_ptr[CArray] value
998
    # arrow::ListScalar: adds no members of its own to CBaseListScalar.
    cdef cppclass CListScalar" arrow::ListScalar"(CBaseListScalar):
        pass
1001
    # arrow::MapScalar: declared here as a subclass of CListScalar with no
    # additional members.
    cdef cppclass CMapScalar" arrow::MapScalar"(CListScalar):
        pass
1004
    # arrow::StructScalar: `value` is a vector of scalar pointers; field()
    # takes a CFieldRef and returns a CResult-wrapped scalar pointer.
    cdef cppclass CStructScalar" arrow::StructScalar"(CScalar):
        vector[shared_ptr[CScalar]] value
        CResult[shared_ptr[CScalar]] field(CFieldRef ref) const
1008
    # Declaration of the nested type arrow::DictionaryScalar::ValueType,
    # which pairs an index scalar with a dictionary array. It is the type
    # of CDictionaryScalar.value.
    cdef cppclass CDictionaryScalarIndexAndDictionary \
            "arrow::DictionaryScalar::ValueType":
        shared_ptr[CScalar] index
        shared_ptr[CArray] dictionary
1013
    # Declaration of arrow::DictionaryScalar.
    cdef cppclass CDictionaryScalar" arrow::DictionaryScalar"(CScalar):
        # Constructor from an index/dictionary pair, a data type and a
        # validity flag.
        CDictionaryScalar(CDictionaryScalarIndexAndDictionary value,
                          shared_ptr[CDataType], c_bool is_valid)
        CDictionaryScalarIndexAndDictionary value
        CResult[shared_ptr[CScalar]] GetEncodedValue()
1019
    # arrow::UnionScalar: CScalar subclass whose `value` member is itself a
    # shared pointer to a CScalar.
    cdef cppclass CUnionScalar" arrow::UnionScalar"(CScalar):
        shared_ptr[CScalar] value
1022
    # Function template over the value type `Value`; returns a shared
    # pointer to a CScalar.
    shared_ptr[CScalar] MakeScalar[Value](Value value)
1024
    # Declaration of arrow::ConcatenateTablesOptions, the options object
    # accepted by ConcatenateTables.
    cdef cppclass CConcatenateTablesOptions" arrow::ConcatenateTablesOptions":
        c_bool unify_schemas
        CField.CMergeOptions field_merge_options

        # Static factory returning an options object by value.
        @staticmethod
        CConcatenateTablesOptions Defaults()
1031
    # Free function taking a vector of tables, an options object (by value)
    # and a memory pool; returns a CResult-wrapped table pointer.
    CResult[shared_ptr[CTable]] ConcatenateTables(
        const vector[shared_ptr[CTable]]& tables,
        CConcatenateTablesOptions options,
        CMemoryPool* memory_pool)
1036
    # Declaration of arrow::DictionaryUnifier; only the two static helpers
    # below are exposed, each taking a memory pool and returning a CResult.
    cdef cppclass CDictionaryUnifier" arrow::DictionaryUnifier":
        # Takes and returns a chunked array.
        @staticmethod
        CResult[shared_ptr[CChunkedArray]] UnifyChunkedArray(
            shared_ptr[CChunkedArray] array, CMemoryPool* pool)

        # Takes a table by const reference and returns a table pointer.
        @staticmethod
        CResult[shared_ptr[CTable]] UnifyTable(
            const CTable& table, CMemoryPool* pool)
1045
1046
# Declarations from arrow/builder.h (C++ namespace arrow), all nogil.
# Every concrete builder below is declared as a subclass of CArrayBuilder
# (CStringBuilder indirectly, through CBinaryBuilder).
cdef extern from "arrow/builder.h" namespace "arrow" nogil:

    # Common base class: length/null-count queries plus Status-returning
    # AppendNull, Finish (result written through `out`) and Reserve.
    cdef cppclass CArrayBuilder" arrow::ArrayBuilder":
        CArrayBuilder(shared_ptr[CDataType], CMemoryPool* pool)

        int64_t length()
        int64_t null_count()
        CStatus AppendNull()
        CStatus Finish(shared_ptr[CArray]* out)
        CStatus Reserve(int64_t additional_capacity)

    # Boolean builder; Append is overloaded for bint and uint8_t.
    cdef cppclass CBooleanBuilder" arrow::BooleanBuilder"(CArrayBuilder):
        CBooleanBuilder(CMemoryPool* pool)
        CStatus Append(const bint val)
        CStatus Append(const uint8_t val)

    # Fixed-width integer builders: each is constructed from a memory pool
    # and appends one value of the matching C integer type.
    cdef cppclass CInt8Builder" arrow::Int8Builder"(CArrayBuilder):
        CInt8Builder(CMemoryPool* pool)
        CStatus Append(const int8_t value)

    cdef cppclass CInt16Builder" arrow::Int16Builder"(CArrayBuilder):
        CInt16Builder(CMemoryPool* pool)
        CStatus Append(const int16_t value)

    cdef cppclass CInt32Builder" arrow::Int32Builder"(CArrayBuilder):
        CInt32Builder(CMemoryPool* pool)
        CStatus Append(const int32_t value)

    cdef cppclass CInt64Builder" arrow::Int64Builder"(CArrayBuilder):
        CInt64Builder(CMemoryPool* pool)
        CStatus Append(const int64_t value)

    cdef cppclass CUInt8Builder" arrow::UInt8Builder"(CArrayBuilder):
        CUInt8Builder(CMemoryPool* pool)
        CStatus Append(const uint8_t value)

    cdef cppclass CUInt16Builder" arrow::UInt16Builder"(CArrayBuilder):
        CUInt16Builder(CMemoryPool* pool)
        CStatus Append(const uint16_t value)

    cdef cppclass CUInt32Builder" arrow::UInt32Builder"(CArrayBuilder):
        CUInt32Builder(CMemoryPool* pool)
        CStatus Append(const uint32_t value)

    cdef cppclass CUInt64Builder" arrow::UInt64Builder"(CArrayBuilder):
        CUInt64Builder(CMemoryPool* pool)
        CStatus Append(const uint64_t value)

    # Half-float builder: only the constructor is declared (no Append).
    cdef cppclass CHalfFloatBuilder" arrow::HalfFloatBuilder"(CArrayBuilder):
        CHalfFloatBuilder(CMemoryPool* pool)

    cdef cppclass CFloatBuilder" arrow::FloatBuilder"(CArrayBuilder):
        CFloatBuilder(CMemoryPool* pool)
        CStatus Append(const float value)

    cdef cppclass CDoubleBuilder" arrow::DoubleBuilder"(CArrayBuilder):
        CDoubleBuilder(CMemoryPool* pool)
        CStatus Append(const double value)

    # Binary builder: appends a (pointer, length) pair.
    cdef cppclass CBinaryBuilder" arrow::BinaryBuilder"(CArrayBuilder):
        # Constructor taking a data type and a memory pool. It must carry
        # this class's own name to be a constructor declaration; it was
        # previously spelled with the base-class name CArrayBuilder.
        CBinaryBuilder(shared_ptr[CDataType], CMemoryPool* pool)
        CStatus Append(const char* value, int32_t length)

    # String builder: adds an Append overload taking a c_string.
    cdef cppclass CStringBuilder" arrow::StringBuilder"(CBinaryBuilder):
        CStringBuilder(CMemoryPool* pool)

        CStatus Append(const c_string& value)

    # Timestamp builder: unlike the other builders, its constructor also
    # takes the data type.
    cdef cppclass CTimestampBuilder "arrow::TimestampBuilder"(CArrayBuilder):
        CTimestampBuilder(const shared_ptr[CDataType] typ, CMemoryPool* pool)
        CStatus Append(const int64_t value)

    # Date builders: Date32 appends int32_t values, Date64 appends int64_t.
    cdef cppclass CDate32Builder "arrow::Date32Builder"(CArrayBuilder):
        CDate32Builder(CMemoryPool* pool)
        CStatus Append(const int32_t value)

    cdef cppclass CDate64Builder "arrow::Date64Builder"(CArrayBuilder):
        CDate64Builder(CMemoryPool* pool)
        CStatus Append(const int64_t value)
1126
1127
# Use typedef to emulate syntax for std::function<void(..)>
# The signature is (Python object, source buffer, pointer receiving the
# destination buffer). It is used below as function[CallbackTransform] for
# the `transform` member of CTransformInputStreamVTable.
ctypedef void CallbackTransform(
    object, const shared_ptr[CBuffer]& src, shared_ptr[CBuffer]* dest)
1131
1132
# Declarations from arrow/io/api.h (C++ namespace arrow::io), all nogil.
# Classes without an explicit quoted C++ name use their Cython name as-is.
cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil:
    # File open modes (arrow::io::FileMode::type).
    enum FileMode" arrow::io::FileMode::type":
        FileMode_READ" arrow::io::FileMode::READ"
        FileMode_WRITE" arrow::io::FileMode::WRITE"
        FileMode_READWRITE" arrow::io::FileMode::READWRITE"

    # File-vs-directory discriminator (arrow::io::ObjectType::type).
    enum ObjectType" arrow::io::ObjectType::type":
        ObjectType_FILE" arrow::io::ObjectType::FILE"
        ObjectType_DIRECTORY" arrow::io::ObjectType::DIRECTORY"

    # Only the default constructor of arrow::io::AsyncContext is exposed.
    cdef cppclass CAsyncContext" arrow::io::AsyncContext":
        CAsyncContext()

    # Output structure filled in by CIOFileSystem.Stat.
    cdef cppclass FileStatistics:
        int64_t size
        ObjectType kind

    # Interface pieces that the stream and file classes below combine via
    # multiple inheritance.
    cdef cppclass FileInterface:
        CStatus Close()
        CResult[int64_t] Tell()
        FileMode mode()
        c_bool closed()

    cdef cppclass Readable:
        # put overload under a different name to avoid cython bug with multiple
        # layers of inheritance
        CResult[shared_ptr[CBuffer]] ReadBuffer" Read"(int64_t nbytes)
        CResult[int64_t] Read(int64_t nbytes, uint8_t* out)

    cdef cppclass Seekable:
        CStatus Seek(int64_t position)

    cdef cppclass Writable:
        # WriteBuffer maps to the C++ name Write, mirroring the
        # ReadBuffer/Read naming used in Readable.
        CStatus WriteBuffer" Write"(shared_ptr[CBuffer] data)
        CStatus Write(const uint8_t* data, int64_t nbytes)
        CStatus Flush()

    # Stream types assembled from the interface pieces above.
    cdef cppclass COutputStream" arrow::io::OutputStream"(FileInterface,
                                                          Writable):
        pass

    cdef cppclass CInputStream" arrow::io::InputStream"(FileInterface,
                                                        Readable):
        pass

    # Seekable input stream with positional reads.
    cdef cppclass CRandomAccessFile" arrow::io::RandomAccessFile"(CInputStream,
                                                                  Seekable):
        CResult[int64_t] GetSize()

        # ReadAt overloads: into a caller-supplied buffer (result is an
        # int64_t), or returning a buffer.
        CResult[int64_t] ReadAt(int64_t position, int64_t nbytes,
                                uint8_t* buffer)
        CResult[shared_ptr[CBuffer]] ReadAt(int64_t position, int64_t nbytes)
        c_bool supports_zero_copy()

    # Seekable output stream with positional writes.
    cdef cppclass WritableFile(COutputStream, Seekable):
        CStatus WriteAt(int64_t position, const uint8_t* data,
                        int64_t nbytes)

    # Combination of random-access reading and positional writing.
    cdef cppclass ReadWriteFileInterface(CRandomAccessFile,
                                         WritableFile):
        pass

    # arrow::io::FileSystem; only Stat is exposed, writing into `stat`.
    cdef cppclass CIOFileSystem" arrow::io::FileSystem":
        CStatus Stat(const c_string& path, FileStatistics* stat)

    # Output stream opened from a path.
    cdef cppclass FileOutputStream(COutputStream):
        # Note the result is typed as the base COutputStream.
        @staticmethod
        CResult[shared_ptr[COutputStream]] Open(const c_string& path)

        int file_descriptor()

    # Random-access file opened from a path, optionally with a memory pool.
    cdef cppclass ReadableFile(CRandomAccessFile):
        @staticmethod
        CResult[shared_ptr[ReadableFile]] Open(const c_string& path)

        @staticmethod
        CResult[shared_ptr[ReadableFile]] Open(const c_string& path,
                                               CMemoryPool* memory_pool)

        int file_descriptor()

    # arrow::io::MemoryMappedFile: static Create/Open factories plus Resize.
    cdef cppclass CMemoryMappedFile \
            " arrow::io::MemoryMappedFile"(ReadWriteFileInterface):

        @staticmethod
        CResult[shared_ptr[CMemoryMappedFile]] Create(const c_string& path,
                                                      int64_t size)

        @staticmethod
        CResult[shared_ptr[CMemoryMappedFile]] Open(const c_string& path,
                                                    FileMode mode)

        CStatus Resize(int64_t size)

        int file_descriptor()

    # Compressed streams: each Make factory takes a raw codec pointer and
    # the stream to wrap.
    cdef cppclass CCompressedInputStream \
            " arrow::io::CompressedInputStream"(CInputStream):
        @staticmethod
        CResult[shared_ptr[CCompressedInputStream]] Make(
            CCodec* codec, shared_ptr[CInputStream] raw)

    cdef cppclass CCompressedOutputStream \
            " arrow::io::CompressedOutputStream"(COutputStream):
        @staticmethod
        CResult[shared_ptr[CCompressedOutputStream]] Make(
            CCodec* codec, shared_ptr[COutputStream] raw)

    # Buffered streams: Create takes a buffer size, a memory pool and the
    # stream to wrap; Detach returns a stream of the wrapped base type.
    cdef cppclass CBufferedInputStream \
            " arrow::io::BufferedInputStream"(CInputStream):

        @staticmethod
        CResult[shared_ptr[CBufferedInputStream]] Create(
            int64_t buffer_size, CMemoryPool* pool,
            shared_ptr[CInputStream] raw)

        CResult[shared_ptr[CInputStream]] Detach()

    cdef cppclass CBufferedOutputStream \
            " arrow::io::BufferedOutputStream"(COutputStream):

        @staticmethod
        CResult[shared_ptr[CBufferedOutputStream]] Create(
            int64_t buffer_size, CMemoryPool* pool,
            shared_ptr[COutputStream] raw)

        CResult[shared_ptr[COutputStream]] Detach()

    # The next two declarations carry explicit arrow::py:: C++ names, so
    # they do not resolve inside the arrow::io namespace of this block.
    cdef cppclass CTransformInputStreamVTable \
            "arrow::py::TransformInputStreamVTable":
        CTransformInputStreamVTable()
        function[CallbackTransform] transform

    # Takes the stream to wrap, a vtable (by value) and a Python object.
    shared_ptr[CInputStream] MakeTransformInputStream \
        "arrow::py::MakeTransformInputStream"(
        shared_ptr[CInputStream] wrapped, CTransformInputStreamVTable vtable,
        object method_arg)

    # ----------------------------------------------------------------------
    # HDFS

    # Status-returning availability checks for the two HDFS drivers.
    CStatus HaveLibHdfs()
    CStatus HaveLibHdfs3()

    enum HdfsDriver" arrow::io::HdfsDriver":
        HdfsDriver_LIBHDFS" arrow::io::HdfsDriver::LIBHDFS"
        HdfsDriver_LIBHDFS3" arrow::io::HdfsDriver::LIBHDFS3"

    # Connection parameters passed by pointer to CIOHadoopFileSystem.Connect.
    cdef cppclass HdfsConnectionConfig:
        c_string host
        int port
        c_string user
        c_string kerb_ticket
        unordered_map[c_string, c_string] extra_conf
        HdfsDriver driver

    # Per-path metadata produced by ListDirectory and GetPathInfo.
    cdef cppclass HdfsPathInfo:
        ObjectType kind
        c_string name
        c_string owner
        c_string group
        int32_t last_modified_time
        int32_t last_access_time
        int64_t size
        int16_t replication
        int64_t block_size
        int16_t permissions

    # Handle types produced by OpenReadable / OpenWritable below; no extra
    # members are exposed.
    cdef cppclass HdfsReadableFile(CRandomAccessFile):
        pass

    cdef cppclass HdfsOutputStream(COutputStream):
        pass

    # arrow::io::HadoopFileSystem. Every operation except Exists returns a
    # CStatus, and results are delivered through pointer arguments.
    cdef cppclass CIOHadoopFileSystem \
            "arrow::io::HadoopFileSystem"(CIOFileSystem):
        # Static connect call; the new client is written to `client`.
        @staticmethod
        CStatus Connect(const HdfsConnectionConfig* config,
                        shared_ptr[CIOHadoopFileSystem]* client)

        CStatus MakeDirectory(const c_string& path)

        CStatus Delete(const c_string& path, c_bool recursive)

        CStatus Disconnect()

        c_bool Exists(const c_string& path)

        CStatus Chmod(const c_string& path, int mode)
        CStatus Chown(const c_string& path, const char* owner,
                      const char* group)

        CStatus GetCapacity(int64_t* nbytes)
        CStatus GetUsed(int64_t* nbytes)

        CStatus ListDirectory(const c_string& path,
                              vector[HdfsPathInfo]* listing)

        CStatus GetPathInfo(const c_string& path, HdfsPathInfo* info)

        CStatus Rename(const c_string& src, const c_string& dst)

        CStatus OpenReadable(const c_string& path,
                             shared_ptr[HdfsReadableFile]* handle)

        CStatus OpenWritable(const c_string& path, c_bool append,
                             int32_t buffer_size, int16_t replication,
                             int64_t default_block_size,
                             shared_ptr[HdfsOutputStream]* handle)

    # Random-access reader constructed from a buffer or from a raw
    # (pointer, size) pair.
    cdef cppclass CBufferReader \
            " arrow::io::BufferReader"(CRandomAccessFile):
        CBufferReader(const shared_ptr[CBuffer]& buffer)
        CBufferReader(const uint8_t* data, int64_t nbytes)

    # Output stream constructed from a resizable buffer.
    cdef cppclass CBufferOutputStream \
            " arrow::io::BufferOutputStream"(COutputStream):
        CBufferOutputStream(const shared_ptr[CResizableBuffer]& buffer)

    # arrow::io::MockOutputStream: default-constructible, exposes
    # GetExtentBytesWritten().
    cdef cppclass CMockOutputStream \
            " arrow::io::MockOutputStream"(COutputStream):
        CMockOutputStream()
        int64_t GetExtentBytesWritten()

    # Writable file constructed from a buffer, with memcopy tuning setters.
    cdef cppclass CFixedSizeBufferWriter \
            " arrow::io::FixedSizeBufferWriter"(WritableFile):
        CFixedSizeBufferWriter(const shared_ptr[CBuffer]& buffer)

        void set_memcopy_threads(int num_threads)
        void set_memcopy_blocksize(int64_t blocksize)
        void set_memcopy_threshold(int64_t threshold)
1364
1365
# Declarations from arrow/ipc/api.h (C++ namespace arrow::ipc), all nogil.
# The Feather declarations at the end of the block carry explicit
# arrow::ipc::feather:: C++ names.
cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil:
    # Kind of an IPC message.
    enum MessageType" arrow::ipc::MessageType":
        MessageType_SCHEMA" arrow::ipc::MessageType::SCHEMA"
        MessageType_RECORD_BATCH" arrow::ipc::MessageType::RECORD_BATCH"
        MessageType_DICTIONARY_BATCH\
            " arrow::ipc::MessageType::DICTIONARY_BATCH"

    # TODO: use "cpdef enum class" to automatically get a Python wrapper?
    # See
    # https://github.com/cython/cython/commit/2c7c22f51405299a4e247f78edf52957d30cf71d#diff-61c1365c0f761a8137754bb3a73bfbf7
    ctypedef enum CMetadataVersion" arrow::ipc::MetadataVersion":
        CMetadataVersion_V1" arrow::ipc::MetadataVersion::V1"
        CMetadataVersion_V2" arrow::ipc::MetadataVersion::V2"
        CMetadataVersion_V3" arrow::ipc::MetadataVersion::V3"
        CMetadataVersion_V4" arrow::ipc::MetadataVersion::V4"
        CMetadataVersion_V5" arrow::ipc::MetadataVersion::V5"

    # Writer options; fields are plain data members, defaults come from
    # the static Defaults() factory.
    cdef cppclass CIpcWriteOptions" arrow::ipc::IpcWriteOptions":
        c_bool allow_64bit
        int max_recursion_depth
        int32_t alignment
        c_bool write_legacy_ipc_format
        CMemoryPool* memory_pool
        CMetadataVersion metadata_version
        shared_ptr[CCodec] codec
        c_bool use_threads
        c_bool emit_dictionary_deltas

        @staticmethod
        CIpcWriteOptions Defaults()

    # Reader options; same pattern as the writer options.
    cdef cppclass CIpcReadOptions" arrow::ipc::IpcReadOptions":
        int max_recursion_depth
        CMemoryPool* memory_pool
        shared_ptr[unordered_set[int]] included_fields

        @staticmethod
        CIpcReadOptions Defaults()

    # Counters returned by CRecordBatchWriter.stats().
    cdef cppclass CIpcWriteStats" arrow::ipc::WriteStats":
        int64_t num_messages
        int64_t num_record_batches
        int64_t num_dictionary_batches
        int64_t num_dictionary_deltas
        int64_t num_replaced_dictionaries

    # Counters returned by the readers' stats(); same fields as above.
    cdef cppclass CIpcReadStats" arrow::ipc::ReadStats":
        int64_t num_messages
        int64_t num_record_batches
        int64_t num_dictionary_batches
        int64_t num_dictionary_deltas
        int64_t num_replaced_dictionaries

    # Opaque here: only passed by pointer to ReadRecordBatch / ReadSchema.
    cdef cppclass CDictionaryMemo" arrow::ipc::DictionaryMemo":
        pass

    # Output structure filled in by GetRecordBatchPayload.
    cdef cppclass CIpcPayload" arrow::ipc::IpcPayload":
        MessageType type
        shared_ptr[CBuffer] metadata
        vector[shared_ptr[CBuffer]] body_buffers
        int64_t body_length

    # Declaration of arrow::ipc::Message.
    cdef cppclass CMessage" arrow::ipc::Message":
        # NOTE(review): Open is declared without @staticmethod, unlike the
        # Open factories on the reader classes below -- confirm this is
        # intentional.
        CResult[unique_ptr[CMessage]] Open(shared_ptr[CBuffer] metadata,
                                           shared_ptr[CBuffer] body)

        shared_ptr[CBuffer] body()

        c_bool Equals(const CMessage& other)

        shared_ptr[CBuffer] metadata()
        CMetadataVersion metadata_version()
        MessageType type()

        # The length is written through `output_length`.
        CStatus SerializeTo(COutputStream* stream,
                            const CIpcWriteOptions& options,
                            int64_t* output_length)

    c_string FormatMessageType(MessageType type)

    # Message reader: a static Open from an input stream, then
    # ReadNextMessage() returning CResult-wrapped unique pointers.
    cdef cppclass CMessageReader" arrow::ipc::MessageReader":
        @staticmethod
        unique_ptr[CMessageReader] Open(const shared_ptr[CInputStream]& stream)

        CResult[unique_ptr[CMessage]] ReadNextMessage()

    # Writer interface returned by MakeStreamWriter / MakeFileWriter.
    cdef cppclass CRecordBatchWriter" arrow::ipc::RecordBatchWriter":
        CStatus Close()
        CStatus WriteRecordBatch(const CRecordBatch& batch)
        CStatus WriteTable(const CTable& table, int64_t max_chunksize)

        CIpcWriteStats stats()

    # Stream reader. Both factories map to the C++ name Open; the overload
    # taking a message reader is exposed as Open2. Results are typed as the
    # base CRecordBatchReader.
    cdef cppclass CRecordBatchStreamReader \
            " arrow::ipc::RecordBatchStreamReader"(CRecordBatchReader):
        @staticmethod
        CResult[shared_ptr[CRecordBatchReader]] Open(
            const shared_ptr[CInputStream], const CIpcReadOptions&)

        @staticmethod
        CResult[shared_ptr[CRecordBatchReader]] Open2" Open"(
            unique_ptr[CMessageReader] message_reader,
            const CIpcReadOptions& options)

        CIpcReadStats stats()

    # File reader. Open2 is the C++ Open overload that also takes a footer
    # offset. Unlike the stream reader, this class is not declared as a
    # subclass of CRecordBatchReader.
    cdef cppclass CRecordBatchFileReader \
            " arrow::ipc::RecordBatchFileReader":
        @staticmethod
        CResult[shared_ptr[CRecordBatchFileReader]] Open(
            CRandomAccessFile* file,
            const CIpcReadOptions& options)

        @staticmethod
        CResult[shared_ptr[CRecordBatchFileReader]] Open2" Open"(
            CRandomAccessFile* file, int64_t footer_offset,
            const CIpcReadOptions& options)

        shared_ptr[CSchema] schema()

        int num_record_batches()

        CResult[shared_ptr[CRecordBatch]] ReadRecordBatch(int i)

        CIpcReadStats stats()

    # Writer factories; note `options` is a non-const reference here.
    CResult[shared_ptr[CRecordBatchWriter]] MakeStreamWriter(
        shared_ptr[COutputStream] sink, const shared_ptr[CSchema]& schema,
        CIpcWriteOptions& options)

    CResult[shared_ptr[CRecordBatchWriter]] MakeFileWriter(
        shared_ptr[COutputStream] sink, const shared_ptr[CSchema]& schema,
        CIpcWriteOptions& options)

    CResult[unique_ptr[CMessage]] ReadMessage(CInputStream* stream,
                                              CMemoryPool* pool)

    # Size queries; the result is written through `size`.
    CStatus GetRecordBatchSize(const CRecordBatch& batch, int64_t* size)
    CStatus GetTensorSize(const CTensor& tensor, int64_t* size)

    # Tensor IO; WriteTensor reports two lengths through out-pointers.
    CStatus WriteTensor(const CTensor& tensor, COutputStream* dst,
                        int32_t* metadata_length,
                        int64_t* body_length)

    CResult[shared_ptr[CTensor]] ReadTensor(CInputStream* stream)

    # ReadRecordBatch overload taking an already-read message.
    CResult[shared_ptr[CRecordBatch]] ReadRecordBatch(
        const CMessage& message, const shared_ptr[CSchema]& schema,
        CDictionaryMemo* dictionary_memo,
        const CIpcReadOptions& options)

    CResult[shared_ptr[CBuffer]] SerializeSchema(
        const CSchema& schema, CMemoryPool* pool)

    # The record batch parameter is named `schema` in this declaration.
    CResult[shared_ptr[CBuffer]] SerializeRecordBatch(
        const CRecordBatch& schema, const CIpcWriteOptions& options)

    CResult[shared_ptr[CSchema]] ReadSchema(CInputStream* stream,
                                            CDictionaryMemo* dictionary_memo)

    # ReadRecordBatch overload taking an input stream.
    CResult[shared_ptr[CRecordBatch]] ReadRecordBatch(
        const shared_ptr[CSchema]& schema,
        CDictionaryMemo* dictionary_memo,
        const CIpcReadOptions& options,
        CInputStream* stream)

    # AlignStream is overloaded for input and output streams.
    CStatus AlignStream(CInputStream* stream, int64_t alignment)
    CStatus AlignStream(COutputStream* stream, int64_t alignment)

    # Fills `out` with the payload for `batch`.
    cdef CStatus GetRecordBatchPayload\
        " arrow::ipc::GetRecordBatchPayload"(
            const CRecordBatch& batch,
            const CIpcWriteOptions& options,
            CIpcPayload* out)

    # Feather format version constants.
    int kFeatherV1Version" arrow::ipc::feather::kFeatherV1Version"
    int kFeatherV2Version" arrow::ipc::feather::kFeatherV2Version"

    # Feather write properties (arrow::ipc::feather::WriteProperties).
    cdef cppclass CFeatherProperties" arrow::ipc::feather::WriteProperties":
        int version
        # Declared as int64_t to match the width of the C++ field; with a
        # plain `int` declaration Cython range-checks assigned values
        # against C int.
        int64_t chunksize
        CCompressionType compression
        int compression_level

    # Maps to arrow::ipc::feather::WriteTable; properties are passed by
    # value.
    CStatus WriteFeather" arrow::ipc::feather::WriteTable"\
        (const CTable& table, COutputStream* out,
         CFeatherProperties properties)

    # Feather reader (arrow::ipc::feather::Reader).
    cdef cppclass CFeatherReader" arrow::ipc::feather::Reader":
        @staticmethod
        CResult[shared_ptr[CFeatherReader]] Open(
            const shared_ptr[CRandomAccessFile]& file)
        int version()
        shared_ptr[CSchema] schema()

        # Read overloads: no selection, by int indices, or by names. The
        # table is written through `out`.
        CStatus Read(shared_ptr[CTable]* out)
        CStatus Read(const vector[int] indices, shared_ptr[CTable]* out)
        CStatus Read(const vector[c_string] names, shared_ptr[CTable]* out)
1564
1565
# Declarations from arrow/util/value_parsing.h (C++ namespace arrow), nogil.
cdef extern from 'arrow/util/value_parsing.h' namespace 'arrow' nogil:
    # arrow::TimestampParser. Instances are obtained from the static
    # factories below; CCSVConvertOptions.timestamp_parsers holds a vector
    # of them.
    cdef cppclass CTimestampParser" arrow::TimestampParser":
        # Both accessors return C strings.
        const char* kind() const
        const char* format() const

        # Factory taking a format string.
        @staticmethod
        shared_ptr[CTimestampParser] MakeStrptime(c_string format)

        # Factory taking no arguments.
        @staticmethod
        shared_ptr[CTimestampParser] MakeISO8601()
1576
1577
# Declarations from arrow/csv/api.h (C++ namespace arrow::csv), all nogil.
# The three option classes expose plain data members plus a static
# Defaults() factory returning the options object by value.
cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:

    # arrow::csv::ParseOptions; the delimiter, quote and escape characters
    # are declared as unsigned char.
    cdef cppclass CCSVParseOptions" arrow::csv::ParseOptions":
        unsigned char delimiter
        c_bool quoting
        unsigned char quote_char
        c_bool double_quote
        c_bool escaping
        unsigned char escape_char
        c_bool newlines_in_values
        c_bool ignore_empty_lines

        @staticmethod
        CCSVParseOptions Defaults()

    # arrow::csv::ConvertOptions.
    cdef cppclass CCSVConvertOptions" arrow::csv::ConvertOptions":
        c_bool check_utf8
        # Map from a string key to a data type.
        unordered_map[c_string, shared_ptr[CDataType]] column_types
        vector[c_string] null_values
        vector[c_string] true_values
        vector[c_string] false_values
        c_bool strings_can_be_null
        vector[shared_ptr[CTimestampParser]] timestamp_parsers

        c_bool auto_dict_encode
        int32_t auto_dict_max_cardinality

        vector[c_string] include_columns
        c_bool include_missing_columns

        @staticmethod
        CCSVConvertOptions Defaults()

    # arrow::csv::ReadOptions.
    cdef cppclass CCSVReadOptions" arrow::csv::ReadOptions":
        c_bool use_threads
        int32_t block_size
        int32_t skip_rows
        vector[c_string] column_names
        c_bool autogenerate_column_names

        @staticmethod
        CCSVReadOptions Defaults()

    # arrow::csv::TableReader: Make() builds a reader and Read() returns a
    # CResult-wrapped table. This Make takes a CAsyncContext, which the
    # streaming reader's Make below does not.
    cdef cppclass CCSVReader" arrow::csv::TableReader":
        @staticmethod
        CResult[shared_ptr[CCSVReader]] Make(
            CMemoryPool*, CAsyncContext, shared_ptr[CInputStream],
            CCSVReadOptions, CCSVParseOptions, CCSVConvertOptions)

        CResult[shared_ptr[CTable]] Read()

    # arrow::csv::StreamingReader, declared as a CRecordBatchReader
    # subclass.
    cdef cppclass CCSVStreamingReader" arrow::csv::StreamingReader"(
            CRecordBatchReader):
        @staticmethod
        CResult[shared_ptr[CCSVStreamingReader]] Make(
            CMemoryPool*, shared_ptr[CInputStream],
            CCSVReadOptions, CCSVParseOptions, CCSVConvertOptions)
1635
1636
# Declarations from arrow/json/options.h, nogil. This extern block names no
# namespace, so every declaration carries a fully-qualified C++ name.
cdef extern from "arrow/json/options.h" nogil:

    # arrow::json::UnexpectedFieldBehavior; used as the type of
    # CJSONParseOptions.unexpected_field_behavior.
    ctypedef enum CUnexpectedFieldBehavior \
            "arrow::json::UnexpectedFieldBehavior":
        CUnexpectedFieldBehavior_Ignore \
            "arrow::json::UnexpectedFieldBehavior::Ignore"
        CUnexpectedFieldBehavior_Error \
            "arrow::json::UnexpectedFieldBehavior::Error"
        CUnexpectedFieldBehavior_InferType \
            "arrow::json::UnexpectedFieldBehavior::InferType"

    # arrow::json::ReadOptions, with a static Defaults() factory.
    cdef cppclass CJSONReadOptions" arrow::json::ReadOptions":
        c_bool use_threads
        int32_t block_size

        @staticmethod
        CJSONReadOptions Defaults()

    # arrow::json::ParseOptions, with a static Defaults() factory.
    cdef cppclass CJSONParseOptions" arrow::json::ParseOptions":
        shared_ptr[CSchema] explicit_schema
        c_bool newlines_in_values
        CUnexpectedFieldBehavior unexpected_field_behavior

        @staticmethod
        CJSONParseOptions Defaults()
1662
1663
1664cdef extern from "arrow/json/reader.h" namespace "arrow::json" nogil:
    # Declarations taken from arrow/json/reader.h, namespace arrow::json,
    # all marked nogil by the extern clause.
1665
    # Binding for arrow::json::TableReader.  Make() is a static factory that
    # takes a memory pool pointer, an input stream and the JSON read and
    # parse option structs; Read() is declared to return a CResult wrapping
    # a shared_ptr[CTable].
1666    cdef cppclass CJSONReader" arrow::json::TableReader":
1667        @staticmethod
1668        CResult[shared_ptr[CJSONReader]] Make(
1669            CMemoryPool*, shared_ptr[CInputStream],
1670            CJSONReadOptions, CJSONParseOptions)
1671
1672        CResult[shared_ptr[CTable]] Read()
1673
1674
1675cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
    # Declarations taken from arrow/compute/api.h, namespace arrow::compute,
    # all marked nogil by the extern clause.
    #
    # NOTE(review): some C-name strings in this block begin with a space and
    # others do not; confirm whether that difference is intentional before
    # normalising it.
1676
    # Execution context; a default constructor and one taking a memory pool
    # pointer are declared.
1677    cdef cppclass CExecContext" arrow::compute::ExecContext":
1678        CExecContext()
1679        CExecContext(CMemoryPool* pool)
1680
1681    cdef cppclass CKernelSignature" arrow::compute::KernelSignature":
1682        c_string ToString() const
1683
    # Kernel class hierarchy as declared here:
    #   CKernel <- CArrayKernel <- CScalarKernel, CVectorKernel
    #   CKernel <- CScalarAggregateKernel
    # Only CKernel exposes a member (signature); the subclasses use pass and
    # therefore add nothing beyond what they inherit.
1684    cdef cppclass CKernel" arrow::compute::Kernel":
1685        shared_ptr[CKernelSignature] signature
1686
1687    cdef cppclass CArrayKernel" arrow::compute::ArrayKernel"(CKernel):
1688        pass
1689
1690    cdef cppclass CScalarKernel" arrow::compute::ScalarKernel"(CArrayKernel):
1691        pass
1692
1693    cdef cppclass CVectorKernel" arrow::compute::VectorKernel"(CArrayKernel):
1694        pass
1695
1696    cdef cppclass CScalarAggregateKernel \
1697            " arrow::compute::ScalarAggregateKernel"(CKernel):
1698        pass
1699
1700    cdef cppclass CArity" arrow::compute::Arity":
1701        int num_args
1702        c_bool is_varargs
1703
    # Enumeration bound to arrow::compute::Function::Kind with four declared
    # members: SCALAR, VECTOR, SCALAR_AGGREGATE and META.
1704    enum FunctionKind" arrow::compute::Function::Kind":
1705        FunctionKind_SCALAR" arrow::compute::Function::SCALAR"
1706        FunctionKind_VECTOR" arrow::compute::Function::VECTOR"
1707        FunctionKind_SCALAR_AGGREGATE \
1708            " arrow::compute::Function::SCALAR_AGGREGATE"
1709        FunctionKind_META \
1710            " arrow::compute::Function::META"
1711
1712    cdef cppclass CFunctionDoc" arrow::compute::FunctionDoc":
1713        c_string summary
1714        c_string description
1715        vector[c_string] arg_names
1716        c_string options_class
1717
    # Base class of every options class declared further down in this block.
    # No members are exposed on the base itself.
1718    cdef cppclass CFunctionOptions" arrow::compute::FunctionOptions":
1719        pass
1720
    # Base binding for compute functions.  Execute() takes the argument
    # datums, a pointer to the options object and a pointer to an execution
    # context, and is declared to return a CResult wrapping a CDatum.
1721    cdef cppclass CFunction" arrow::compute::Function":
1722        const c_string& name() const
1723        FunctionKind kind() const
1724        const CArity& arity() const
1725        const CFunctionDoc& doc() const
1726        int num_kernels() const
1727        CResult[CDatum] Execute(const vector[CDatum]& args,
1728                                const CFunctionOptions* options,
1729                                CExecContext* ctx)
1730
    # Concrete function kinds.  Each one that declares kernels() returns a
    # vector of const pointers to the matching kernel class; CMetaFunction
    # declares no kernels() accessor.
1731    cdef cppclass CScalarFunction" arrow::compute::ScalarFunction"(CFunction):
1732        vector[const CScalarKernel*] kernels() const
1733
1734    cdef cppclass CVectorFunction" arrow::compute::VectorFunction"(CFunction):
1735        vector[const CVectorKernel*] kernels() const
1736
1737    cdef cppclass CScalarAggregateFunction\
1738            " arrow::compute::ScalarAggregateFunction"\
1739            (CFunction):
1740        vector[const CScalarAggregateKernel*] kernels() const
1741
1742    cdef cppclass CMetaFunction" arrow::compute::MetaFunction"(CFunction):
1743        pass
1744
    # Registry lookups: GetFunction() returns a CResult wrapping a shared_ptr
    # to a CFunction; GetFunctionNames() returns the names as a vector.
1745    cdef cppclass CFunctionRegistry" arrow::compute::FunctionRegistry":
1746        CResult[shared_ptr[CFunction]] GetFunction(
1747            const c_string& name) const
1748        vector[c_string] GetFunctionNames() const
1749        int num_functions() const
1750
    # Free function returning a raw pointer to a registry.
    # NOTE(review): ownership of the returned pointer is not visible from
    # this declaration -- confirm before freeing or storing it.
1751    CFunctionRegistry* GetFunctionRegistry()
1752
    # ------------------------------------------------------------------
    # Options classes.  Each derives from CFunctionOptions (directly, or via
    # CSplitOptions in the case of CSplitPatternOptions) and exposes the
    # constructor and data members listed in its body.
    # ------------------------------------------------------------------
1753    cdef cppclass CMatchSubstringOptions \
1754            "arrow::compute::MatchSubstringOptions"(CFunctionOptions):
1755        CMatchSubstringOptions(c_string pattern)
1756        c_string pattern
1757
1758    cdef cppclass CTrimOptions \
1759            "arrow::compute::TrimOptions"(CFunctionOptions):
1760        CTrimOptions(c_string characters)
1761        c_string characters
1762
1763    cdef cppclass CSplitOptions \
1764            "arrow::compute::SplitOptions"(CFunctionOptions):
1765        CSplitOptions(int64_t max_splits, c_bool reverse)
1766        int64_t max_splits
1767        c_bool reverse
1768
1769    cdef cppclass CSplitPatternOptions \
1770            "arrow::compute::SplitPatternOptions"(CSplitOptions):
1771        CSplitPatternOptions(c_string pattern, int64_t max_splits,
1772                             c_bool reverse)
1773        c_string pattern
1774
    # Cast options.  Three constructors are declared (default, from a safe
    # flag, and a move constructor) together with the static factories
    # Safe() and Unsafe(), which return the struct by value.
1775    cdef cppclass CCastOptions" arrow::compute::CastOptions"(CFunctionOptions):
1776        CCastOptions()
1777        CCastOptions(c_bool safe)
1778        CCastOptions(CCastOptions&& options)
1779
1780        @staticmethod
1781        CCastOptions Safe()
1782
1783        @staticmethod
1784        CCastOptions Unsafe()
1785        shared_ptr[CDataType] to_type
1786        c_bool allow_int_overflow
1787        c_bool allow_time_truncate
1788        c_bool allow_time_overflow
1789        c_bool allow_float_truncate
1790        c_bool allow_invalid_utf8
1791
    # Enumeration nested in FilterOptions on the C++ side (see the C names):
    # DROP and EMIT_NULL.
1792    enum CFilterNullSelectionBehavior \
1793            "arrow::compute::FilterOptions::NullSelectionBehavior":
1794        CFilterNullSelectionBehavior_DROP \
1795            "arrow::compute::FilterOptions::DROP"
1796        CFilterNullSelectionBehavior_EMIT_NULL \
1797            "arrow::compute::FilterOptions::EMIT_NULL"
1798
1799    cdef cppclass CFilterOptions \
1800            " arrow::compute::FilterOptions"(CFunctionOptions):
1801        CFilterOptions()
1802        CFilterOptions(CFilterNullSelectionBehavior null_selection)
1803        CFilterNullSelectionBehavior null_selection_behavior
1804
1805    cdef cppclass CTakeOptions \
1806            " arrow::compute::TakeOptions"(CFunctionOptions):
1807        CTakeOptions(c_bool boundscheck)
1808        c_bool boundscheck
1809
    # Only the constructor is declared here; no data members are exposed.
1810    cdef cppclass CStrptimeOptions \
1811            "arrow::compute::StrptimeOptions"(CFunctionOptions):
1812        CStrptimeOptions(c_string format, TimeUnit unit)
1813
1814    cdef cppclass CVarianceOptions \
1815            "arrow::compute::VarianceOptions"(CFunctionOptions):
1816        CVarianceOptions(int ddof)
1817        int ddof
1818
    # Enumeration nested in MinMaxOptions on the C++ side: SKIP, EMIT_NULL.
1819    enum CMinMaxMode \
1820            "arrow::compute::MinMaxOptions::Mode":
1821        CMinMaxMode_SKIP \
1822            "arrow::compute::MinMaxOptions::SKIP"
1823        CMinMaxMode_EMIT_NULL \
1824            "arrow::compute::MinMaxOptions::EMIT_NULL"
1825
1826    cdef cppclass CMinMaxOptions \
1827            "arrow::compute::MinMaxOptions"(CFunctionOptions):
1828        CMinMaxOptions(CMinMaxMode null_handling)
1829        CMinMaxMode null_handling
1830
1831    cdef cppclass CModeOptions \
1832            "arrow::compute::ModeOptions"(CFunctionOptions):
1833        CModeOptions(int64_t n)
1834        int64_t n
1835
    # Enumeration nested in CountOptions on the C++ side: COUNT_NON_NULL and
    # COUNT_NULL.
1836    enum CCountMode \
1837            "arrow::compute::CountOptions::Mode":
1838        CCountMode_COUNT_NON_NULL \
1839            "arrow::compute::CountOptions::COUNT_NON_NULL"
1840        CCountMode_COUNT_NULL \
1841            "arrow::compute::CountOptions::COUNT_NULL"
1842
1843    cdef cppclass CCountOptions \
1844            "arrow::compute::CountOptions"(CFunctionOptions):
1845        CCountOptions(CCountMode count_mode)
1846        CCountMode count_mode
1847
1848    cdef cppclass CPartitionNthOptions \
1849            "arrow::compute::PartitionNthOptions"(CFunctionOptions):
1850        CPartitionNthOptions(int64_t pivot)
1851        int64_t pivot
1852
1853    cdef cppclass CProjectOptions \
1854            "arrow::compute::ProjectOptions"(CFunctionOptions):
1855        CProjectOptions(vector[c_string] field_names)
1856        vector[c_string] field_names
1857
    # Sort direction, shared by CArraySortOptions and CSortKey below.
1858    ctypedef enum CSortOrder" arrow::compute::SortOrder":
1859        CSortOrder_Ascending \
1860            "arrow::compute::SortOrder::Ascending"
1861        CSortOrder_Descending \
1862            "arrow::compute::SortOrder::Descending"
1863
1864    cdef cppclass CArraySortOptions \
1865            "arrow::compute::ArraySortOptions"(CFunctionOptions):
1866        CArraySortOptions(CSortOrder order)
1867        CSortOrder order
1868
    # A (name, order) pair.  Unlike the options classes around it, CSortKey
    # is not declared as a subclass of CFunctionOptions.
1869    cdef cppclass CSortKey" arrow::compute::SortKey":
1870        CSortKey(c_string name, CSortOrder order)
1871        c_string name
1872        CSortOrder order
1873
1874    cdef cppclass CSortOptions \
1875            "arrow::compute::SortOptions"(CFunctionOptions):
1876        CSortOptions(vector[CSortKey] sort_keys)
1877        vector[CSortKey] sort_keys
1878
    # Enumeration nested in QuantileOptions on the C++ side, with five
    # declared members.
1879    enum CQuantileInterp \
1880            "arrow::compute::QuantileOptions::Interpolation":
1881        CQuantileInterp_LINEAR   "arrow::compute::QuantileOptions::LINEAR"
1882        CQuantileInterp_LOWER    "arrow::compute::QuantileOptions::LOWER"
1883        CQuantileInterp_HIGHER   "arrow::compute::QuantileOptions::HIGHER"
1884        CQuantileInterp_NEAREST  "arrow::compute::QuantileOptions::NEAREST"
1885        CQuantileInterp_MIDPOINT "arrow::compute::QuantileOptions::MIDPOINT"
1886
1887    cdef cppclass CQuantileOptions \
1888            "arrow::compute::QuantileOptions"(CFunctionOptions):
1889        CQuantileOptions(vector[double] q, CQuantileInterp interpolation)
1890        vector[double] q
1891        CQuantileInterp interpolation
1892
1893    cdef cppclass CTDigestOptions \
1894            "arrow::compute::TDigestOptions"(CFunctionOptions):
1895        CTDigestOptions(vector[double] q,
1896                        unsigned int delta, unsigned int buffer_size)
1897        vector[double] q
1898        unsigned int delta
1899        unsigned int buffer_size
1900
    # DatumType and CDatum are bound to C names in namespace arrow
    # (arrow::Datum...), not arrow::compute, even though they are declared
    # inside this arrow::compute extern block.
1901    enum DatumType" arrow::Datum::type":
1902        DatumType_NONE" arrow::Datum::NONE"
1903        DatumType_SCALAR" arrow::Datum::SCALAR"
1904        DatumType_ARRAY" arrow::Datum::ARRAY"
1905        DatumType_CHUNKED_ARRAY" arrow::Datum::CHUNKED_ARRAY"
1906        DatumType_RECORD_BATCH" arrow::Datum::RECORD_BATCH"
1907        DatumType_TABLE" arrow::Datum::TABLE"
1908        DatumType_COLLECTION" arrow::Datum::COLLECTION"
1909
    # Value wrapper.  Constructors are declared for arrays, chunked arrays,
    # scalars, record batches and tables; kind() returns a DatumType.  Note
    # that array() is declared to return CArrayData, not CArray.
    # NOTE(review): presumably each accessor is only valid when kind()
    # matches -- confirm against the Arrow C++ documentation.
1910    cdef cppclass CDatum" arrow::Datum":
1911        CDatum()
1912        CDatum(const shared_ptr[CArray]& value)
1913        CDatum(const shared_ptr[CChunkedArray]& value)
1914        CDatum(const shared_ptr[CScalar]& value)
1915        CDatum(const shared_ptr[CRecordBatch]& value)
1916        CDatum(const shared_ptr[CTable]& value)
1917
1918        DatumType kind() const
1919        c_string ToString() const
1920
1921        const shared_ptr[CArrayData]& array() const
1922        const shared_ptr[CChunkedArray]& chunked_array() const
1923        const shared_ptr[CRecordBatch]& record_batch() const
1924        const shared_ptr[CTable]& table() const
1925        const shared_ptr[CScalar]& scalar() const
1926
    # Declared after CDatum because it holds one by value (value_set).
1927    cdef cppclass CSetLookupOptions \
1928            "arrow::compute::SetLookupOptions"(CFunctionOptions):
1929        CSetLookupOptions(CDatum value_set, c_bool skip_nulls)
1930        CDatum value_set
1931        c_bool skip_nulls
1932
1933
1934cdef extern from "arrow/python/api.h" namespace "arrow::py":
    # This extern block deliberately has no nogil marker, in line with the
    # note below: the function takes Python objects as arguments.
1935    # Requires GIL
    # Declared to return a CResult wrapping a shared_ptr[CDataType]; takes
    # the Python object, a mask object and a flag for pandas null sentinels.
1936    CResult[shared_ptr[CDataType]] InferArrowType(
1937        object obj, object mask, c_bool pandas_null_sentinels)
1938
1939
1940cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
    # Declarations taken from arrow/python/api.h, namespace arrow::py.  The
    # extern clause marks the whole block nogil, although many functions
    # below take Python objects (see the TODO further down).
1941    shared_ptr[CDataType] GetPrimitiveType(Type type)
1942
1943    object PyHalf_FromHalf(npy_half value)
1944
    # No C-name string is given, so the Cython name equals the C++ name
    # inside namespace arrow::py.
1945    cdef cppclass PyConversionOptions:
1946        PyConversionOptions()
1947
1948        shared_ptr[CDataType] type
1949        int64_t size
1950        CMemoryPool* pool
1951        c_bool from_pandas
1952        c_bool ignore_timezone
1953        c_bool strict
1954
1955    # TODO Some functions below are not actually "nogil"
1956
    # Python sequence -> chunked array.  Declared to return a CResult
    # wrapping a shared_ptr[CChunkedArray].
1957    CResult[shared_ptr[CChunkedArray]] ConvertPySequence(
1958        object obj, object mask, const PyConversionOptions& options,
1959        CMemoryPool* pool)
1960
    # The functions below report failure through their CStatus return value
    # and deliver results through a trailing out pointer.
1961    CStatus NumPyDtypeToArrow(object dtype, shared_ptr[CDataType]* type)
1962
    # Two overloads of NdarrayToArrow are declared; the second one adds a
    # cast_options argument before the out pointer.
1963    CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
1964                           c_bool from_pandas,
1965                           const shared_ptr[CDataType]& type,
1966                           shared_ptr[CChunkedArray]* out)
1967
1968    CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
1969                           c_bool from_pandas,
1970                           const shared_ptr[CDataType]& type,
1971                           const CCastOptions& cast_options,
1972                           shared_ptr[CChunkedArray]* out)
1973
1974    CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
1975                            const vector[c_string]& dim_names,
1976                            shared_ptr[CTensor]* out)
1977
1978    CStatus TensorToNdarray(const shared_ptr[CTensor]& tensor, object base,
1979                            PyObject** out)
1980
    # Sparse tensor -> ndarray conversions.  Each writes its component
    # arrays through PyObject** out parameters.
1981    CStatus SparseCOOTensorToNdarray(
1982        const shared_ptr[CSparseCOOTensor]& sparse_tensor, object base,
1983        PyObject** out_data, PyObject** out_coords)
1984
1985    CStatus SparseCSRMatrixToNdarray(
1986        const shared_ptr[CSparseCSRMatrix]& sparse_tensor, object base,
1987        PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)
1988
1989    CStatus SparseCSCMatrixToNdarray(
1990        const shared_ptr[CSparseCSCMatrix]& sparse_tensor, object base,
1991        PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)
1992
1993    CStatus SparseCSFTensorToNdarray(
1994        const shared_ptr[CSparseCSFTensor]& sparse_tensor, object base,
1995        PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)
1996
    # ndarray components -> sparse tensor conversions.  The CSF variant is
    # the only one that also takes an axis_order vector.
1997    CStatus NdarraysToSparseCOOTensor(CMemoryPool* pool, object data_ao,
1998                                      object coords_ao,
1999                                      const vector[int64_t]& shape,
2000                                      const vector[c_string]& dim_names,
2001                                      shared_ptr[CSparseCOOTensor]* out)
2002
2003    CStatus NdarraysToSparseCSRMatrix(CMemoryPool* pool, object data_ao,
2004                                      object indptr_ao, object indices_ao,
2005                                      const vector[int64_t]& shape,
2006                                      const vector[c_string]& dim_names,
2007                                      shared_ptr[CSparseCSRMatrix]* out)
2008
2009    CStatus NdarraysToSparseCSCMatrix(CMemoryPool* pool, object data_ao,
2010                                      object indptr_ao, object indices_ao,
2011                                      const vector[int64_t]& shape,
2012                                      const vector[c_string]& dim_names,
2013                                      shared_ptr[CSparseCSCMatrix]* out)
2014
2015    CStatus NdarraysToSparseCSFTensor(CMemoryPool* pool, object data_ao,
2016                                      object indptr_ao, object indices_ao,
2017                                      const vector[int64_t]& shape,
2018                                      const vector[int64_t]& axis_order,
2019                                      const vector[c_string]& dim_names,
2020                                      shared_ptr[CSparseCSFTensor]* out)
2021
    # Dense tensor -> sparse tensor conversions, one per sparse format.
2022    CStatus TensorToSparseCOOTensor(shared_ptr[CTensor],
2023                                    shared_ptr[CSparseCOOTensor]* out)
2024
2025    CStatus TensorToSparseCSRMatrix(shared_ptr[CTensor],
2026                                    shared_ptr[CSparseCSRMatrix]* out)
2027
2028    CStatus TensorToSparseCSCMatrix(shared_ptr[CTensor],
2029                                    shared_ptr[CSparseCSCMatrix]* out)
2030
2031    CStatus TensorToSparseCSFTensor(shared_ptr[CTensor],
2032                                    shared_ptr[CSparseCSFTensor]* out)
2033
    # Arrow -> pandas conversions.  The array and chunked-array variants
    # take an extra py_ref object; the table variant does not.
    # NOTE(review): presumably py_ref keeps the owning Python object alive
    # for zero-copy results -- confirm in the C++ implementation.
2034    CStatus ConvertArrayToPandas(const PandasOptions& options,
2035                                 shared_ptr[CArray] arr,
2036                                 object py_ref, PyObject** out)
2037
2038    CStatus ConvertChunkedArrayToPandas(const PandasOptions& options,
2039                                        shared_ptr[CChunkedArray] arr,
2040                                        object py_ref, PyObject** out)
2041
2042    CStatus ConvertTableToPandas(const PandasOptions& options,
2043                                 shared_ptr[CTable] table,
2044                                 PyObject** out)
2045
    # The two functions below get c_-prefixed Cython names and are bound to
    # the C++ functions named in their C-name strings.
2046    void c_set_default_memory_pool \
2047        " arrow::py::set_default_memory_pool"(CMemoryPool* pool)
2048
2049    CMemoryPool* c_get_memory_pool \
2050        " arrow::py::get_memory_pool"()
2051
    # Buffer and file adapters declared as subclasses of the Arrow C++ base
    # types so that instances can be passed wherever the base is expected.
2052    cdef cppclass PyBuffer(CBuffer):
2053        @staticmethod
2054        CResult[shared_ptr[CBuffer]] FromPyObject(object obj)
2055
2056    cdef cppclass PyForeignBuffer(CBuffer):
2057        @staticmethod
2058        CStatus Make(const uint8_t* data, int64_t size, object base,
2059                     shared_ptr[CBuffer]* out)
2060
2061    cdef cppclass PyReadableFile(CRandomAccessFile):
2062        PyReadableFile(object fo)
2063
2064    cdef cppclass PyOutputStream(COutputStream):
2065        PyOutputStream(object fo)
2066
    # Option struct consumed by the Convert*ToPandas functions above.  All
    # members are plain flags except the pool pointer and the two column
    # name sets.
2067    cdef cppclass PandasOptions:
2068        CMemoryPool* pool
2069        c_bool strings_to_categorical
2070        c_bool zero_copy_only
2071        c_bool integer_object_nulls
2072        c_bool date_as_object
2073        c_bool timestamp_as_object
2074        c_bool use_threads
2075        c_bool coerce_temporal_nanoseconds
2076        c_bool ignore_timezone
2077        c_bool deduplicate_objects
2078        c_bool safe_cast
2079        c_bool split_blocks
2080        c_bool self_destruct
2081        unordered_set[c_string] categorical_columns
2082        unordered_set[c_string] extension_columns
2083
    # Serialized Python object: a record batch plus a vector of tensors.
2084    cdef cppclass CSerializedPyObject" arrow::py::SerializedPyObject":
2085        shared_ptr[CRecordBatch] batch
2086        vector[shared_ptr[CTensor]] tensors
2087
2088        CStatus WriteTo(COutputStream* dst)
2089        CStatus GetComponents(CMemoryPool* pool, PyObject** dst)
2090
2091    CStatus SerializeObject(object context, object sequence,
2092                            CSerializedPyObject* out)
2093
2094    CStatus DeserializeObject(object context,
2095                              const CSerializedPyObject& obj,
2096                              PyObject* base, PyObject** out)
2097
2098    CStatus ReadSerializedObject(CRandomAccessFile* src,
2099                                 CSerializedPyObject* out)
2100
    # Per-format sparse tensor counters, passed to
    # GetSerializedFromComponents below.
2101    cdef cppclass SparseTensorCounts:
2102        SparseTensorCounts()
2103        int coo
2104        int csr
2105        int csc
2106        int csf
2107        int ndim_csf
2108        int num_total_tensors() const
2109        int num_total_buffers() const
2110
2111    CStatus GetSerializedFromComponents(
2112        int num_tensors,
2113        const SparseTensorCounts& num_sparse_tensors,
2114        int num_ndarrays,
2115        int num_buffers,
2116        object buffers,
2117        CSerializedPyObject* out)
2118
2119
2120cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:
    # Declarations from namespace arrow::py::internal, all marked nogil by
    # the extern clause.
    #
    # CTimePoint is opaque to Cython (pass body); values are produced and
    # consumed only through the helper functions below.
2121    cdef cppclass CTimePoint "arrow::py::internal::TimePoint":
2122        pass
2123
2124    CTimePoint PyDateTime_to_TimePoint(PyDateTime_DateTime* pydatetime)
2125    int64_t TimePoint_to_ns(CTimePoint val)
2126    CTimePoint TimePoint_from_s(double val)
2127    CTimePoint TimePoint_from_ns(int64_t val)
2128
    # Conversions between a Python tzinfo object and its string form; both
    # report failure through the CResult wrapper.
    # NOTE(review): both functions deal in PyObject pointers while the block
    # is nogil -- confirm callers hold the GIL.
2129    CResult[c_string] TzinfoToString(PyObject* pytzinfo)
2130    CResult[PyObject*] StringToTzinfo(c_string)
2131
2132
2133cdef extern from 'arrow/python/init.h':
    # The except -1 clause tells Cython that a return value of -1 signals a
    # Python exception to propagate.
2134    int arrow_init_numpy() except -1
2135
2136
2137cdef extern from 'arrow/python/pyarrow.h' namespace 'arrow::py':
    # The except -1 clause tells Cython that a return value of -1 signals a
    # Python exception to propagate.
2138    int import_pyarrow() except -1
2139
2140
2141cdef extern from 'arrow/python/common.h' namespace "arrow::py":
    # Helpers operating on a CStatus.  The extern block carries no nogil
    # marker, so calls need the GIL.
    # NOTE(review): presumably RestorePyError re-raises the Python exception
    # carried by the status -- confirm in arrow/python/common.h.
2142    c_bool IsPyError(const CStatus& status)
2143    void RestorePyError(const CStatus& status)
2144
2145
2146cdef extern from 'arrow/python/inference.h' namespace 'arrow::py':
    # Type predicates on a Python object, each returning a C++ bool.  The
    # extern block carries no nogil marker, so calls need the GIL.
2147    c_bool IsPyBool(object o)
2148    c_bool IsPyInt(object o)
2149    c_bool IsPyFloat(object o)
2150
2151
2152cdef extern from 'arrow/python/ipc.h' namespace 'arrow::py':
    # Binding for arrow::py::PyRecordBatchReader, declared as a subclass of
    # CRecordBatchReader.  Make() takes a schema and a Python object, and
    # returns a CResult wrapping a shared_ptr to the base reader type.
    # NOTE(review): the Python object is presumably an iterable of record
    # batches -- confirm in arrow/python/ipc.h.
2153    cdef cppclass CPyRecordBatchReader" arrow::py::PyRecordBatchReader" \
2154            (CRecordBatchReader):
2155        @staticmethod
2156        CResult[shared_ptr[CRecordBatchReader]] Make(shared_ptr[CSchema],
2157                                                     object)
2158
2159
2160cdef extern from 'arrow/extension_type.h' namespace 'arrow':
    # Declarations taken from arrow/extension_type.h, namespace arrow.
    #
    # Only the static accessor for the global registry is exposed here.
2161    cdef cppclass CExtensionTypeRegistry" arrow::ExtensionTypeRegistry":
2162        @staticmethod
2163        shared_ptr[CExtensionTypeRegistry] GetGlobalRegistry()
2164
    # Extension data type, declared as a subclass of CDataType.
2165    cdef cppclass CExtensionType" arrow::ExtensionType"(CDataType):
2166        c_string extension_name()
2167        shared_ptr[CDataType] storage_type()
2168
    # Extension array, declared as a subclass of CArray.  The constructor
    # takes the data type and the storage array; storage() returns the
    # storage array.
2169    cdef cppclass CExtensionArray" arrow::ExtensionArray"(CArray):
2170        CExtensionArray(shared_ptr[CDataType], shared_ptr[CArray] storage)
2171
2172        shared_ptr[CArray] storage()
2173
2174
2175cdef extern from 'arrow/python/extension_type.h' namespace 'arrow::py':
    # Declarations taken from arrow/python/extension_type.h, namespace
    # arrow::py.  The extern block carries no nogil marker.
    #
    # Python-defined extension type, declared as a subclass of
    # CExtensionType.  Both static factories report failure through CStatus
    # and deliver the new type through the out pointer.
2176    cdef cppclass CPyExtensionType \
2177            " arrow::py::PyExtensionType"(CExtensionType):
2178        @staticmethod
2179        CStatus FromClass(const shared_ptr[CDataType] storage_type,
2180                          const c_string extension_name, object typ,
2181                          shared_ptr[CExtensionType]* out)
2182
2183        @staticmethod
2184        CStatus FromInstance(shared_ptr[CDataType] storage_type,
2185                             object inst, shared_ptr[CExtensionType]* out)
2186
        # Accessors for the Python object associated with this type.
2187        object GetInstance()
2188        CStatus SetInstance(object)
2189
    # Free functions for the extension name and for registering or
    # unregistering a type.
2190    c_string PyExtensionName()
2191    CStatus RegisterPyExtensionType(shared_ptr[CDataType])
2192    CStatus UnregisterPyExtensionType(c_string type_name)
2193
2194
2195cdef extern from 'arrow/python/benchmark.h' namespace 'arrow::py::benchmark':
    # Benchmark entry point taking a Python object.  The except * clause
    # makes Cython check for a pending Python exception after every call,
    # since a void function has no return value to use as an error signal.
2196    void Benchmark_PandasObjectIsNull(object lst) except *
2197
2198
2199cdef extern from 'arrow/util/compression.h' namespace 'arrow' nogil:
    # Declarations taken from arrow/util/compression.h, all marked nogil by
    # the extern clause.
    #
    # Enumeration bound to arrow::Compression::type, one member per codec
    # plus UNCOMPRESSED.
2200    enum CCompressionType" arrow::Compression::type":
2201        CCompressionType_UNCOMPRESSED" arrow::Compression::UNCOMPRESSED"
2202        CCompressionType_SNAPPY" arrow::Compression::SNAPPY"
2203        CCompressionType_GZIP" arrow::Compression::GZIP"
2204        CCompressionType_BROTLI" arrow::Compression::BROTLI"
2205        CCompressionType_ZSTD" arrow::Compression::ZSTD"
2206        CCompressionType_LZ4" arrow::Compression::LZ4"
2207        CCompressionType_LZ4_FRAME" arrow::Compression::LZ4_FRAME"
2208        CCompressionType_BZ2" arrow::Compression::BZ2"
2209
    # Codec binding.  The C name places the class in arrow::util even though
    # the extern block names namespace arrow.
2210    cdef cppclass CCodec" arrow::util::Codec":
        # Static factory; the CResult wraps a unique_ptr to the codec.
2211        @staticmethod
2212        CResult[unique_ptr[CCodec]] Create(CCompressionType codec)
2213
2214        @staticmethod
2215        c_bool IsAvailable(CCompressionType codec)
2216
        # Both operations take an input pointer and length plus an output
        # buffer and its length, and return a CResult wrapping an int64_t.
        # NOTE(review): the int64_t is presumably the number of bytes
        # written -- confirm against the Arrow C++ documentation.
2217        CResult[int64_t] Decompress(int64_t input_len, const uint8_t* input,
2218                                    int64_t output_len,
2219                                    uint8_t* output_buffer)
2220        CResult[int64_t] Compress(int64_t input_len, const uint8_t* input,
2221                                  int64_t output_buffer_len,
2222                                  uint8_t* output_buffer)
2223        c_string name() const
2224        int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input)
2225
2226
2227cdef extern from 'arrow/util/io_util.h' namespace 'arrow::internal' nogil:
    # Helpers from namespace arrow::internal that map a CStatus (passed by
    # value) to an int.
    # NOTE(review): presumably the errno or Windows error code attached to
    # the status -- confirm in arrow/util/io_util.h.
2228    int ErrnoFromStatus(CStatus status)
2229    int WinErrorFromStatus(CStatus status)
2230
2231cdef extern from 'arrow/util/iterator.h' namespace 'arrow' nogil:
    # Templated binding for arrow::Iterator, parameterised on element type T.
    # Next() returns a CResult wrapping a T; Visit() is itself templated on
    # the visitor type.
2232    cdef cppclass CIterator" arrow::Iterator"[T]:
2233        CResult[T] Next()
2234        CStatus Visit[Visitor](Visitor&& visitor)
        # Nested iterator type returned by begin() and end().  Dereferencing
        # it yields a CResult wrapping a T rather than a bare T.
2235        cppclass RangeIterator:
2236            CResult[T] operator*()
2237            RangeIterator& operator++()
2238            bint operator!=(RangeIterator) const
2239        RangeIterator begin()
2240        RangeIterator end()
    # Free function template building a CIterator from a vector passed by
    # value.
2241    CIterator[T] MakeVectorIterator[T](vector[T] v)
2242
2243cdef extern from 'arrow/util/thread_pool.h' namespace 'arrow' nogil:
    # Getter and setter for the CPU thread pool capacity.  The setter
    # reports failure through its CStatus return value.
2244    int GetCpuThreadPoolCapacity()
2245    CStatus SetCpuThreadPoolCapacity(int threads)
2246
2247cdef extern from 'arrow/array/concatenate.h' namespace 'arrow' nogil:
    # Takes a vector of arrays and a memory pool pointer; declared to return
    # a CResult wrapping a shared_ptr to a single CArray.
2248    CResult[shared_ptr[CArray]] Concatenate(
2249        const vector[shared_ptr[CArray]]& arrays,
2250        CMemoryPool* pool)
2251
2252cdef extern from 'arrow/c/abi.h':
    # The three structs from arrow/c/abi.h are declared with empty bodies,
    # so they are opaque to Cython: they can be passed around by pointer but
    # no fields are accessible from Cython code.
2253    cdef struct ArrowSchema:
2254        pass
2255
2256    cdef struct ArrowArray:
2257        pass
2258
2259    cdef struct ArrowArrayStream:
2260        pass
2261
2262cdef extern from 'arrow/c/bridge.h' namespace 'arrow' nogil:
    # Export and import functions from arrow/c/bridge.h, all marked nogil by
    # the extern clause.  Export functions return a CStatus and write into
    # the struct passed by pointer; import functions return a CResult
    # wrapping a shared_ptr to the reconstructed object.
    # NOTE(review): presumably the import functions take ownership of the
    # struct contents -- confirm against the Arrow C data interface docs.
2263    CStatus ExportType(CDataType&, ArrowSchema* out)
2264    CResult[shared_ptr[CDataType]] ImportType(ArrowSchema*)
2265
2266    CStatus ExportSchema(CSchema&, ArrowSchema* out)
2267    CResult[shared_ptr[CSchema]] ImportSchema(ArrowSchema*)
2268
    # Arrays: export with or without an accompanying schema; import with
    # either an existing data type or a schema struct.
2269    CStatus ExportArray(CArray&, ArrowArray* out)
2270    CStatus ExportArray(CArray&, ArrowArray* out, ArrowSchema* out_schema)
2271    CResult[shared_ptr[CArray]] ImportArray(ArrowArray*,
2272                                            shared_ptr[CDataType])
2273    CResult[shared_ptr[CArray]] ImportArray(ArrowArray*, ArrowSchema*)
2274
    # Record batches: the same overload pattern as for arrays.
2275    CStatus ExportRecordBatch(CRecordBatch&, ArrowArray* out)
2276    CStatus ExportRecordBatch(CRecordBatch&, ArrowArray* out,
2277                              ArrowSchema* out_schema)
2278    CResult[shared_ptr[CRecordBatch]] ImportRecordBatch(ArrowArray*,
2279                                                        shared_ptr[CSchema])
2280    CResult[shared_ptr[CRecordBatch]] ImportRecordBatch(ArrowArray*,
2281                                                        ArrowSchema*)
2282
    # Record batch readers are exchanged through ArrowArrayStream.
2283    CStatus ExportRecordBatchReader(shared_ptr[CRecordBatchReader],
2284                                    ArrowArrayStream*)
2285    CResult[shared_ptr[CRecordBatchReader]] ImportRecordBatchReader(
2286        ArrowArrayStream*)
2287