# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# cython: language_level = 3

from cpython cimport PyObject
from libcpp cimport nullptr
from libcpp.cast cimport dynamic_cast
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *


cdef extern from "Python.h":
    int PySlice_Check(object)


# Declared `except -1`: a return value of -1 signals that a Python
# exception has been set.  Declared `nogil`, so it may be called without
# holding the GIL.
cdef int check_status(const CStatus& status) nogil except -1


# Common base class of the extension types declared in this file.  The
# `__weakref__` slot is what lets instances be the target of weak references.
cdef class _Weakrefable:
    cdef object __weakref__


cdef class IpcWriteOptions(_Weakrefable):
    cdef:
        # Stored by value rather than behind a pointer.
        CIpcWriteOptions c_options


cdef class Message(_Weakrefable):
    cdef:
        unique_ptr[CMessage] message


cdef class MemoryPool(_Weakrefable):
    cdef:
        # Raw pointer; this declaration does not express ownership.
        CMemoryPool* pool

    cdef void init(self, CMemoryPool* pool)


cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool)


# ----------------------------------------------------------------------
# Data types
#
# NOTE(review): throughout this file a class typically pairs a smart
# pointer attribute with a raw pointer attribute of a related type.  The
# raw pointer presumably aliases the object held by the smart pointer --
# confirm against the implementation (.pyx/.pxi) files.

cdef class DataType(_Weakrefable):
    cdef:
        shared_ptr[CDataType] sp_type
        CDataType* type
        bytes pep3118_format

    # `except *`: callers check for a pending Python exception after the call.
    cdef void init(self, const shared_ptr[CDataType]& type) except *
    cdef Field field(self, int i)


# Each DataType subclass below adds a const pointer typed as the
# corresponding concrete C++ type class.

cdef class ListType(DataType):
    cdef:
        const CListType* list_type


cdef class LargeListType(DataType):
    cdef:
        const CLargeListType* list_type


cdef class MapType(DataType):
    cdef:
        const CMapType* map_type


cdef class FixedSizeListType(DataType):
    cdef:
        const CFixedSizeListType* list_type


cdef class StructType(DataType):
    cdef:
        const CStructType* struct_type

    cdef Field field_by_name(self, name)


cdef class DictionaryMemo(_Weakrefable):
    cdef:
        # Even though the CDictionaryMemo instance is private, we allocate
        # it on the heap so as to avoid C++ ABI issues with Python wheels.
        shared_ptr[CDictionaryMemo] sp_memo
        CDictionaryMemo* memo


cdef class DictionaryType(DataType):
    cdef:
        const CDictionaryType* dict_type


cdef class TimestampType(DataType):
    cdef:
        const CTimestampType* ts_type


cdef class Time32Type(DataType):
    cdef:
        const CTime32Type* time_type


cdef class Time64Type(DataType):
    cdef:
        const CTime64Type* time_type


cdef class DurationType(DataType):
    cdef:
        const CDurationType* duration_type


cdef class FixedSizeBinaryType(DataType):
    cdef:
        const CFixedSizeBinaryType* fixed_size_binary_type


cdef class Decimal128Type(FixedSizeBinaryType):
    cdef:
        const CDecimal128Type* decimal128_type


cdef class Decimal256Type(FixedSizeBinaryType):
    cdef:
        const CDecimal256Type* decimal256_type


cdef class BaseExtensionType(DataType):
    cdef:
        const CExtensionType* ext_type


cdef class ExtensionType(BaseExtensionType):
    cdef:
        const CPyExtensionType* cpy_ext_type


# Declares no C-level attributes beyond those of ExtensionType.
cdef class PyExtensionType(ExtensionType):
    pass


# ----------------------------------------------------------------------
# Metadata, fields, schemas and scalars

cdef class _Metadata(_Weakrefable):
    # required because KeyValueMetadata also extends collections.abc.Mapping
    # and the first parent class must be an extension type
    pass


cdef class KeyValueMetadata(_Metadata):
    cdef:
        shared_ptr[const CKeyValueMetadata] wrapped
        const CKeyValueMetadata* metadata

    cdef void init(self, const shared_ptr[const CKeyValueMetadata]& wrapped)

    @staticmethod
    cdef wrap(const shared_ptr[const CKeyValueMetadata]& sp)
    cdef inline shared_ptr[const CKeyValueMetadata] unwrap(self) nogil


cdef class Field(_Weakrefable):
    cdef:
        shared_ptr[CField] sp_field
        CField* field

    # `readonly` attributes can be read, but not assigned, from Python code.
    cdef readonly:
        DataType type

    cdef void init(self, const shared_ptr[CField]& field)


cdef class Schema(_Weakrefable):
    cdef:
        shared_ptr[CSchema] sp_schema
        CSchema* schema

    # Two initializers: one takes a vector of fields, the other an
    # existing schema pointer.
    cdef void init(self, const vector[shared_ptr[CField]]& fields)
    cdef void init_schema(self, const shared_ptr[CSchema]& schema)


cdef class Scalar(_Weakrefable):
    cdef:
        shared_ptr[CScalar] wrapped

    cdef void init(self, const shared_ptr[CScalar]& wrapped)

    @staticmethod
    cdef wrap(const shared_ptr[CScalar]& wrapped)

    cdef inline shared_ptr[CScalar] unwrap(self) nogil


# ----------------------------------------------------------------------
# Arrays and tensors

# Shared base class of Array, ChunkedArray, Table and RecordBatch; it
# declares no C-level attributes of its own.
cdef class _PandasConvertible(_Weakrefable):
    pass


cdef class Array(_PandasConvertible):
    cdef:
        shared_ptr[CArray] sp_array
        CArray* ap

    cdef readonly:
        DataType type
        # To allow Table to propagate metadata to pandas.Series
        object _name

    cdef void init(self, const shared_ptr[CArray]& sp_array) except *
    cdef getitem(self, int64_t i)
    cdef int64_t length(self)


cdef class Tensor(_Weakrefable):
    cdef:
        shared_ptr[CTensor] sp_tensor
        CTensor* tp

    cdef readonly:
        DataType type

    cdef void init(self, const shared_ptr[CTensor]& sp_tensor)


# The four sparse tensor classes below share the same attribute layout and
# differ only in the C++ type they wrap.

cdef class SparseCSRMatrix(_Weakrefable):
    cdef:
        shared_ptr[CSparseCSRMatrix] sp_sparse_tensor
        CSparseCSRMatrix* stp

    cdef readonly:
        DataType type

    cdef void init(self, const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)


cdef class SparseCSCMatrix(_Weakrefable):
    cdef:
        shared_ptr[CSparseCSCMatrix] sp_sparse_tensor
        CSparseCSCMatrix* stp

    cdef readonly:
        DataType type

    cdef void init(self, const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)


cdef class SparseCOOTensor(_Weakrefable):
    cdef:
        shared_ptr[CSparseCOOTensor] sp_sparse_tensor
        CSparseCOOTensor* stp

    cdef readonly:
        DataType type

    cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)


cdef class SparseCSFTensor(_Weakrefable):
    cdef:
        shared_ptr[CSparseCSFTensor] sp_sparse_tensor
        CSparseCSFTensor* stp

    cdef readonly:
        DataType type

    cdef void init(self, const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)


# Array subclasses.  Apart from DictionaryArray, none of them declares
# C-level attributes beyond those inherited from Array.

cdef class NullArray(Array):
    pass


cdef class BooleanArray(Array):
    pass


cdef class NumericArray(Array):
    pass


cdef class IntegerArray(NumericArray):
    pass


cdef class FloatingPointArray(NumericArray):
    pass


cdef class Int8Array(IntegerArray):
    pass


cdef class UInt8Array(IntegerArray):
    pass


cdef class Int16Array(IntegerArray):
    pass


cdef class UInt16Array(IntegerArray):
    pass


cdef class Int32Array(IntegerArray):
    pass


cdef class UInt32Array(IntegerArray):
    pass


cdef class Int64Array(IntegerArray):
    pass


cdef class UInt64Array(IntegerArray):
    pass


cdef class HalfFloatArray(FloatingPointArray):
    pass


cdef class FloatArray(FloatingPointArray):
    pass


cdef class DoubleArray(FloatingPointArray):
    pass


cdef class FixedSizeBinaryArray(Array):
    pass


cdef class Decimal128Array(FixedSizeBinaryArray):
    pass


cdef class Decimal256Array(FixedSizeBinaryArray):
    pass


cdef class StructArray(Array):
    pass


cdef class BaseListArray(Array):
    pass


cdef class ListArray(BaseListArray):
    pass


cdef class LargeListArray(BaseListArray):
    pass


cdef class MapArray(Array):
    pass


cdef class FixedSizeListArray(Array):
    pass


cdef class UnionArray(Array):
    pass


cdef class StringArray(Array):
    pass


cdef class BinaryArray(Array):
    pass


cdef class DictionaryArray(Array):
    cdef:
        # NOTE(review): presumably cached Python-level wrappers for the
        # indices and dictionary arrays -- confirm in the implementation.
        object _indices, _dictionary


cdef class ExtensionArray(Array):
    pass


cdef wrap_array_output(PyObject* output)
cdef wrap_datum(const CDatum& datum)


# ----------------------------------------------------------------------
# Chunked arrays, tables and record batches

cdef class ChunkedArray(_PandasConvertible):
    cdef:
        shared_ptr[CChunkedArray] sp_chunked_array
        CChunkedArray* chunked_array

    cdef readonly:
        # To allow Table to propagate metadata to pandas.Series
        object _name

    cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array)
    cdef getitem(self, int64_t i)


cdef class Table(_PandasConvertible):
    cdef:
        shared_ptr[CTable] sp_table
        CTable* table

    cdef void init(self, const shared_ptr[CTable]& table)


cdef class RecordBatch(_PandasConvertible):
    cdef:
        shared_ptr[CRecordBatch] sp_batch
        CRecordBatch* batch
        Schema _schema

    cdef void init(self, const shared_ptr[CRecordBatch]& table)


# ----------------------------------------------------------------------
# Buffers and files

cdef class Buffer(_Weakrefable):
    cdef:
        shared_ptr[CBuffer] buffer
        # One-element arrays.
        # NOTE(review): presumably backing storage for the shape/strides
        # reported through the Python buffer protocol -- confirm.
        Py_ssize_t shape[1]
        Py_ssize_t strides[1]

    cdef void init(self, const shared_ptr[CBuffer]& buffer)
    cdef getitem(self, int64_t i)


cdef class ResizableBuffer(Buffer):

    cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer)


cdef class NativeFile(_Weakrefable):
    cdef:
        shared_ptr[CInputStream] input_stream
        shared_ptr[CRandomAccessFile] random_access
        shared_ptr[COutputStream] output_stream
        bint is_readable
        bint is_writable
        bint is_seekable
        bint own_file

    # By implementing these "virtual" functions (all functions in Cython
    # extension classes are technically virtual in the C++ sense) we can expose
    # the arrow::io abstract file interfaces to other components throughout the
    # suite of Arrow C++ libraries
    cdef set_random_access_file(self, shared_ptr[CRandomAccessFile] handle)
    cdef set_input_stream(self, shared_ptr[CInputStream] handle)
    cdef set_output_stream(self, shared_ptr[COutputStream] handle)

    # The getters are declared `except *`, so they may raise.
    cdef shared_ptr[CRandomAccessFile] get_random_access_file(self) except *
    cdef shared_ptr[CInputStream] get_input_stream(self) except *
    cdef shared_ptr[COutputStream] get_output_stream(self) except *


# NativeFile subclasses that declare no additional C-level attributes.

cdef class BufferedInputStream(NativeFile):
    pass


cdef class BufferedOutputStream(NativeFile):
    pass


cdef class CompressedInputStream(NativeFile):
    pass


cdef class CompressedOutputStream(NativeFile):
    pass


cdef class _CRecordBatchWriter(_Weakrefable):
    cdef:
        shared_ptr[CRecordBatchWriter] writer


cdef class RecordBatchReader(_Weakrefable):
    cdef:
        shared_ptr[CRecordBatchReader] reader


cdef class Codec(_Weakrefable):
    cdef:
        unique_ptr[CCodec] wrapped

    cdef inline CCodec* unwrap(self) nogil


# Helpers taking a Python-level `source` object; the first three also take
# a pointer to a C++ stream/file handle (presumably an output parameter --
# confirm in the implementation).
cdef get_input_stream(object source, c_bool use_memory_map,
                      shared_ptr[CInputStream]* reader)
cdef get_reader(object source, c_bool use_memory_map,
                shared_ptr[CRandomAccessFile]* reader)
cdef get_writer(object source, shared_ptr[COutputStream]* writer)
cdef NativeFile get_native_file(object source, c_bool use_memory_map)

cdef shared_ptr[CInputStream] native_transcoding_input_stream(
    shared_ptr[CInputStream] stream, src_encoding,
    dest_encoding) except *

# Default is allow_none=False
cpdef DataType ensure_type(object type, bint allow_none=*)

# Exceptions may be raised when converting dict values, so need to
# check exception state on return
cdef shared_ptr[CKeyValueMetadata] pyarrow_unwrap_metadata(object meta) \
    except *
cdef object pyarrow_wrap_metadata(
    const shared_ptr[const CKeyValueMetadata]& meta)

#
# Public Cython API for 3rd party code
#

# pyarrow_wrap_*: take a shared_ptr to a C++ object and return a Python
# object.
cdef public object pyarrow_wrap_scalar(const shared_ptr[CScalar]& sp_scalar)
cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
cdef public object pyarrow_wrap_chunked_array(
    const shared_ptr[CChunkedArray]& sp_array)
cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
cdef public object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf)
cdef public object pyarrow_wrap_data_type(const shared_ptr[CDataType]& type)
cdef public object pyarrow_wrap_field(const shared_ptr[CField]& field)
cdef public object pyarrow_wrap_resizable_buffer(
    const shared_ptr[CResizableBuffer]& buf)
cdef public object pyarrow_wrap_schema(const shared_ptr[CSchema]& type)
cdef public object pyarrow_wrap_table(const shared_ptr[CTable]& ctable)
cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
cdef public object pyarrow_wrap_sparse_coo_tensor(
    const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
cdef public object pyarrow_wrap_sparse_csr_matrix(
    const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
cdef public object pyarrow_wrap_sparse_csc_matrix(
    const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
cdef public object pyarrow_wrap_sparse_csf_tensor(
    const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)

# pyarrow_unwrap_*: take a Python object and return a shared_ptr to a C++
# object.  These are declared without an exception clause.
# NOTE(review): how a mismatched argument type is reported is not visible
# here -- check the implementation before relying on it.
cdef public shared_ptr[CScalar] pyarrow_unwrap_scalar(object scalar)
cdef public shared_ptr[CArray] pyarrow_unwrap_array(object array)
cdef public shared_ptr[CChunkedArray] pyarrow_unwrap_chunked_array(
    object array)
cdef public shared_ptr[CRecordBatch] pyarrow_unwrap_batch(object batch)
cdef public shared_ptr[CBuffer] pyarrow_unwrap_buffer(object buffer)
cdef public shared_ptr[CDataType] pyarrow_unwrap_data_type(object data_type)
cdef public shared_ptr[CField] pyarrow_unwrap_field(object field)
cdef public shared_ptr[CSchema] pyarrow_unwrap_schema(object schema)
cdef public shared_ptr[CTable] pyarrow_unwrap_table(object table)
cdef public shared_ptr[CTensor] pyarrow_unwrap_tensor(object tensor)
cdef public shared_ptr[CSparseCOOTensor] pyarrow_unwrap_sparse_coo_tensor(
    object sparse_tensor)
cdef public shared_ptr[CSparseCSRMatrix] pyarrow_unwrap_sparse_csr_matrix(
    object sparse_tensor)
cdef public shared_ptr[CSparseCSCMatrix] pyarrow_unwrap_sparse_csc_matrix(
    object sparse_tensor)
cdef public shared_ptr[CSparseCSFTensor] pyarrow_unwrap_sparse_csf_tensor(
    object sparse_tensor)