# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import collections


cdef class Scalar(_Weakrefable):
    """
    The base class for scalars.
    """

    def __init__(self):
        raise TypeError("Do not call {}'s constructor directly, use "
                        "pa.scalar() instead.".format(self.__class__.__name__))

    cdef void init(self, const shared_ptr[CScalar]& wrapped):
        # Attach the C++ scalar this Python object is a view of.
        self.wrapped = wrapped

    @staticmethod
    cdef wrap(const shared_ptr[CScalar]& wrapped):
        # Build the concrete Scalar subclass registered in _scalar_classes
        # for the type id of the wrapped C++ scalar.
        cdef:
            Scalar self
            Type type_id = wrapped.get().type.get().id()

        # Null-typed scalars are all represented by the module singleton.
        if type_id == _Type_NA:
            return _NULL

        typ = _scalar_classes[type_id]
        # Bypass __init__, which deliberately raises TypeError.
        self = typ.__new__(typ)
        self.init(wrapped)

        return self

    cdef inline shared_ptr[CScalar] unwrap(self) nogil:
        return self.wrapped

    @property
    def type(self):
        """
        Data type of the Scalar object.
        """
        return pyarrow_wrap_data_type(self.wrapped.get().type)

    @property
    def is_valid(self):
        """
        Holds a valid (non-null) value.
        """
        return self.wrapped.get().is_valid

    def cast(self, object target_type):
        """
        Attempt a safe cast to target data type.

        Parameters
        ----------
        target_type : object
            Target type; it is normalized with ``ensure_type``.

        Returns
        -------
        scalar : Scalar
            A new scalar wrapping the cast result.
        """
        cdef:
            DataType type = ensure_type(target_type)
            shared_ptr[CScalar] result

        with nogil:
            result = GetResultValue(self.wrapped.get().CastTo(type.sp_type))

        return Scalar.wrap(result)

    def __repr__(self):
        return '<pyarrow.{}: {!r}>'.format(
            self.__class__.__name__, self.as_py()
        )

    def __str__(self):
        return str(self.as_py())

    def equals(self, Scalar other not None):
        """
        Return whether this scalar equals another scalar.

        Parameters
        ----------
        other : Scalar
            The scalar to compare with. Must not be None.

        Returns
        -------
        are_equal : bool
        """
        # `not None` matters: a typed argument accepts None by default, and
        # calling the cdef method `unwrap` on None is not safe. With the
        # clause, None is rejected with a TypeError before the body runs.
        return self.wrapped.get().Equals(other.unwrap().get()[0])

    def __eq__(self, other):
        try:
            return self.equals(other)
        except TypeError:
            # `other` is not a Scalar (or is None): let Python try the
            # reflected comparison instead of failing.
            return NotImplemented

    def __hash__(self):
        cdef CScalarHash hasher
        return hasher(self.wrapped)

    def __reduce__(self):
        # Pickle as a call to scalar(python_value, type).
        return scalar, (self.as_py(), self.type)

    def as_py(self):
        """
        Return this value as a Python object.

        Must be overridden by concrete subclasses.
        """
        raise NotImplementedError()


# Placeholders: NullScalar.__cinit__ reads the global NA, so the name has to
# exist before the singleton below is instantiated.
_NULL = NA = None


cdef class NullScalar(Scalar):
    """
    Concrete class for null scalars.
    """

    def __cinit__(self):
        global NA
        # Only the module-level singleton may ever be created.
        if NA is not None:
            raise RuntimeError('Cannot create multiple NullScalar instances')
        self.init(shared_ptr[CScalar](new CNullScalar()))

    def __init__(self):
        # Override Scalar.__init__, which raises TypeError.
        pass

    def as_py(self):
        """
        Return this value as a Python None.
        """
        return None


_NULL = NA = NullScalar()


cdef class BooleanScalar(Scalar):
    """
    Concrete class for boolean scalars.
    """

    def as_py(self):
        """
        Return this value as a Python bool.
        """
        cdef CBooleanScalar* sp = <CBooleanScalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class UInt8Scalar(Scalar):
    """
    Concrete class for uint8 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python int.
        """
        cdef CUInt8Scalar* sp = <CUInt8Scalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class Int8Scalar(Scalar):
    """
    Concrete class for int8 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python int.
        """
        cdef CInt8Scalar* sp = <CInt8Scalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class UInt16Scalar(Scalar):
    """
    Concrete class for uint16 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python int.
        """
        cdef CUInt16Scalar* sp = <CUInt16Scalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class Int16Scalar(Scalar):
    """
    Concrete class for int16 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python int.
        """
        cdef CInt16Scalar* sp = <CInt16Scalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class UInt32Scalar(Scalar):
    """
    Concrete class for uint32 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python int.
        """
        cdef CUInt32Scalar* sp = <CUInt32Scalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class Int32Scalar(Scalar):
    """
    Concrete class for int32 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python int.
        """
        cdef CInt32Scalar* sp = <CInt32Scalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class UInt64Scalar(Scalar):
    """
    Concrete class for uint64 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python int.
        """
        cdef CUInt64Scalar* sp = <CUInt64Scalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class Int64Scalar(Scalar):
    """
    Concrete class for int64 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python int.
        """
        cdef CInt64Scalar* sp = <CInt64Scalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class HalfFloatScalar(Scalar):
    """
    Concrete class for half-float scalars.
    """

    def as_py(self):
        """
        Return this value as a Python float.
        """
        cdef CHalfFloatScalar* sp = <CHalfFloatScalar*> self.wrapped.get()
        return PyHalf_FromHalf(sp.value) if sp.is_valid else None


cdef class FloatScalar(Scalar):
    """
    Concrete class for float scalars.
    """

    def as_py(self):
        """
        Return this value as a Python float.
        """
        cdef CFloatScalar* sp = <CFloatScalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class DoubleScalar(Scalar):
    """
    Concrete class for double scalars.
    """

    def as_py(self):
        """
        Return this value as a Python float.
        """
        cdef CDoubleScalar* sp = <CDoubleScalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None


cdef class Decimal128Scalar(Scalar):
    """
    Concrete class for decimal128 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python Decimal.
        """
        cdef:
            CDecimal128Scalar* sp = <CDecimal128Scalar*> self.wrapped.get()
            CDecimal128Type* dtype = <CDecimal128Type*> sp.type.get()
        if sp.is_valid:
            # Go through the string rendering at the type's scale.
            return _pydecimal.Decimal(
                frombytes(sp.value.ToString(dtype.scale()))
            )
        else:
            return None


cdef class Decimal256Scalar(Scalar):
    """
    Concrete class for decimal256 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python Decimal.
        """
        cdef:
            CDecimal256Scalar* sp = <CDecimal256Scalar*> self.wrapped.get()
            CDecimal256Type* dtype = <CDecimal256Type*> sp.type.get()
        if sp.is_valid:
            # Go through the string rendering at the type's scale.
            return _pydecimal.Decimal(
                frombytes(sp.value.ToString(dtype.scale()))
            )
        else:
            return None


cdef class Date32Scalar(Scalar):
    """
    Concrete class for date32 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python datetime.date instance.
        """
        cdef CDate32Scalar* sp = <CDate32Scalar*> self.wrapped.get()

        if sp.is_valid:
            # the stored value is a day offset from 1970-01-01
            return (
                datetime.date(1970, 1, 1) + datetime.timedelta(days=sp.value)
            )
        else:
            return None


cdef class Date64Scalar(Scalar):
    """
    Concrete class for date64 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python datetime.date instance.
        """
        cdef CDate64Scalar* sp = <CDate64Scalar*> self.wrapped.get()

        if sp.is_valid:
            # the stored value is divided by the number of milliseconds in
            # a day (86400000) to obtain a day offset from 1970-01-01
            return (
                datetime.date(1970, 1, 1) +
                datetime.timedelta(days=sp.value / 86400000)
            )
        else:
            return None


def _datetime_from_int(int64_t value, TimeUnit unit, tzinfo=None):
    """
    Convert an integer offset from 1970-01-01 into a datetime object.

    Parameters
    ----------
    value : int
        Offset from 1970-01-01 00:00:00, expressed in ``unit``.
    unit : TimeUnit
        Resolution of ``value``.
    tzinfo : datetime.tzinfo, optional
        If given, the result is converted with ``tzinfo.fromutc``.

    Returns
    -------
    datetime.datetime, or a pandas Timestamp for nanosecond units when
    pandas is available.

    Raises
    ------
    ValueError
        For nanosecond units without pandas, when ``value`` is not a whole
        number of microseconds.
    """
    if unit == TimeUnit_SECOND:
        delta = datetime.timedelta(seconds=value)
    elif unit == TimeUnit_MILLI:
        delta = datetime.timedelta(milliseconds=value)
    elif unit == TimeUnit_MICRO:
        delta = datetime.timedelta(microseconds=value)
    else:
        # TimeUnit_NANO: prefer pandas timestamps if available
        if _pandas_api.have_pandas:
            return _pandas_api.pd.Timestamp(value, tz=tzinfo, unit='ns')
        # otherwise safely truncate to microsecond resolution datetime
        if value % 1000 != 0:
            raise ValueError(
                "Nanosecond resolution temporal type {} is not safely "
                "convertible to microseconds to convert to datetime.datetime. "
                "Install pandas to return as Timestamp with nanosecond "
                "support or access the .value attribute.".format(value)
            )
        delta = datetime.timedelta(microseconds=value // 1000)

    dt = datetime.datetime(1970, 1, 1) + delta
    # adjust timezone if set to the datatype
    if tzinfo is not None:
        dt = tzinfo.fromutc(dt)

    return dt


cdef class Time32Scalar(Scalar):
    """
    Concrete class for time32 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python datetime.time instance.
        """
        cdef:
            CTime32Scalar* sp = <CTime32Scalar*> self.wrapped.get()
            CTime32Type* dtype = <CTime32Type*> sp.type.get()

        if sp.is_valid:
            # Build a datetime on 1970-01-01 and keep only its time part.
            return _datetime_from_int(sp.value, unit=dtype.unit()).time()
        else:
            return None


cdef class Time64Scalar(Scalar):
    """
    Concrete class for time64 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python datetime.time instance.
        """
        cdef:
            CTime64Scalar* sp = <CTime64Scalar*> self.wrapped.get()
            CTime64Type* dtype = <CTime64Type*> sp.type.get()

        if sp.is_valid:
            # Build a datetime on 1970-01-01 and keep only its time part.
            return _datetime_from_int(sp.value, unit=dtype.unit()).time()
        else:
            return None


cdef class TimestampScalar(Scalar):
    """
    Concrete class for timestamp scalars.
    """

    @property
    def value(self):
        """
        Return the underlying integer value, or None if the scalar is null.
        """
        cdef CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None

    def as_py(self):
        """
        Return this value as a Pandas Timestamp instance (if the unit is
        nanoseconds and pandas is available), otherwise as a Python
        datetime.datetime instance.
        """
        cdef:
            CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get()
            CTimestampType* dtype = <CTimestampType*> sp.type.get()

        if not sp.is_valid:
            return None

        if not dtype.timezone().empty():
            tzinfo = string_to_tzinfo(frombytes(dtype.timezone()))
        else:
            tzinfo = None

        return _datetime_from_int(sp.value, unit=dtype.unit(), tzinfo=tzinfo)


cdef class DurationScalar(Scalar):
    """
    Concrete class for duration scalars.
    """

    @property
    def value(self):
        """
        Return the underlying integer value, or None if the scalar is null.
        """
        cdef CDurationScalar* sp = <CDurationScalar*> self.wrapped.get()
        return sp.value if sp.is_valid else None

    def as_py(self):
        """
        Return this value as a Pandas Timedelta instance (if the unit is
        nanoseconds and pandas is available), otherwise as a Python
        datetime.timedelta instance.

        Raises
        ------
        ValueError
            For nanosecond units without pandas, when the value is not a
            whole number of microseconds.
        """
        cdef:
            CDurationScalar* sp = <CDurationScalar*> self.wrapped.get()
            CDurationType* dtype = <CDurationType*> sp.type.get()
            TimeUnit unit = dtype.unit()

        if not sp.is_valid:
            return None

        if unit == TimeUnit_SECOND:
            return datetime.timedelta(seconds=sp.value)
        elif unit == TimeUnit_MILLI:
            return datetime.timedelta(milliseconds=sp.value)
        elif unit == TimeUnit_MICRO:
            return datetime.timedelta(microseconds=sp.value)
        else:
            # TimeUnit_NANO: prefer pandas timedeltas if available
            if _pandas_api.have_pandas:
                return _pandas_api.pd.Timedelta(sp.value, unit='ns')
            # otherwise safely truncate to microsecond resolution timedelta
            if sp.value % 1000 != 0:
                raise ValueError(
                    "Nanosecond duration {} is not safely convertible to "
                    "microseconds to convert to datetime.timedelta. Install "
                    "pandas to return as Timedelta with nanosecond support or "
                    "access the .value attribute.".format(sp.value)
                )
            return datetime.timedelta(microseconds=sp.value // 1000)


cdef class BinaryScalar(Scalar):
    """
    Concrete class for binary-like scalars.
    """

    def as_buffer(self):
        """
        Return a view over this value as a Buffer object.

        Returns None if the scalar is null.
        """
        cdef CBaseBinaryScalar* sp = <CBaseBinaryScalar*> self.wrapped.get()
        return pyarrow_wrap_buffer(sp.value) if sp.is_valid else None

    def as_py(self):
        """
        Return this value as a Python bytes.
        """
        buffer = self.as_buffer()
        return None if buffer is None else buffer.to_pybytes()


cdef class LargeBinaryScalar(BinaryScalar):
    pass


cdef class FixedSizeBinaryScalar(BinaryScalar):
    pass


cdef class StringScalar(BinaryScalar):
    """
    Concrete class for string-like (utf8) scalars.
    """

    def as_py(self):
        """
        Return this value as a Python string.
        """
        buffer = self.as_buffer()
        return None if buffer is None else str(buffer, 'utf8')


cdef class LargeStringScalar(StringScalar):
    pass


cdef class ListScalar(Scalar):
    """
    Concrete class for list-like scalars.
    """

    @property
    def values(self):
        """
        Return the wrapped values as an Array, or None if the scalar is null.
        """
        cdef CBaseListScalar* sp = <CBaseListScalar*> self.wrapped.get()
        if sp.is_valid:
            return pyarrow_wrap_array(sp.value)
        else:
            return None

    def __len__(self):
        """
        Return the number of values.
        """
        return len(self.values)

    def __getitem__(self, i):
        """
        Return the value at the given index.
        """
        return self.values[_normalize_index(i, len(self))]

    def __iter__(self):
        """
        Iterate over this element's values.
        """
        return iter(self.values)

    def as_py(self):
        """
        Return this value as a Python list.
        """
        arr = self.values
        return None if arr is None else arr.to_pylist()


cdef class FixedSizeListScalar(ListScalar):
    pass


cdef class LargeListScalar(ListScalar):
    pass


cdef class StructScalar(Scalar, collections.abc.Mapping):
    """
    Concrete class for struct scalars.
    """

    def __len__(self):
        cdef CStructScalar* sp = <CStructScalar*> self.wrapped.get()
        return sp.value.size()

    def __iter__(self):
        """
        Iterate over the field names; yields nothing if the scalar is null.
        """
        cdef:
            CStructScalar* sp = <CStructScalar*> self.wrapped.get()
            CStructType* dtype = <CStructType*> sp.type.get()
            vector[shared_ptr[CField]] fields = dtype.fields()

        if sp.is_valid:
            for i in range(dtype.num_fields()):
                yield frombytes(fields[i].get().name())

    def __contains__(self, key):
        try:
            self[key]
        except (KeyError, IndexError):
            return False
        else:
            return True

    def __getitem__(self, key):
        """
        Return the child value for the given field.

        Parameters
        ----------
        key : Union[int, str]
            Index / position or name of the field.

        Returns
        -------
        result : Scalar

        Raises
        ------
        TypeError
            If ``key`` is neither an integer nor a string.
        IndexError
            If an integer ``key`` cannot be resolved.
        KeyError
            If a string ``key`` cannot be resolved.
        """
        cdef:
            CFieldRef ref
            CStructScalar* sp = <CStructScalar*> self.wrapped.get()

        if isinstance(key, (bytes, str)):
            ref = CFieldRef(<c_string> tobytes(key))
        elif isinstance(key, int):
            ref = CFieldRef(<int> key)
        else:
            raise TypeError('Expected integer or string index')

        try:
            return Scalar.wrap(GetResultValue(sp.field(ref)))
        except ArrowInvalid:
            # Translate to the exception the Python container protocols use.
            if isinstance(key, int):
                raise IndexError(key)
            else:
                raise KeyError(key)

    def as_py(self):
        """
        Return this value as a Python dict.
        """
        if self.is_valid:
            return {k: v.as_py() for k, v in self.items()}
        else:
            return None


cdef class MapScalar(ListScalar):
    """
    Concrete class for map scalars.
    """

    def __getitem__(self, i):
        """
        Return the (key, value) pair at the given index.
        """
        arr = self.values
        if arr is None:
            raise IndexError(i)
        dct = arr[_normalize_index(i, len(arr))]
        return (dct['key'], dct['value'])

    def __iter__(self):
        """
        Iterate over this element's (key, value) pairs as Python objects.

        Yields nothing if the scalar is null.
        """
        arr = self.values
        # A null map has no values to iterate. A plain `return` ends the
        # generator; raising StopIteration inside a generator body is
        # converted to RuntimeError under PEP 479.
        if arr is None:
            return
        for k, v in zip(arr.field('key'), arr.field('value')):
            yield (k.as_py(), v.as_py())

    def as_py(self):
        """
        Return this value as a Python list.
        """
        # Validity lives on the base scalar; no subclass cast is needed.
        return list(self) if self.wrapped.get().is_valid else None


cdef class DictionaryScalar(Scalar):
    """
    Concrete class for dictionary-encoded scalars.
    """

    @classmethod
    def _reconstruct(cls, type, is_valid, index, dictionary):
        # Rebuild a DictionaryScalar from its parts; used by __reduce__.
        cdef:
            CDictionaryScalarIndexAndDictionary value
            shared_ptr[CDictionaryScalar] wrapped
            DataType type_
            Scalar index_
            Array dictionary_

        type_ = ensure_type(type, allow_none=False)
        if not isinstance(type_, DictionaryType):
            raise TypeError('Must pass a DictionaryType instance')

        # Validate against the normalized type_, consistently with the
        # conversion branches below.
        if isinstance(index, Scalar):
            if not index.type.equals(type_.index_type):
                raise TypeError("The Scalar value passed as index must have "
                                "identical type to the dictionary type's "
                                "index_type")
            index_ = index
        else:
            index_ = scalar(index, type=type_.index_type)

        if isinstance(dictionary, Array):
            if not dictionary.type.equals(type_.value_type):
                raise TypeError("The Array passed as dictionary must have "
                                "identical type to the dictionary type's "
                                "value_type")
            dictionary_ = dictionary
        else:
            dictionary_ = array(dictionary, type=type_.value_type)

        value.index = pyarrow_unwrap_scalar(index_)
        value.dictionary = pyarrow_unwrap_array(dictionary_)

        wrapped = make_shared[CDictionaryScalar](
            value, pyarrow_unwrap_data_type(type_), <c_bool>(is_valid)
        )
        return Scalar.wrap(<shared_ptr[CScalar]> wrapped)

    def __reduce__(self):
        return DictionaryScalar._reconstruct, (
            self.type, self.is_valid, self.index, self.dictionary
        )

    @property
    def index(self):
        """
        Return this value's underlying index as a scalar.
        """
        cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
        return Scalar.wrap(sp.value.index)

    @property
    def value(self):
        """
        Return the encoded value as a scalar.
        """
        cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
        return Scalar.wrap(GetResultValue(sp.GetEncodedValue()))

    @property
    def dictionary(self):
        """
        Return this value's underlying dictionary as an Array.
        """
        cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
        return pyarrow_wrap_array(sp.value.dictionary)

    def as_py(self):
        """
        Return this encoded value as a Python object.
        """
        return self.value.as_py() if self.is_valid else None

    @property
    def index_value(self):
        """
        Deprecated alias of the `index` property.
        """
        warnings.warn("`index_value` property is deprecated as of 1.0.0, "
                      "please use the `index` property instead",
                      FutureWarning)
        return self.index

    @property
    def dictionary_value(self):
        """
        Deprecated alias of the `value` property.
        """
        warnings.warn("`dictionary_value` property is deprecated as of 1.0.0, "
                      "please use the `value` property instead", FutureWarning)
        return self.value


cdef class UnionScalar(Scalar):
    """
    Concrete class for Union scalars.
    """

    @property
    def value(self):
        """
        Return underlying value as a scalar.
        """
        cdef CUnionScalar* sp = <CUnionScalar*> self.wrapped.get()
        return Scalar.wrap(sp.value) if sp.is_valid else None

    def as_py(self):
        """
        Return underlying value as a Python object.
        """
        value = self.value
        return None if value is None else value.as_py()


# Maps a type id to the Scalar subclass that Scalar.wrap instantiates.
# _Type_NA is handled separately in Scalar.wrap (singleton).
cdef dict _scalar_classes = {
    _Type_BOOL: BooleanScalar,
    _Type_UINT8: UInt8Scalar,
    _Type_UINT16: UInt16Scalar,
    _Type_UINT32: UInt32Scalar,
    _Type_UINT64: UInt64Scalar,
    _Type_INT8: Int8Scalar,
    _Type_INT16: Int16Scalar,
    _Type_INT32: Int32Scalar,
    _Type_INT64: Int64Scalar,
    _Type_HALF_FLOAT: HalfFloatScalar,
    _Type_FLOAT: FloatScalar,
    _Type_DOUBLE: DoubleScalar,
    _Type_DECIMAL128: Decimal128Scalar,
    _Type_DECIMAL256: Decimal256Scalar,
    _Type_DATE32: Date32Scalar,
    _Type_DATE64: Date64Scalar,
    _Type_TIME32: Time32Scalar,
    _Type_TIME64: Time64Scalar,
    _Type_TIMESTAMP: TimestampScalar,
    _Type_DURATION: DurationScalar,
    _Type_BINARY: BinaryScalar,
    _Type_LARGE_BINARY: LargeBinaryScalar,
    _Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar,
    _Type_STRING: StringScalar,
    _Type_LARGE_STRING: LargeStringScalar,
    _Type_LIST: ListScalar,
    _Type_LARGE_LIST: LargeListScalar,
    _Type_FIXED_SIZE_LIST: FixedSizeListScalar,
    _Type_STRUCT: StructScalar,
    _Type_MAP: MapScalar,
    _Type_DICTIONARY: DictionaryScalar,
    _Type_SPARSE_UNION: UnionScalar,
    _Type_DENSE_UNION: UnionScalar,
}


def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
    """
    Create a pyarrow.Scalar instance from a Python object.

    Parameters
    ----------
    value : Any
        Python object coercible to arrow's type system.
    type : pyarrow.DataType
        Explicit type to attempt to coerce to, otherwise will be inferred from
        the value.
    from_pandas : bool, default None
        Use pandas's semantics for inferring nulls from values in
        ndarray-like data. Defaults to False if not passed explicitly by user,
        or True if a pandas object is passed in.
    memory_pool : pyarrow.MemoryPool, optional
        If not passed, will allocate memory from the currently-set default
        memory pool.

    Returns
    -------
    scalar : pyarrow.Scalar

    Examples
    --------
    >>> import pyarrow as pa

    >>> pa.scalar(42)
    <pyarrow.Int64Scalar: 42>

    >>> pa.scalar("string")
    <pyarrow.StringScalar: 'string'>

    >>> pa.scalar([1, 2])
    <pyarrow.ListScalar: [1, 2]>

    >>> pa.scalar([1, 2], type=pa.list_(pa.int16()))
    <pyarrow.ListScalar: [1, 2]>
    """
    cdef:
        DataType ty
        PyConversionOptions options
        shared_ptr[CScalar] scalar
        shared_ptr[CArray] array
        shared_ptr[CChunkedArray] chunked
        bint is_pandas_object = False
        CMemoryPool* pool

    type = ensure_type(type, allow_none=True)
    pool = maybe_unbox_memory_pool(memory_pool)

    if _is_array_like(value):
        value = get_values(value, &is_pandas_object)

    # The value is converted as a one-element sequence (see below).
    options.size = 1

    if type is not None:
        ty = ensure_type(type)
        options.type = ty.sp_type

    if from_pandas is None:
        options.from_pandas = is_pandas_object
    else:
        options.from_pandas = from_pandas

    # Wrap the value in a list so the sequence converter can be reused, then
    # pull the single element back out as a scalar.
    value = [value]
    with nogil:
        chunked = GetResultValue(ConvertPySequence(value, None, options, pool))

    # get the first chunk
    assert chunked.get().num_chunks() == 1
    array = chunked.get().chunk(0)

    # retrieve the scalar from the first position
    scalar = GetResultValue(array.get().GetScalar(0))
    return Scalar.wrap(scalar)