scalar.pxi 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945
  1. # Licensed to the Apache Software Foundation (ASF) under one
  2. # or more contributor license agreements. See the NOTICE file
  3. # distributed with this work for additional information
  4. # regarding copyright ownership. The ASF licenses this file
  5. # to you under the Apache License, Version 2.0 (the
  6. # "License"); you may not use this file except in compliance
  7. # with the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing,
  12. # software distributed under the License is distributed on an
  13. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. # KIND, either express or implied. See the License for the
  15. # specific language governing permissions and limitations
  16. # under the License.
  17. import collections
  cdef class Scalar(_Weakrefable):
      """
      Base class shared by every scalar wrapper.

      Instances are not built through the constructor; they are produced by
      ``pa.scalar()`` or by ``Scalar.wrap`` from an existing C++ scalar.
      """

      def __init__(self):
          message = ("Do not call {}'s constructor directly, use "
                     "pa.scalar() instead.")
          raise TypeError(message.format(self.__class__.__name__))

      cdef void init(self, const shared_ptr[CScalar]& wrapped):
          # Attach the C++ scalar that this Python object exposes.
          self.wrapped = wrapped

      @staticmethod
      cdef wrap(const shared_ptr[CScalar]& wrapped):
          # Build the Python wrapper class registered for the scalar's type id.
          cdef:
              Scalar instance
              Type type_id = wrapped.get().type.get().id()

          # Null-typed scalars always resolve to the module-level singleton.
          if type_id == _Type_NA:
              return _NULL

          scalar_cls = _scalar_classes[type_id]
          # __new__ is used so that the raising __init__ is bypassed.
          instance = scalar_cls.__new__(scalar_cls)
          instance.init(wrapped)
          return instance

      cdef inline shared_ptr[CScalar] unwrap(self) nogil:
          return self.wrapped

      @property
      def type(self):
          """
          Data type of the Scalar object.
          """
          return pyarrow_wrap_data_type(self.wrapped.get().type)

      @property
      def is_valid(self):
          """
          Whether this scalar holds a valid (non-null) value.
          """
          return self.wrapped.get().is_valid

      def cast(self, object target_type):
          """
          Attempt a safe cast to the target data type.

          Parameters
          ----------
          target_type : object
              Anything accepted by ``ensure_type``.

          Returns
          -------
          scalar : Scalar
              A new scalar wrapping the result of the C++ ``CastTo`` call.
          """
          cdef:
              DataType dest_type = ensure_type(target_type)
              shared_ptr[CScalar] c_result

          # The C++ cast runs without holding the GIL.
          with nogil:
              c_result = GetResultValue(
                  self.wrapped.get().CastTo(dest_type.sp_type)
              )

          return Scalar.wrap(c_result)

      def __repr__(self):
          class_name = self.__class__.__name__
          py_value = self.as_py()
          return '<pyarrow.{}: {!r}>'.format(class_name, py_value)

      def __str__(self):
          py_value = self.as_py()
          return str(py_value)

      def equals(self, Scalar other not None):
          """
          Compare this scalar with another Scalar using the C++ ``Equals``.
          """
          return self.wrapped.get().Equals(other.unwrap().get()[0])

      def __eq__(self, other):
          # equals() raises TypeError for anything that is not a Scalar;
          # translate that into NotImplemented for the comparison protocol.
          try:
              return self.equals(other)
          except TypeError:
              return NotImplemented

      def __hash__(self):
          cdef CScalarHash hasher
          return hasher(self.wrapped)

      def __reduce__(self):
          # Pickle as "rebuild through scalar() from the Python value and type".
          return scalar, (self.as_py(), self.type)

      def as_py(self):
          """
          Return this value as a Python object; subclasses must override.
          """
          raise NotImplementedError()
  # Placeholders so that NullScalar.__cinit__ can tell whether the singleton
  # has been created yet.
  _NULL = NA = None


  cdef class NullScalar(Scalar):
      """
      Concrete class for null scalars.

      Only one instance may exist; it is created right after the class
      definition and published as ``NA`` / ``_NULL``.
      """

      def __cinit__(self):
          global NA
          # A second instantiation is refused once the singleton exists.
          if NA is not None:
              raise RuntimeError('Cannot create multiple NullScalar instances')
          self.init(shared_ptr[CScalar](new CNullScalar()))

      def __init__(self):
          # Overrides Scalar.__init__, which would raise TypeError.
          pass

      def as_py(self):
          """
          Return this value as a Python None.
          """
          return None


  # The one and only NullScalar instance.
  _NULL = NA = NullScalar()
  cdef class BooleanScalar(Scalar):
      """
      Concrete class for boolean scalars.
      """

      def as_py(self):
          """
          Return this value as a Python bool, or None if the scalar is null.
          """
          cdef CBooleanScalar* c_scalar = <CBooleanScalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class UInt8Scalar(Scalar):
      """
      Concrete class for uint8 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python int, or None if the scalar is null.
          """
          cdef CUInt8Scalar* c_scalar = <CUInt8Scalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class Int8Scalar(Scalar):
      """
      Concrete class for int8 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python int, or None if the scalar is null.
          """
          cdef CInt8Scalar* c_scalar = <CInt8Scalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class UInt16Scalar(Scalar):
      """
      Concrete class for uint16 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python int, or None if the scalar is null.
          """
          cdef CUInt16Scalar* c_scalar = <CUInt16Scalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class Int16Scalar(Scalar):
      """
      Concrete class for int16 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python int, or None if the scalar is null.
          """
          cdef CInt16Scalar* c_scalar = <CInt16Scalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class UInt32Scalar(Scalar):
      """
      Concrete class for uint32 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python int, or None if the scalar is null.
          """
          cdef CUInt32Scalar* c_scalar = <CUInt32Scalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class Int32Scalar(Scalar):
      """
      Concrete class for int32 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python int, or None if the scalar is null.
          """
          cdef CInt32Scalar* c_scalar = <CInt32Scalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class UInt64Scalar(Scalar):
      """
      Concrete class for uint64 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python int, or None if the scalar is null.
          """
          cdef CUInt64Scalar* c_scalar = <CUInt64Scalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class Int64Scalar(Scalar):
      """
      Concrete class for int64 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python int, or None if the scalar is null.
          """
          cdef CInt64Scalar* c_scalar = <CInt64Scalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class HalfFloatScalar(Scalar):
      """
      Concrete class for half-float scalars.
      """

      def as_py(self):
          """
          Return this value converted by ``PyHalf_FromHalf``, or None if the
          scalar is null.
          """
          cdef CHalfFloatScalar* c_scalar = (
              <CHalfFloatScalar*> self.wrapped.get()
          )
          if not c_scalar.is_valid:
              return None
          return PyHalf_FromHalf(c_scalar.value)
  cdef class FloatScalar(Scalar):
      """
      Concrete class for float scalars.
      """

      def as_py(self):
          """
          Return this value as a Python float, or None if the scalar is null.
          """
          cdef CFloatScalar* c_scalar = <CFloatScalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class DoubleScalar(Scalar):
      """
      Concrete class for double scalars.
      """

      def as_py(self):
          """
          Return this value as a Python float, or None if the scalar is null.
          """
          cdef CDoubleScalar* c_scalar = <CDoubleScalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return c_scalar.value
  cdef class Decimal128Scalar(Scalar):
      """
      Concrete class for decimal128 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python Decimal, or None if the scalar is null.
          """
          cdef:
              CDecimal128Scalar* c_scalar = (
                  <CDecimal128Scalar*> self.wrapped.get()
              )
              CDecimal128Type* c_type = <CDecimal128Type*> c_scalar.type.get()

          if not c_scalar.is_valid:
              return None

          # Render the value as text using the type's scale, then parse it.
          text = frombytes(c_scalar.value.ToString(c_type.scale()))
          return _pydecimal.Decimal(text)
  cdef class Decimal256Scalar(Scalar):
      """
      Concrete class for decimal256 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python Decimal, or None if the scalar is null.
          """
          cdef:
              CDecimal256Scalar* c_scalar = (
                  <CDecimal256Scalar*> self.wrapped.get()
              )
              CDecimal256Type* c_type = <CDecimal256Type*> c_scalar.type.get()

          if not c_scalar.is_valid:
              return None

          # Render the value as text using the type's scale, then parse it.
          text = frombytes(c_scalar.value.ToString(c_type.scale()))
          return _pydecimal.Decimal(text)
  cdef class Date32Scalar(Scalar):
      """
      Concrete class for date32 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python datetime.date instance, or None if
          the scalar is null.
          """
          cdef CDate32Scalar* c_scalar = <CDate32Scalar*> self.wrapped.get()

          if not c_scalar.is_valid:
              return None

          # The stored value is applied as a day offset from 1970-01-01.
          epoch = datetime.date(1970, 1, 1)
          return epoch + datetime.timedelta(days=c_scalar.value)
  cdef class Date64Scalar(Scalar):
      """
      Concrete class for date64 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python datetime.date instance, or None if
          the scalar is null.
          """
          cdef CDate64Scalar* c_scalar = <CDate64Scalar*> self.wrapped.get()

          if not c_scalar.is_valid:
              return None

          # 86400000 is the number of milliseconds in one day; the quotient is
          # applied as a day offset from 1970-01-01.
          epoch = datetime.date(1970, 1, 1)
          return epoch + datetime.timedelta(days=c_scalar.value / 86400000)
  def _datetime_from_int(int64_t value, TimeUnit unit, tzinfo=None):
      """
      Convert an integer offset from 1970-01-01 into a datetime-like object.

      Parameters
      ----------
      value : int64_t
          Offset from the epoch, expressed in ``unit``.
      unit : TimeUnit
          Resolution of ``value``.
      tzinfo : tzinfo, optional
          When given, the result is converted with ``tzinfo.fromutc``.

      Returns
      -------
      datetime.datetime, or pandas.Timestamp for the nanosecond unit when
      pandas is available.

      Raises
      ------
      ValueError
          For a nanosecond value that is not a whole number of microseconds
          when pandas is not available.
      """
      if unit == TimeUnit_SECOND:
          offset = datetime.timedelta(seconds=value)
      elif unit == TimeUnit_MILLI:
          offset = datetime.timedelta(milliseconds=value)
      elif unit == TimeUnit_MICRO:
          offset = datetime.timedelta(microseconds=value)
      else:
          # TimeUnit_NANO: pandas can represent it exactly, so prefer it.
          if _pandas_api.have_pandas:
              return _pandas_api.pd.Timestamp(value, tz=tzinfo, unit='ns')

          # Without pandas only lossless truncation to microseconds is allowed.
          if value % 1000 != 0:
              raise ValueError(
                  "Nanosecond resolution temporal type {} is not safely "
                  "convertible to microseconds to convert to datetime.datetime. "
                  "Install pandas to return as Timestamp with nanosecond "
                  "support or access the .value attribute.".format(value)
              )
          offset = datetime.timedelta(microseconds=value // 1000)

      result = datetime.datetime(1970, 1, 1) + offset

      # Shift into the data type's timezone when one was supplied.
      if tzinfo is not None:
          result = tzinfo.fromutc(result)

      return result
  cdef class Time32Scalar(Scalar):
      """
      Concrete class for time32 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python datetime.time instance, or None if
          the scalar is null.
          """
          cdef:
              CTime32Scalar* c_scalar = <CTime32Scalar*> self.wrapped.get()
              CTime32Type* c_type = <CTime32Type*> c_scalar.type.get()

          if not c_scalar.is_valid:
              return None

          # Build a datetime from the epoch and keep only its time-of-day part.
          moment = _datetime_from_int(c_scalar.value, unit=c_type.unit())
          return moment.time()
  cdef class Time64Scalar(Scalar):
      """
      Concrete class for time64 scalars.
      """

      def as_py(self):
          """
          Return this value as a Python datetime.time instance, or None if
          the scalar is null.
          """
          cdef:
              CTime64Scalar* c_scalar = <CTime64Scalar*> self.wrapped.get()
              CTime64Type* c_type = <CTime64Type*> c_scalar.type.get()

          if not c_scalar.is_valid:
              return None

          # Build a datetime from the epoch and keep only its time-of-day part.
          moment = _datetime_from_int(c_scalar.value, unit=c_type.unit())
          return moment.time()
  cdef class TimestampScalar(Scalar):
      """
      Concrete class for timestamp scalars.
      """

      @property
      def value(self):
          """
          Raw integer value of the timestamp, or None if the scalar is null.
          """
          cdef CTimestampScalar* c_scalar = (
              <CTimestampScalar*> self.wrapped.get()
          )
          if not c_scalar.is_valid:
              return None
          return c_scalar.value

      def as_py(self):
          """
          Return this value as a Pandas Timestamp instance (nanosecond unit
          with pandas available), otherwise as a Python datetime.datetime
          instance. Returns None if the scalar is null.
          """
          cdef:
              CTimestampScalar* c_scalar = (
                  <CTimestampScalar*> self.wrapped.get()
              )
              CTimestampType* c_type = <CTimestampType*> c_scalar.type.get()

          if not c_scalar.is_valid:
              return None

          # An empty timezone string on the type means "no timezone".
          if c_type.timezone().empty():
              tzinfo = None
          else:
              tzinfo = string_to_tzinfo(frombytes(c_type.timezone()))

          return _datetime_from_int(
              c_scalar.value, unit=c_type.unit(), tzinfo=tzinfo
          )
  cdef class DurationScalar(Scalar):
      """
      Concrete class for duration scalars.
      """

      @property
      def value(self):
          """
          Raw integer value of the duration, or None if the scalar is null.
          """
          cdef CDurationScalar* c_scalar = (
              <CDurationScalar*> self.wrapped.get()
          )
          if not c_scalar.is_valid:
              return None
          return c_scalar.value

      def as_py(self):
          """
          Return this value as a Pandas Timedelta instance (nanosecond unit
          with pandas available), otherwise as a Python datetime.timedelta
          instance. Returns None if the scalar is null.

          Raises
          ------
          ValueError
              For a nanosecond duration that is not a whole number of
              microseconds when pandas is not available.
          """
          cdef:
              CDurationScalar* c_scalar = (
                  <CDurationScalar*> self.wrapped.get()
              )
              CDurationType* c_type = <CDurationType*> c_scalar.type.get()
              TimeUnit unit = c_type.unit()

          if not c_scalar.is_valid:
              return None

          if unit == TimeUnit_SECOND:
              return datetime.timedelta(seconds=c_scalar.value)
          if unit == TimeUnit_MILLI:
              return datetime.timedelta(milliseconds=c_scalar.value)
          if unit == TimeUnit_MICRO:
              return datetime.timedelta(microseconds=c_scalar.value)

          # TimeUnit_NANO: pandas can represent it exactly, so prefer it.
          if _pandas_api.have_pandas:
              return _pandas_api.pd.Timedelta(c_scalar.value, unit='ns')

          # Without pandas only lossless truncation to microseconds is allowed.
          if c_scalar.value % 1000 != 0:
              raise ValueError(
                  "Nanosecond duration {} is not safely convertible to "
                  "microseconds to convert to datetime.timedelta. Install "
                  "pandas to return as Timedelta with nanosecond support or "
                  "access the .value attribute.".format(c_scalar.value)
              )
          return datetime.timedelta(microseconds=c_scalar.value // 1000)
  cdef class BinaryScalar(Scalar):
      """
      Concrete class for binary-like scalars.
      """

      def as_buffer(self):
          """
          Return a view over this value as a Buffer object, or None if the
          scalar is null.
          """
          cdef CBaseBinaryScalar* c_scalar = (
              <CBaseBinaryScalar*> self.wrapped.get()
          )
          if not c_scalar.is_valid:
              return None
          return pyarrow_wrap_buffer(c_scalar.value)

      def as_py(self):
          """
          Return this value as a Python bytes, or None if the scalar is null.
          """
          buf = self.as_buffer()
          if buf is None:
              return None
          return buf.to_pybytes()
  cdef class LargeBinaryScalar(BinaryScalar):
      # Adds nothing of its own: behaviour is inherited from BinaryScalar.
      pass
  cdef class FixedSizeBinaryScalar(BinaryScalar):
      # Adds nothing of its own: behaviour is inherited from BinaryScalar.
      pass
  cdef class StringScalar(BinaryScalar):
      """
      Concrete class for string-like (utf8) scalars.
      """

      def as_py(self):
          """
          Return this value as a Python string (decoded as utf8), or None if
          the scalar is null.
          """
          buf = self.as_buffer()
          if buf is None:
              return None
          return str(buf, 'utf8')
  cdef class LargeStringScalar(StringScalar):
      # Adds nothing of its own: behaviour is inherited from StringScalar.
      pass
  cdef class ListScalar(Scalar):
      """
      Concrete class for list-like scalars.
      """

      @property
      def values(self):
          """
          The list's elements as an Array, or None if the scalar is null.
          """
          cdef CBaseListScalar* c_scalar = (
              <CBaseListScalar*> self.wrapped.get()
          )
          if not c_scalar.is_valid:
              return None
          return pyarrow_wrap_array(c_scalar.value)

      def __len__(self):
          """
          Return the number of values.
          """
          elements = self.values
          return len(elements)

      def __getitem__(self, i):
          """
          Return the value at the given index.
          """
          position = _normalize_index(i, len(self))
          return self.values[position]

      def __iter__(self):
          """
          Iterate over this element's values.
          """
          elements = self.values
          return iter(elements)

      def as_py(self):
          """
          Return this value as a Python list, or None if the scalar is null.
          """
          elements = self.values
          if elements is None:
              return None
          return elements.to_pylist()
  cdef class FixedSizeListScalar(ListScalar):
      # Adds nothing of its own: behaviour is inherited from ListScalar.
      pass
  cdef class LargeListScalar(ListScalar):
      # Adds nothing of its own: behaviour is inherited from ListScalar.
      pass
  cdef class StructScalar(Scalar, collections.abc.Mapping):
      """
      Concrete class for struct scalars.

      Behaves as a mapping from field name to child Scalar; children can
      also be looked up by integer position.
      """

      def __len__(self):
          cdef CStructScalar* c_scalar = <CStructScalar*> self.wrapped.get()
          return c_scalar.value.size()

      def __iter__(self):
          # Yields the field names in the order declared by the struct type.
          cdef:
              CStructScalar* c_scalar = <CStructScalar*> self.wrapped.get()
              CStructType* c_type = <CStructType*> c_scalar.type.get()
              vector[shared_ptr[CField]] c_fields = c_type.fields()

          for pos in range(c_type.num_fields()):
              yield frombytes(c_fields[pos].get().name())

      def items(self):
          # Children are fetched by position rather than by name.
          return ((name, self[pos]) for pos, name in enumerate(self))

      def __contains__(self, key):
          field_names = list(self)
          return key in field_names

      def __getitem__(self, key):
          """
          Return the child value for the given field.

          Parameters
          ----------
          key : Union[int, str]
              Index / position or name of the field.

          Returns
          -------
          result : Scalar

          Raises
          ------
          TypeError
              If ``key`` is neither an integer nor a string/bytes.
          IndexError
              If an integer key cannot be resolved.
          KeyError
              If a name key cannot be resolved.
          """
          cdef:
              CFieldRef ref
              CStructScalar* c_scalar = <CStructScalar*> self.wrapped.get()

          by_position = isinstance(key, int)

          if isinstance(key, (bytes, str)):
              ref = CFieldRef(<c_string> tobytes(key))
          elif by_position:
              ref = CFieldRef(<int> key)
          else:
              raise TypeError('Expected integer or string index')

          try:
              return Scalar.wrap(GetResultValue(c_scalar.field(ref)))
          except ArrowInvalid as exc:
              # Map the Arrow lookup failure onto the usual Python exception
              # for the kind of key that was supplied.
              if by_position:
                  raise IndexError(key) from exc
              raise KeyError(key) from exc

      def as_py(self):
          """
          Return this value as a Python dict, or None if the scalar is null.

          Raises
          ------
          ValueError
              If looking a field up by name raises KeyError.
          """
          if not self.is_valid:
              return None

          try:
              return {name: self[name].as_py() for name in self.keys()}
          except KeyError:
              raise ValueError(
                  "Converting to Python dictionary is not supported when "
                  "duplicate field names are present")

      def _as_py_tuple(self):
          # Returns a list of (name, value) pairs instead of a dict, so that
          # repr/str keep working in the presence of duplicate field names.
          if not self.is_valid:
              return None
          return [(name, self[pos].as_py()) for pos, name in enumerate(self)]

      def __repr__(self):
          class_name = self.__class__.__name__
          pairs = self._as_py_tuple()
          return '<pyarrow.{}: {!r}>'.format(class_name, pairs)

      def __str__(self):
          pairs = self._as_py_tuple()
          return str(pairs)
  cdef class MapScalar(ListScalar):
      """
      Concrete class for map scalars.
      """

      def __getitem__(self, i):
          """
          Return the (key, value) pair at the given index.

          Raises
          ------
          IndexError
              If the scalar is null (it has no entries to index).
          """
          arr = self.values
          if arr is None:
              raise IndexError(i)
          dct = arr[_normalize_index(i, len(arr))]
          return (dct['key'], dct['value'])

      def __iter__(self):
          """
          Iterate over this element's entries as (key, value) Python pairs.

          A null scalar yields nothing.
          """
          arr = self.values
          # Check the local `arr`, not the module-level `array` function,
          # which is never None.
          if arr is None:
              # A plain return ends the generator; `raise StopIteration`
              # inside a generator becomes RuntimeError under PEP 479.
              return
          for k, v in zip(arr.field('key'), arr.field('value')):
              yield (k.as_py(), v.as_py())

      def as_py(self):
          """
          Return this value as a Python list of (key, value) pairs, or None
          if the scalar is null.
          """
          # Validity comes from the base-class property rather than from a
          # cast to an unrelated struct scalar type.
          return list(self) if self.is_valid else None
  cdef class DictionaryScalar(Scalar):
      """
      Concrete class for dictionary-encoded scalars.
      """

      @classmethod
      def _reconstruct(cls, type, is_valid, index, dictionary):
          """
          Rebuild a DictionaryScalar from its parts (used by ``__reduce__``).

          Parameters
          ----------
          type : DictionaryType or anything ``ensure_type`` accepts
          is_valid : bool
          index : Scalar or Python value
              A Python value is converted with ``scalar`` using the
              dictionary type's index_type.
          dictionary : Array or Python sequence
              A Python sequence is converted with ``array`` using the
              dictionary type's value_type.

          Raises
          ------
          TypeError
              If ``type`` is not a DictionaryType, or a passed Scalar/Array
              does not match the dictionary type's index/value type.
          """
          cdef:
              CDictionaryScalarIndexAndDictionary value
              shared_ptr[CDictionaryScalar] wrapped
              DataType type_
              Scalar index_
              Array dictionary_

          type_ = ensure_type(type, allow_none=False)
          if not isinstance(type_, DictionaryType):
              raise TypeError('Must pass a DictionaryType instance')

          # Validate against the normalized type_ rather than the raw
          # argument, consistently with the conversions below.
          if isinstance(index, Scalar):
              if not index.type.equals(type_.index_type):
                  raise TypeError("The Scalar value passed as index must have "
                                  "identical type to the dictionary type's "
                                  "index_type")
              index_ = index
          else:
              index_ = scalar(index, type=type_.index_type)

          if isinstance(dictionary, Array):
              if not dictionary.type.equals(type_.value_type):
                  raise TypeError("The Array passed as dictionary must have "
                                  "identical type to the dictionary type's "
                                  "value_type")
              dictionary_ = dictionary
          else:
              dictionary_ = array(dictionary, type=type_.value_type)

          value.index = pyarrow_unwrap_scalar(index_)
          value.dictionary = pyarrow_unwrap_array(dictionary_)
          wrapped = make_shared[CDictionaryScalar](
              value, pyarrow_unwrap_data_type(type_), <c_bool>(is_valid)
          )
          return Scalar.wrap(<shared_ptr[CScalar]> wrapped)

      def __reduce__(self):
          return DictionaryScalar._reconstruct, (
              self.type, self.is_valid, self.index, self.dictionary
          )

      @property
      def index(self):
          """
          Return this value's underlying index as a scalar.
          """
          cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
          return Scalar.wrap(sp.value.index)

      @property
      def value(self):
          """
          Return the encoded value as a scalar.
          """
          cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
          return Scalar.wrap(GetResultValue(sp.GetEncodedValue()))

      @property
      def dictionary(self):
          """
          Return the dictionary of this scalar as an Array.
          """
          cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
          return pyarrow_wrap_array(sp.value.dictionary)

      def as_py(self):
          """
          Return this encoded value as a Python object, or None if the
          scalar is null.
          """
          return self.value.as_py() if self.is_valid else None

      @property
      def index_value(self):
          """
          Deprecated alias of ``index``; emits a FutureWarning.
          """
          # The ", " separator was missing, so the message used to read
          # "...as of 1.0.0please use...".
          warnings.warn("`index_value` property is deprecated as of 1.0.0, "
                        "please use the `index` property instead",
                        FutureWarning)
          return self.index

      @property
      def dictionary_value(self):
          """
          Deprecated alias of ``value``; emits a FutureWarning.
          """
          warnings.warn("`dictionary_value` property is deprecated as of 1.0.0, "
                        "please use the `value` property instead", FutureWarning)
          return self.value
  cdef class UnionScalar(Scalar):
      """
      Concrete class for Union scalars.
      """

      @property
      def value(self):
          """
          Return underlying value as a scalar, or None if this scalar is null.
          """
          cdef CUnionScalar* c_scalar = <CUnionScalar*> self.wrapped.get()
          if not c_scalar.is_valid:
              return None
          return Scalar.wrap(c_scalar.value)

      def as_py(self):
          """
          Return underlying value as a Python object.
          """
          inner = self.value
          if inner is None:
              return None
          return inner.as_py()
  # Lookup table used by Scalar.wrap: Arrow type id -> Python wrapper class.
  cdef dict _scalar_classes = {
      # boolean and integers
      _Type_BOOL: BooleanScalar,
      _Type_UINT8: UInt8Scalar,
      _Type_UINT16: UInt16Scalar,
      _Type_UINT32: UInt32Scalar,
      _Type_UINT64: UInt64Scalar,
      _Type_INT8: Int8Scalar,
      _Type_INT16: Int16Scalar,
      _Type_INT32: Int32Scalar,
      _Type_INT64: Int64Scalar,
      # floating point and decimals
      _Type_HALF_FLOAT: HalfFloatScalar,
      _Type_FLOAT: FloatScalar,
      _Type_DOUBLE: DoubleScalar,
      _Type_DECIMAL128: Decimal128Scalar,
      _Type_DECIMAL256: Decimal256Scalar,
      # temporal
      _Type_DATE32: Date32Scalar,
      _Type_DATE64: Date64Scalar,
      _Type_TIME32: Time32Scalar,
      _Type_TIME64: Time64Scalar,
      _Type_TIMESTAMP: TimestampScalar,
      _Type_DURATION: DurationScalar,
      # binary and string
      _Type_BINARY: BinaryScalar,
      _Type_LARGE_BINARY: LargeBinaryScalar,
      _Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar,
      _Type_STRING: StringScalar,
      _Type_LARGE_STRING: LargeStringScalar,
      # nested
      _Type_LIST: ListScalar,
      _Type_LARGE_LIST: LargeListScalar,
      _Type_FIXED_SIZE_LIST: FixedSizeListScalar,
      _Type_STRUCT: StructScalar,
      _Type_MAP: MapScalar,
      _Type_DICTIONARY: DictionaryScalar,
      # both union flavours share one wrapper class
      _Type_SPARSE_UNION: UnionScalar,
      _Type_DENSE_UNION: UnionScalar,
  }
  def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
      """
      Create a pyarrow.Scalar instance from a Python object.

      Parameters
      ----------
      value : Any
          Python object coercible to arrow's type system.
      type : pyarrow.DataType
          Explicit type to attempt to coerce to, otherwise will be inferred from
          the value.
      from_pandas : bool, default None
          Use pandas's semantics for inferring nulls from values in
          ndarray-like data. Defaults to False if not passed explicitly by user,
          or True if a pandas object is passed in.
      memory_pool : pyarrow.MemoryPool, optional
          If not passed, will allocate memory from the currently-set default
          memory pool.

      Returns
      -------
      scalar : pyarrow.Scalar

      Examples
      --------
      >>> import pyarrow as pa

      >>> pa.scalar(42)
      <pyarrow.Int64Scalar: 42>

      >>> pa.scalar("string")
      <pyarrow.StringScalar: 'string'>

      >>> pa.scalar([1, 2])
      <pyarrow.ListScalar: [1, 2]>

      >>> pa.scalar([1, 2], type=pa.list_(pa.int16()))
      <pyarrow.ListScalar: [1, 2]>
      """
      cdef:
          DataType resolved_type
          PyConversionOptions options
          shared_ptr[CScalar] c_scalar
          shared_ptr[CArray] c_first_chunk
          shared_ptr[CChunkedArray] c_chunked
          bint is_pandas_object = False
          CMemoryPool* pool

      type = ensure_type(type, allow_none=True)
      pool = maybe_unbox_memory_pool(memory_pool)

      # Array-like input is unwrapped first; this also reports whether it
      # came from pandas.
      if _is_array_like(value):
          value = get_values(value, &is_pandas_object)

      # Exactly one element is converted.
      options.size = 1

      if type is not None:
          resolved_type = ensure_type(type)
          options.type = resolved_type.sp_type

      # An explicit from_pandas wins; otherwise follow what was detected.
      if from_pandas is not None:
          options.from_pandas = from_pandas
      else:
          options.from_pandas = is_pandas_object

      # Convert through the sequence path using a one-element list.
      value = [value]
      with nogil:
          c_chunked = GetResultValue(
              ConvertPySequence(value, None, options, pool)
          )

      # A single chunk is expected; take it.
      assert c_chunked.get().num_chunks() == 1
      c_first_chunk = c_chunked.get().chunk(0)

      # The scalar is the element at position 0 of that chunk.
      c_scalar = GetResultValue(c_first_chunk.get().GetScalar(0))
      return Scalar.wrap(c_scalar)