pandas-shim.pxi 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. # Licensed to the Apache Software Foundation (ASF) under one
  2. # or more contributor license agreements. See the NOTICE file
  3. # distributed with this work for additional information
  4. # regarding copyright ownership. The ASF licenses this file
  5. # to you under the Apache License, Version 2.0 (the
  6. # "License"); you may not use this file except in compliance
  7. # with the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing,
  12. # software distributed under the License is distributed on an
  13. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. # KIND, either express or implied. See the License for the
  15. # specific language governing permissions and limitations
  16. # under the License.
  17. # pandas lazy-loading API shim that reduces API call and import overhead
  18. import warnings
  cdef class _PandasAPIShim(object):
      """
      Lazy pandas importer that isolates usages of pandas APIs and avoids
      importing pandas until it's actually needed.

      The first accessor that needs pandas triggers the import; the pandas
      objects used afterwards are cached on the instance. Accessors that go
      through ``_check_import()`` raise ImportError when pandas (0.23.0 or
      above) or ``pyarrow.pandas_compat`` cannot be imported. Predicates
      built on ``_have_pandas_internal()`` return False in that situation
      instead of raising.
      """

      cdef:
          # True once an import of pandas has been attempted.
          bint _tried_importing_pandas
          # True only after a successful import of a supported pandas.
          bint _have_pandas

      cdef readonly:
          object _loose_version, _version
          object _pd, _types_api, _compat_module
          object _data_frame, _index, _series, _categorical_type
          object _datetimetz_type, _extension_array, _extension_dtype
          object _array_like_types, _is_extension_array_dtype
          bint has_sparse
          bint _pd024

      def __init__(self):
          self._tried_importing_pandas = False
          self._have_pandas = False

      # Import pandas and pyarrow.pandas_compat and cache the objects used by
      # the accessors below. When the import fails or pandas is older than
      # 0.23.0, ``_have_pandas`` stays False; with ``raise_`` set an
      # ImportError propagates, otherwise the method returns quietly (emitting
      # a warning in the "pandas too old" case).
      cdef _import_pandas(self, bint raise_):
          try:
              import pandas as pd
              import pyarrow.pandas_compat as pdcompat
          except ImportError:
              self._have_pandas = False
              if raise_:
                  raise
              else:
                  return

          from pyarrow.vendored.version import Version

          self._pd = pd
          self._version = pd.__version__
          self._loose_version = Version(pd.__version__)

          if self._loose_version < Version('0.23.0'):
              self._have_pandas = False
              if raise_:
                  raise ImportError(
                      "pyarrow requires pandas 0.23.0 or above, pandas {} is "
                      "installed".format(self._version)
                  )
              else:
                  warnings.warn(
                      "pyarrow requires pandas 0.23.0 or above, pandas {} is "
                      "installed. Therefore, pandas-specific integration is not "
                      "used.".format(self._version), stacklevel=2)
                  return

          self._compat_module = pdcompat
          self._data_frame = pd.DataFrame
          self._index = pd.Index
          self._categorical_type = pd.Categorical
          self._series = pd.Series
          self._extension_array = pd.api.extensions.ExtensionArray
          self._array_like_types = (
              self._series, self._index, self._categorical_type,
              self._extension_array)
          self._extension_dtype = pd.api.extensions.ExtensionDtype
          if self._loose_version >= Version('0.24.0'):
              self._is_extension_array_dtype = (
                  pd.api.types.is_extension_array_dtype)
          else:
              # Older pandas: is_extension_array_dtype() below reports False.
              self._is_extension_array_dtype = None
          self._types_api = pd.api.types
          self._datetimetz_type = pd.api.types.DatetimeTZDtype
          self._have_pandas = True

          # has_sparse is switched off for pandas versions newer than 0.25.
          self.has_sparse = self._loose_version <= Version('0.25')
          self._pd024 = self._loose_version >= Version('0.24')

      # Make sure an import has been attempted. After a failed attempt the
      # import is only repeated when ``raise_`` is set, so that the caller
      # gets the ImportError again rather than a silent no-op.
      cdef inline _check_import(self, bint raise_=True):
          if self._tried_importing_pandas:
              if not self._have_pandas and raise_:
                  self._import_pandas(raise_)
              return

          self._tried_importing_pandas = True
          self._import_pandas(raise_)

      def series(self, *args, **kwargs):
          """Construct a pandas Series, forwarding all arguments."""
          self._check_import()
          return self._series(*args, **kwargs)

      def data_frame(self, *args, **kwargs):
          """Construct a pandas DataFrame, forwarding all arguments."""
          self._check_import()
          return self._data_frame(*args, **kwargs)

      # Non-raising availability check: attempts the import once (without
      # raising) and reports whether a usable pandas was found.
      cdef inline bint _have_pandas_internal(self):
          if not self._tried_importing_pandas:
              self._check_import(raise_=False)
          return self._have_pandas

      @property
      def have_pandas(self):
          """Whether a supported pandas could be imported (never raises)."""
          return self._have_pandas_internal()

      @property
      def compat(self):
          """The ``pyarrow.pandas_compat`` module."""
          self._check_import()
          return self._compat_module

      @property
      def pd(self):
          """The imported ``pandas`` module."""
          self._check_import()
          return self._pd

      cpdef infer_dtype(self, obj):
          """
          Infer the dtype of ``obj`` with ``pandas.api.types.infer_dtype``
          (``skipna=False``), falling back to ``pandas.lib.infer_dtype`` when
          the former raises AttributeError.
          """
          self._check_import()
          try:
              return self._types_api.infer_dtype(obj, skipna=False)
          except AttributeError:
              return self._pd.lib.infer_dtype(obj)

      cpdef pandas_dtype(self, dtype):
          """
          Convert ``dtype`` with ``pandas.api.types.pandas_dtype``; returns
          None when that call raises AttributeError.
          """
          self._check_import()
          try:
              return self._types_api.pandas_dtype(dtype)
          except AttributeError:
              return None

      @property
      def loose_version(self):
          """The installed pandas version as a parsed ``Version`` object."""
          self._check_import()
          return self._loose_version

      @property
      def version(self):
          """The installed pandas version string (``pandas.__version__``)."""
          self._check_import()
          return self._version

      @property
      def categorical_type(self):
          """The ``pandas.Categorical`` class."""
          self._check_import()
          return self._categorical_type

      @property
      def datetimetz_type(self):
          """The ``pandas.api.types.DatetimeTZDtype`` class."""
          self._check_import()
          return self._datetimetz_type

      @property
      def extension_dtype(self):
          """The ``pandas.api.extensions.ExtensionDtype`` class."""
          self._check_import()
          return self._extension_dtype

      cpdef is_array_like(self, obj):
          """
          Return True if ``obj`` is a pandas Series, Index, Categorical or
          ExtensionArray. Raises ImportError if pandas is unavailable.
          """
          self._check_import()
          return isinstance(obj, self._array_like_types)

      cpdef is_categorical(self, obj):
          """Return True if ``obj`` is a pandas Categorical (False without pandas)."""
          if self._have_pandas_internal():
              return isinstance(obj, self._categorical_type)
          else:
              return False

      cpdef is_datetimetz(self, obj):
          """Return True if ``obj`` is a DatetimeTZDtype (False without pandas)."""
          if self._have_pandas_internal():
              return isinstance(obj, self._datetimetz_type)
          else:
              return False

      cpdef is_extension_array_dtype(self, obj):
          """
          Return the result of ``pandas.api.types.is_extension_array_dtype``
          for ``obj``, or False when pandas is older than 0.24.0. Raises
          ImportError if pandas is unavailable.
          """
          self._check_import()
          if self._is_extension_array_dtype:
              return self._is_extension_array_dtype(obj)
          else:
              return False

      cpdef is_sparse(self, obj):
          """
          Return ``pandas.api.types.is_sparse(obj)`` (False without pandas).
          """
          if self._have_pandas_internal():
              return self._types_api.is_sparse(obj)
          else:
              return False

      cpdef is_data_frame(self, obj):
          """Return True if ``obj`` is a pandas DataFrame (False without pandas)."""
          if self._have_pandas_internal():
              return isinstance(obj, self._data_frame)
          else:
              return False

      cpdef is_series(self, obj):
          """Return True if ``obj`` is a pandas Series (False without pandas)."""
          if self._have_pandas_internal():
              return isinstance(obj, self._series)
          else:
              return False

      cpdef is_index(self, obj):
          """Return True if ``obj`` is a pandas Index (False without pandas)."""
          if self._have_pandas_internal():
              return isinstance(obj, self._index)
          else:
              return False

      cpdef get_values(self, obj):
          """
          Get the underlying array values of a pandas Series or Index in the
          format (np.ndarray or pandas ExtensionArray) as we need them.
          Assumes obj is a pandas Series or Index.
          """
          self._check_import()
          # The import check above already ran, so the cached module can be
          # used directly instead of going through the ``pd`` property.
          dtype_api = self._pd.api.types
          if isinstance(obj.dtype, (dtype_api.IntervalDtype,
                                    dtype_api.PeriodDtype)):
              if self._pd024:
                  # only since pandas 0.24, interval and period are stored as
                  # such in Series
                  return obj.array
          return obj.values

      def assert_frame_equal(self, *args, **kwargs):
          """
          Access or invoke ``pandas.testing.assert_frame_equal``.

          Called with arguments, they are forwarded and the comparison is
          actually performed (raising AssertionError on mismatch). Called
          without arguments, the function object itself is returned, which
          keeps ``pandas_api.assert_frame_equal()(left, right)`` working.
          """
          self._check_import()
          # Use the public pandas.testing module; the pandas.util.testing
          # alias is deprecated and no longer exists in recent pandas.
          checker = self._pd.testing.assert_frame_equal
          if args or kwargs:
              return checker(*args, **kwargs)
          return checker

      def get_rangeindex_attribute(self, level, name):
          """
          Return attribute ``name`` of ``level``, falling back to the
          underscore-prefixed private attribute when the public one is
          missing.
          """
          # public start/stop/step attributes added in pandas 0.25.0
          self._check_import()
          if hasattr(level, name):
              return getattr(level, name)
          return getattr(level, '_' + name)
  # Single shared shim instance; pandas itself is only imported once one of
  # the instance's accessors is used.
  cdef _PandasAPIShim pandas_api
  pandas_api = _PandasAPIShim()

  # Plain Python-level name bound to the same instance.
  _pandas_api = pandas_api