123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254 |
- # Licensed to the Apache Software Foundation (ASF) under one
- # or more contributor license agreements. See the NOTICE file
- # distributed with this work for additional information
- # regarding copyright ownership. The ASF licenses this file
- # to you under the Apache License, Version 2.0 (the
- # "License"); you may not use this file except in compliance
- # with the License. You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing,
- # software distributed under the License is distributed on an
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- # KIND, either express or implied. See the License for the
- # specific language governing permissions and limitations
- # under the License.
- # pandas lazy-loading API shim that reduces API call and import overhead
- import warnings
cdef class _PandasAPIShim(object):
    """
    Lazy pandas importer that isolates usages of pandas APIs and avoids
    importing pandas until it's actually needed.

    The first method or property that needs pandas triggers the import.
    The pandas objects used afterwards are cached as attributes of the
    instance, so later calls do not go through the import machinery again.
    """
    cdef:
        # True once an import attempt has been made (successful or not)
        bint _tried_importing_pandas
        # True only after a supported pandas version has been imported
        bint _have_pandas

    cdef readonly:
        object _loose_version, _version
        object _pd, _types_api, _compat_module
        object _data_frame, _index, _series, _categorical_type
        object _datetimetz_type, _extension_array, _extension_dtype
        object _array_like_types, _is_extension_array_dtype
        bint has_sparse
        bint _pd024

    def __init__(self):
        self._tried_importing_pandas = False
        self._have_pandas = False

    cdef _import_pandas(self, bint raise_):
        """
        Import pandas and cache the pandas objects used by the shim.

        Parameters
        ----------
        raise_ : bool
            If True, a missing or too old pandas raises ImportError.
            If False, a missing pandas is ignored silently and a too old
            pandas only emits a warning; in both cases ``_have_pandas``
            stays False.
        """
        try:
            import pandas as pd
            import pyarrow.pandas_compat as pdcompat
        except ImportError:
            self._have_pandas = False
            if raise_:
                raise
            else:
                return

        from pyarrow.vendored.version import Version

        self._pd = pd
        self._version = pd.__version__
        self._loose_version = Version(pd.__version__)

        if self._loose_version < Version('0.23.0'):
            self._have_pandas = False
            if raise_:
                raise ImportError(
                    "pyarrow requires pandas 0.23.0 or above, pandas {} is "
                    "installed".format(self._version)
                )
            else:
                warnings.warn(
                    "pyarrow requires pandas 0.23.0 or above, pandas {} is "
                    "installed. Therefore, pandas-specific integration is not "
                    "used.".format(self._version), stacklevel=2)
                return

        self._compat_module = pdcompat
        self._data_frame = pd.DataFrame
        self._index = pd.Index
        self._categorical_type = pd.Categorical
        self._series = pd.Series
        self._extension_array = pd.api.extensions.ExtensionArray
        self._array_like_types = (
            self._series, self._index, self._categorical_type,
            self._extension_array)
        self._extension_dtype = pd.api.extensions.ExtensionDtype
        if self._loose_version >= Version('0.24.0'):
            self._is_extension_array_dtype = \
                pd.api.types.is_extension_array_dtype
        else:
            # is_extension_array_dtype() then always answers False
            self._is_extension_array_dtype = None

        self._types_api = pd.api.types
        self._datetimetz_type = pd.api.types.DatetimeTZDtype
        self._have_pandas = True

        if self._loose_version > Version('0.25'):
            self.has_sparse = False
        else:
            self.has_sparse = True

        self._pd024 = self._loose_version >= Version('0.24')

    cdef inline _check_import(self, bint raise_=True):
        """
        Make sure an import of pandas has been attempted.

        When an earlier attempt failed and ``raise_`` is True, the import is
        retried so that the original error is raised to the caller.
        """
        if self._tried_importing_pandas:
            if not self._have_pandas and raise_:
                self._import_pandas(raise_)
            return

        self._tried_importing_pandas = True
        self._import_pandas(raise_)

    def series(self, *args, **kwargs):
        """Construct a pandas.Series from the given arguments."""
        self._check_import()
        return self._series(*args, **kwargs)

    def data_frame(self, *args, **kwargs):
        """Construct a pandas.DataFrame from the given arguments."""
        self._check_import()
        return self._data_frame(*args, **kwargs)

    cdef inline bint _have_pandas_internal(self):
        """Return whether pandas is usable, importing it without raising."""
        if not self._tried_importing_pandas:
            self._check_import(raise_=False)
        return self._have_pandas

    @property
    def have_pandas(self):
        """Whether a supported pandas could be imported (never raises)."""
        return self._have_pandas_internal()

    @property
    def compat(self):
        """The pyarrow.pandas_compat module."""
        self._check_import()
        return self._compat_module

    @property
    def pd(self):
        """The imported pandas module."""
        self._check_import()
        return self._pd

    cpdef infer_dtype(self, obj):
        """
        Infer the dtype of obj with pandas' infer_dtype.

        Falls back to ``pd.lib.infer_dtype`` when the ``pd.api.types``
        lookup raises AttributeError.
        """
        self._check_import()
        try:
            return self._types_api.infer_dtype(obj, skipna=False)
        except AttributeError:
            return self._pd.lib.infer_dtype(obj)

    cpdef pandas_dtype(self, dtype):
        """
        Convert dtype with ``pd.api.types.pandas_dtype``.

        Returns None when that call raises AttributeError.
        """
        self._check_import()
        try:
            return self._types_api.pandas_dtype(dtype)
        except AttributeError:
            return None

    @property
    def loose_version(self):
        """The pandas version parsed into a comparable Version object."""
        self._check_import()
        return self._loose_version

    @property
    def version(self):
        """The pandas version string (``pd.__version__``)."""
        self._check_import()
        return self._version

    @property
    def categorical_type(self):
        """The pandas.Categorical class."""
        self._check_import()
        return self._categorical_type

    @property
    def datetimetz_type(self):
        """The pandas DatetimeTZDtype class."""
        self._check_import()
        return self._datetimetz_type

    @property
    def extension_dtype(self):
        """The pandas ExtensionDtype base class."""
        self._check_import()
        return self._extension_dtype

    cpdef is_array_like(self, obj):
        """
        Return True if obj is a pandas Series, Index, Categorical or
        ExtensionArray. Raises ImportError if pandas is unavailable.
        """
        self._check_import()
        return isinstance(obj, self._array_like_types)

    cpdef is_categorical(self, obj):
        """Return True if obj is a pandas.Categorical; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._categorical_type)
        else:
            return False

    cpdef is_datetimetz(self, obj):
        """Return True if obj is a DatetimeTZDtype; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._datetimetz_type)
        else:
            return False

    cpdef is_extension_array_dtype(self, obj):
        """
        Return pandas' is_extension_array_dtype(obj), or False when the
        installed pandas is older than 0.24.0. Raises ImportError if pandas
        is unavailable.
        """
        self._check_import()
        if self._is_extension_array_dtype:
            return self._is_extension_array_dtype(obj)
        else:
            return False

    cpdef is_sparse(self, obj):
        """Return pandas' is_sparse(obj); False without pandas."""
        if self._have_pandas_internal():
            return self._types_api.is_sparse(obj)
        else:
            return False

    cpdef is_data_frame(self, obj):
        """Return True if obj is a pandas.DataFrame; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._data_frame)
        else:
            return False

    cpdef is_series(self, obj):
        """Return True if obj is a pandas.Series; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._series)
        else:
            return False

    cpdef is_index(self, obj):
        """Return True if obj is a pandas.Index; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._index)
        else:
            return False

    cpdef get_values(self, obj):
        """
        Get the underlying array values of a pandas Series or Index in the
        format (np.ndarray or pandas ExtensionArray) as we need them.
        Assumes obj is a pandas Series or Index.
        """
        self._check_import()
        # _types_api is pd.api.types, cached by _import_pandas
        if isinstance(obj.dtype, (self._types_api.IntervalDtype,
                                  self._types_api.PeriodDtype)):
            if self._pd024:
                # only since pandas 0.24, interval and period are stored as
                # such in Series
                return obj.array
        return obj.values

    def assert_frame_equal(self, *args, **kwargs):
        """
        Compare two DataFrames with pandas' assert_frame_equal.

        When called with arguments they are forwarded to
        ``pandas.testing.assert_frame_equal``, which raises AssertionError
        on a mismatch. When called without any argument the pandas function
        itself is returned, which keeps ``assert_frame_equal()(left, right)``
        working for existing callers.
        """
        self._check_import()
        # pandas.testing is the public location of the assertion helpers;
        # pandas.util.testing was deprecated in pandas 1.0 and later removed.
        check = self._pd.testing.assert_frame_equal
        if args or kwargs:
            return check(*args, **kwargs)
        return check

    def get_rangeindex_attribute(self, level, name):
        """
        Return attribute ``name`` of ``level``, falling back to the private
        ``_name`` attribute when the public one does not exist.
        """
        # public start/stop/step attributes added in pandas 0.25.0
        self._check_import()
        if hasattr(level, name):
            return getattr(level, name)
        return getattr(level, '_' + name)
# Module-wide shim instance, declared with its C-level type so that cdef
# methods can be called on it from Cython code.
cdef _PandasAPIShim pandas_api
pandas_api = _PandasAPIShim()

# Python-level name bound to the same shim instance.
_pandas_api = pandas_api
|