12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892 |
- # Licensed to the Apache Software Foundation (ASF) under one
- # or more contributor license agreements. See the NOTICE file
- # distributed with this work for additional information
- # regarding copyright ownership. The ASF licenses this file
- # to you under the Apache License, Version 2.0 (the
- # "License"); you may not use this file except in compliance
- # with the License. You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing,
- # software distributed under the License is distributed on an
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- # KIND, either express or implied. See the License for the
- # specific language governing permissions and limitations
- # under the License.
cdef class Tensor(_Weakrefable):
    """
    A n-dimensional array a.k.a Tensor.

    The constructor always raises TypeError; use one of the ``from_*``
    factory functions (for example ``from_numpy``) to obtain an instance.
    """

    def __init__(self):
        raise TypeError("Do not call Tensor's constructor directly, use one "
                        "of the `pyarrow.Tensor.from_*` functions instead.")

    cdef void init(self, const shared_ptr[CTensor]& sp_tensor):
        # Keep the shared_ptr, cache its raw pointer for cheap access and
        # wrap the C++ data type once.
        self.sp_tensor = sp_tensor
        self.tp = sp_tensor.get()
        self.type = pyarrow_wrap_data_type(self.tp.type())

    def __repr__(self):
        template = ("<pyarrow.Tensor>\n"
                    "type: {0.type}\n"
                    "shape: {0.shape}\n"
                    "strides: {0.strides}")
        return template.format(self)

    @staticmethod
    def from_numpy(obj, dim_names=None):
        """
        Create a Tensor from ``obj`` through NdarrayToTensor.

        Parameters
        ----------
        obj : object
            Handed unchanged to NdarrayToTensor (presumably a
            numpy.ndarray -- confirm against the C++ helper).
        dim_names : iterable of str, optional
            Dimension names; each entry is converted with ``tobytes``.

        Returns
        -------
        The object produced by ``pyarrow_wrap_tensor``.
        """
        cdef:
            shared_ptr[CTensor] ctensor
            vector[c_string] c_dim_names

        if dim_names is not None:
            for name in dim_names:
                c_dim_names.push_back(tobytes(name))

        check_status(NdarrayToTensor(c_default_memory_pool(), obj,
                                     c_dim_names, &ctensor))
        return pyarrow_wrap_tensor(ctensor)

    def to_numpy(self):
        """
        Convert arrow::Tensor to numpy.ndarray with zero copy
        """
        cdef PyObject* ndarray_out

        # `self` is handed to the converter alongside the tensor
        # (presumably as the owner of the memory -- TODO confirm).
        check_status(TensorToNdarray(self.sp_tensor, self, &ndarray_out))
        return PyObject_to_object(ndarray_out)

    def equals(self, Tensor other):
        """
        Return true if the tensors contains exactly equal data
        """
        return self.tp.Equals(deref(other.tp))

    def __eq__(self, other):
        # Only Tensor instances are comparable; defer everything else.
        if not isinstance(other, Tensor):
            return NotImplemented
        return self.equals(other)

    def dim_name(self, i):
        """
        Return the name of dimension ``i`` decoded with ``frombytes``.
        """
        return frombytes(self.tp.dim_name(i))

    @property
    def dim_names(self):
        """
        List of all dimension names, each decoded with ``frombytes``.
        """
        raw_names = tuple(self.tp.dim_names())
        return [frombytes(raw) for raw in raw_names]

    @property
    def is_mutable(self):
        """
        Result of the underlying tensor's ``is_mutable()``.
        """
        return self.tp.is_mutable()

    @property
    def is_contiguous(self):
        """
        Result of the underlying tensor's ``is_contiguous()``.
        """
        return self.tp.is_contiguous()

    @property
    def ndim(self):
        """
        Result of the underlying tensor's ``ndim()``.
        """
        return self.tp.ndim()

    @property
    def size(self):
        """
        Result of the underlying tensor's ``size()``.
        """
        return self.tp.size()

    @property
    def shape(self):
        """
        Shape of the tensor as a tuple.
        """
        # Cython knows how to convert a vector[T] to a Python list
        return tuple(self.tp.shape())

    @property
    def strides(self):
        """
        Strides of the tensor as a tuple.
        """
        return tuple(self.tp.strides())

    def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
        # Fill ``buffer`` so the tensor can be consumed through the buffer
        # protocol. Raises NotImplementedError when the tensor's type has
        # no PEP 3118 format string.
        buffer.buf = <char *> self.tp.data().get().data()

        fmt = self.type.pep3118_format
        if fmt is None:
            raise NotImplementedError("type %s not supported for buffer "
                                      "protocol" % (self.type,))
        buffer.format = fmt
        buffer.itemsize = self.type.bit_width // 8
        buffer.internal = NULL
        # Total byte length: element count times bytes per element.
        buffer.len = self.tp.size() * buffer.itemsize
        buffer.ndim = self.tp.ndim()
        buffer.obj = self
        buffer.readonly = 0 if self.tp.is_mutable() else 1

        # NOTE: This assumes Py_ssize_t == int64_t, and that the shape
        # and strides arrays lifetime is tied to the tensor's
        buffer.shape = <Py_ssize_t *> &self.tp.shape()[0]
        buffer.strides = <Py_ssize_t *> &self.tp.strides()[0]
        buffer.suboffsets = NULL
# Pointer alias used to cast the generic sparse index to a COO index.
ctypedef CSparseCOOIndex* _CSparseCOOIndexPtr


cdef class SparseCOOTensor(_Weakrefable):
    """
    A sparse COO tensor.

    The constructor always raises TypeError; use one of the ``from_*``
    factory functions to obtain an instance.
    """

    def __init__(self):
        raise TypeError("Do not call SparseCOOTensor's constructor directly, "
                        "use one of the `pyarrow.SparseCOOTensor.from_*` "
                        "functions instead.")

    cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor):
        # Keep the shared_ptr, cache its raw pointer and wrap the data type.
        self.sp_sparse_tensor = sp_sparse_tensor
        self.stp = sp_sparse_tensor.get()
        self.type = pyarrow_wrap_data_type(self.stp.type())

    def __repr__(self):
        template = ("<pyarrow.SparseCOOTensor>\n"
                    "type: {0.type}\n"
                    "shape: {0.shape}")
        return template.format(self)

    @classmethod
    def from_dense_numpy(cls, obj, dim_names=None):
        """
        Convert numpy.ndarray to arrow::SparseCOOTensor
        """
        dense = Tensor.from_numpy(obj, dim_names=dim_names)
        return cls.from_tensor(dense)

    @staticmethod
    def from_numpy(data, coords, shape, dim_names=None):
        """
        Create arrow::SparseCOOTensor from numpy.ndarrays

        Parameters
        ----------
        data : object
            Handed unchanged to NdarraysToSparseCOOTensor.
        coords : array-like
            Coordinates; converted to a C-contiguous int64 array and
            required to be 2-dimensional.
        shape : iterable of int
            Shape of the tensor.
        dim_names : iterable of str, optional
            Dimension names.

        Raises
        ------
        ValueError
            If ``coords`` is not 2-dimensional after conversion.
        """
        cdef:
            shared_ptr[CSparseCOOTensor] csparse_tensor
            vector[int64_t] c_shape
            vector[c_string] c_dim_names

        for extent in shape:
            c_shape.push_back(extent)
        if dim_names is not None:
            for name in dim_names:
                c_dim_names.push_back(tobytes(name))

        # Enforce precondition for SparseCOOTensor indices
        coords = np.require(coords, dtype='i8', requirements='C')
        if coords.ndim != 2:
            raise ValueError("Expected 2-dimensional array for "
                             "SparseCOOTensor indices")

        check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(),
                                               data, coords, c_shape,
                                               c_dim_names, &csparse_tensor))
        return pyarrow_wrap_sparse_coo_tensor(csparse_tensor)

    @staticmethod
    def from_scipy(obj, dim_names=None):
        """
        Convert scipy.sparse.coo_matrix to arrow::SparseCOOTensor

        Raises
        ------
        TypeError
            If ``obj`` is not a scipy.sparse.coo_matrix.
        """
        cdef:
            shared_ptr[CSparseCOOTensor] csparse_tensor
            vector[int64_t] c_shape
            vector[c_string] c_dim_names

        import scipy.sparse
        if not isinstance(obj, scipy.sparse.coo_matrix):
            raise TypeError(
                "Expected scipy.sparse.coo_matrix, got {}".format(type(obj)))

        for extent in obj.shape:
            c_shape.push_back(extent)
        if dim_names is not None:
            for name in dim_names:
                c_dim_names.push_back(tobytes(name))

        rows, cols = obj.row, obj.col

        # When SciPy's coo_matrix has canonical format, its indices matrix is
        # sorted in column-major order. As Arrow's SparseCOOIndex is sorted
        # in row-major order if it is canonical, we must sort indices matrix
        # into row-major order to keep its canonicalness, here.
        if obj.has_canonical_format:
            # lexsort uses its last key as the primary one: rows first.
            row_major = np.lexsort((cols, rows))
            rows, cols = rows[row_major], cols[row_major]

        # One (row, col) pair per line, as C-contiguous int64.
        coords = np.require(np.vstack([rows, cols]).T,
                            dtype='i8', requirements='C')

        check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(),
                                               obj.data, coords, c_shape,
                                               c_dim_names, &csparse_tensor))
        return pyarrow_wrap_sparse_coo_tensor(csparse_tensor)

    @staticmethod
    def from_pydata_sparse(obj, dim_names=None):
        """
        Convert pydata/sparse.COO to arrow::SparseCOOTensor.

        Raises
        ------
        TypeError
            If ``obj`` is not a sparse.COO.
        """
        cdef:
            shared_ptr[CSparseCOOTensor] csparse_tensor
            vector[int64_t] c_shape
            vector[c_string] c_dim_names

        import sparse
        if not isinstance(obj, sparse.COO):
            raise TypeError(
                "Expected sparse.COO, got {}".format(type(obj)))

        for extent in obj.shape:
            c_shape.push_back(extent)
        if dim_names is not None:
            for name in dim_names:
                c_dim_names.push_back(tobytes(name))

        # The coordinates are transposed before being handed over.
        coords = np.require(obj.coords.T, dtype='i8', requirements='C')

        check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(),
                                               obj.data, coords, c_shape,
                                               c_dim_names, &csparse_tensor))
        return pyarrow_wrap_sparse_coo_tensor(csparse_tensor)

    @staticmethod
    def from_tensor(obj):
        """
        Convert arrow::Tensor to arrow::SparseCOOTensor.
        """
        cdef:
            shared_ptr[CSparseCOOTensor] csparse_tensor
            shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)

        # The conversion runs without holding the GIL.
        with nogil:
            check_status(TensorToSparseCOOTensor(ctensor, &csparse_tensor))

        return pyarrow_wrap_sparse_coo_tensor(csparse_tensor)

    def to_numpy(self):
        """
        Convert arrow::SparseCOOTensor to numpy.ndarrays with zero copy.

        Returns
        -------
        tuple
            ``(data, coords)``.
        """
        cdef:
            PyObject* out_data
            PyObject* out_coords

        check_status(SparseCOOTensorToNdarray(self.sp_sparse_tensor, self,
                                              &out_data, &out_coords))
        return PyObject_to_object(out_data), PyObject_to_object(out_coords)

    def to_scipy(self):
        """
        Convert arrow::SparseCOOTensor to scipy.sparse.coo_matrix.
        """
        cdef:
            PyObject* out_data
            PyObject* out_coords

        from scipy.sparse import coo_matrix

        check_status(SparseCOOTensorToNdarray(self.sp_sparse_tensor, self,
                                              &out_data, &out_coords))
        values = PyObject_to_object(out_data)
        coords = PyObject_to_object(out_coords)

        # First column of the values, and the two coordinate columns.
        positions = (coords[:, 0], coords[:, 1])
        matrix = coo_matrix((values[:, 0], positions), shape=self.shape)

        # As the description in from_scipy above, we sorted indices matrix
        # in row-major order if SciPy's coo_matrix has canonical format.
        # So, we must call sum_duplicates() to make the result coo_matrix
        # has canonical format.
        if self.has_canonical_format:
            matrix.sum_duplicates()
        return matrix

    def to_pydata_sparse(self):
        """
        Convert arrow::SparseCOOTensor to pydata/sparse.COO.
        """
        cdef:
            PyObject* out_data
            PyObject* out_coords

        from sparse import COO

        check_status(SparseCOOTensorToNdarray(self.sp_sparse_tensor, self,
                                              &out_data, &out_coords))
        values = PyObject_to_object(out_data)
        coords = PyObject_to_object(out_coords)
        return COO(data=values[:, 0], coords=coords.T, shape=self.shape)

    def to_tensor(self):
        """
        Convert arrow::SparseCOOTensor to arrow::Tensor.
        """
        cdef shared_ptr[CTensor] ctensor

        with nogil:
            ctensor = GetResultValue(self.stp.ToTensor())

        return pyarrow_wrap_tensor(ctensor)

    def equals(self, SparseCOOTensor other):
        """
        Return true if sparse tensors contains exactly equal data.
        """
        return self.stp.Equals(deref(other.stp))

    def __eq__(self, other):
        # Only SparseCOOTensor instances are comparable.
        if not isinstance(other, SparseCOOTensor):
            return NotImplemented
        return self.equals(other)

    @property
    def is_mutable(self):
        """
        Result of the underlying sparse tensor's ``is_mutable()``.
        """
        return self.stp.is_mutable()

    @property
    def ndim(self):
        """
        Result of the underlying sparse tensor's ``ndim()``.
        """
        return self.stp.ndim()

    @property
    def shape(self):
        """
        Shape of the sparse tensor as a tuple.
        """
        # Cython knows how to convert a vector[T] to a Python list
        return tuple(self.stp.shape())

    @property
    def size(self):
        """
        Result of the underlying sparse tensor's ``size()``.
        """
        return self.stp.size()

    def dim_name(self, i):
        """
        Return the name of dimension ``i`` decoded with ``frombytes``.
        """
        return frombytes(self.stp.dim_name(i))

    @property
    def dim_names(self):
        """
        Tuple of all dimension names, each decoded with ``frombytes``.
        """
        raw_names = tuple(self.stp.dim_names())
        return tuple(frombytes(raw) for raw in raw_names)

    @property
    def non_zero_length(self):
        """
        Result of the underlying sparse tensor's ``non_zero_length()``.
        """
        return self.stp.non_zero_length()

    @property
    def has_canonical_format(self):
        """
        Whether the COO index reports itself as canonical.

        Returns True when the sparse index pointer is null.
        """
        cdef _CSparseCOOIndexPtr coo_index = \
            <_CSparseCOOIndexPtr>(self.stp.sparse_index().get())

        if coo_index == nullptr:
            return True
        return coo_index.is_canonical()
cdef class SparseCSRMatrix(_Weakrefable):
    """
    A sparse CSR matrix.

    The constructor always raises TypeError; use one of the ``from_*``
    factory functions to obtain an instance.
    """

    def __init__(self):
        raise TypeError("Do not call SparseCSRMatrix's constructor directly, "
                        "use one of the `pyarrow.SparseCSRMatrix.from_*` "
                        "functions instead.")

    cdef void init(self, const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor):
        # Keep the shared_ptr, cache its raw pointer and wrap the data type.
        self.sp_sparse_tensor = sp_sparse_tensor
        self.stp = sp_sparse_tensor.get()
        self.type = pyarrow_wrap_data_type(self.stp.type())

    def __repr__(self):
        template = ("<pyarrow.SparseCSRMatrix>\n"
                    "type: {0.type}\n"
                    "shape: {0.shape}")
        return template.format(self)

    @classmethod
    def from_dense_numpy(cls, obj, dim_names=None):
        """
        Convert numpy.ndarray to arrow::SparseCSRMatrix
        """
        dense = Tensor.from_numpy(obj, dim_names=dim_names)
        return cls.from_tensor(dense)

    @staticmethod
    def from_numpy(data, indptr, indices, shape, dim_names=None):
        """
        Create arrow::SparseCSRMatrix from numpy.ndarrays

        Parameters
        ----------
        data : object
            Handed unchanged to NdarraysToSparseCSRMatrix.
        indptr : array-like
            Converted to int64; must be 1-dimensional.
        indices : array-like
            Converted to int64; must be 1-dimensional.
        shape : iterable of int
            Shape of the matrix.
        dim_names : iterable of str, optional
            Dimension names.

        Raises
        ------
        ValueError
            If ``indptr`` or ``indices`` is not 1-dimensional.
        """
        cdef:
            shared_ptr[CSparseCSRMatrix] csparse_tensor
            vector[int64_t] c_shape
            vector[c_string] c_dim_names

        for extent in shape:
            c_shape.push_back(extent)
        if dim_names is not None:
            for name in dim_names:
                c_dim_names.push_back(tobytes(name))

        # Enforce precondition for SparseCSRMatrix indices
        indptr = np.require(indptr, dtype='i8')
        indices = np.require(indices, dtype='i8')
        if indptr.ndim != 1:
            raise ValueError("Expected 1-dimensional array for "
                             "SparseCSRMatrix indptr")
        if indices.ndim != 1:
            raise ValueError("Expected 1-dimensional array for "
                             "SparseCSRMatrix indices")

        check_status(NdarraysToSparseCSRMatrix(c_default_memory_pool(),
                                               data, indptr, indices, c_shape,
                                               c_dim_names, &csparse_tensor))
        return pyarrow_wrap_sparse_csr_matrix(csparse_tensor)

    @staticmethod
    def from_scipy(obj, dim_names=None):
        """
        Convert scipy.sparse.csr_matrix to arrow::SparseCSRMatrix.

        Raises
        ------
        TypeError
            If ``obj`` is not a scipy.sparse.csr_matrix.
        """
        cdef:
            shared_ptr[CSparseCSRMatrix] csparse_tensor
            vector[int64_t] c_shape
            vector[c_string] c_dim_names

        import scipy.sparse
        if not isinstance(obj, scipy.sparse.csr_matrix):
            raise TypeError(
                "Expected scipy.sparse.csr_matrix, got {}".format(type(obj)))

        for extent in obj.shape:
            c_shape.push_back(extent)
        if dim_names is not None:
            for name in dim_names:
                c_dim_names.push_back(tobytes(name))

        # Enforce precondition for CSparseCSRMatrix indices
        indptr = np.require(obj.indptr, dtype='i8')
        indices = np.require(obj.indices, dtype='i8')

        check_status(NdarraysToSparseCSRMatrix(c_default_memory_pool(),
                                               obj.data, indptr, indices,
                                               c_shape, c_dim_names,
                                               &csparse_tensor))
        return pyarrow_wrap_sparse_csr_matrix(csparse_tensor)

    @staticmethod
    def from_tensor(obj):
        """
        Convert arrow::Tensor to arrow::SparseCSRMatrix.
        """
        cdef:
            shared_ptr[CSparseCSRMatrix] csparse_tensor
            shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)

        # The conversion runs without holding the GIL.
        with nogil:
            check_status(TensorToSparseCSRMatrix(ctensor, &csparse_tensor))

        return pyarrow_wrap_sparse_csr_matrix(csparse_tensor)

    def to_numpy(self):
        """
        Convert arrow::SparseCSRMatrix to numpy.ndarrays with zero copy.

        Returns
        -------
        tuple
            ``(data, indptr, indices)``.
        """
        cdef:
            PyObject* out_data
            PyObject* out_indptr
            PyObject* out_indices

        check_status(SparseCSRMatrixToNdarray(self.sp_sparse_tensor, self,
                                              &out_data, &out_indptr,
                                              &out_indices))
        return (PyObject_to_object(out_data), PyObject_to_object(out_indptr),
                PyObject_to_object(out_indices))

    def to_scipy(self):
        """
        Convert arrow::SparseCSRMatrix to scipy.sparse.csr_matrix.
        """
        cdef:
            PyObject* out_data
            PyObject* out_indptr
            PyObject* out_indices

        from scipy.sparse import csr_matrix

        check_status(SparseCSRMatrixToNdarray(self.sp_sparse_tensor, self,
                                              &out_data, &out_indptr,
                                              &out_indices))
        values = PyObject_to_object(out_data)
        indptr = PyObject_to_object(out_indptr)
        indices = PyObject_to_object(out_indices)

        # Only the first column of the values array is used.
        return csr_matrix((values[:, 0], indices, indptr), shape=self.shape)

    def to_tensor(self):
        """
        Convert arrow::SparseCSRMatrix to arrow::Tensor.
        """
        cdef shared_ptr[CTensor] ctensor

        with nogil:
            ctensor = GetResultValue(self.stp.ToTensor())

        return pyarrow_wrap_tensor(ctensor)

    def equals(self, SparseCSRMatrix other):
        """
        Return true if sparse tensors contains exactly equal data.
        """
        return self.stp.Equals(deref(other.stp))

    def __eq__(self, other):
        # Only SparseCSRMatrix instances are comparable.
        if not isinstance(other, SparseCSRMatrix):
            return NotImplemented
        return self.equals(other)

    @property
    def is_mutable(self):
        """
        Result of the underlying sparse matrix's ``is_mutable()``.
        """
        return self.stp.is_mutable()

    @property
    def ndim(self):
        """
        Result of the underlying sparse matrix's ``ndim()``.
        """
        return self.stp.ndim()

    @property
    def shape(self):
        """
        Shape of the sparse matrix as a tuple.
        """
        # Cython knows how to convert a vector[T] to a Python list
        return tuple(self.stp.shape())

    @property
    def size(self):
        """
        Result of the underlying sparse matrix's ``size()``.
        """
        return self.stp.size()

    def dim_name(self, i):
        """
        Return the name of dimension ``i`` decoded with ``frombytes``.
        """
        return frombytes(self.stp.dim_name(i))

    @property
    def dim_names(self):
        """
        Tuple of all dimension names, each decoded with ``frombytes``.
        """
        raw_names = tuple(self.stp.dim_names())
        return tuple(frombytes(raw) for raw in raw_names)

    @property
    def non_zero_length(self):
        """
        Result of the underlying sparse matrix's ``non_zero_length()``.
        """
        return self.stp.non_zero_length()
cdef class SparseCSCMatrix(_Weakrefable):
    """
    A sparse CSC matrix.

    The constructor always raises TypeError; use one of the ``from_*``
    factory functions to obtain an instance.
    """

    def __init__(self):
        raise TypeError("Do not call SparseCSCMatrix's constructor directly, "
                        "use one of the `pyarrow.SparseCSCMatrix.from_*` "
                        "functions instead.")

    cdef void init(self, const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor):
        # Keep the shared_ptr, cache its raw pointer and wrap the data type.
        self.sp_sparse_tensor = sp_sparse_tensor
        self.stp = sp_sparse_tensor.get()
        self.type = pyarrow_wrap_data_type(self.stp.type())

    def __repr__(self):
        template = ("<pyarrow.SparseCSCMatrix>\n"
                    "type: {0.type}\n"
                    "shape: {0.shape}")
        return template.format(self)

    @classmethod
    def from_dense_numpy(cls, obj, dim_names=None):
        """
        Convert numpy.ndarray to arrow::SparseCSCMatrix
        """
        dense = Tensor.from_numpy(obj, dim_names=dim_names)
        return cls.from_tensor(dense)

    @staticmethod
    def from_numpy(data, indptr, indices, shape, dim_names=None):
        """
        Create arrow::SparseCSCMatrix from numpy.ndarrays

        Parameters
        ----------
        data : object
            Handed unchanged to NdarraysToSparseCSCMatrix.
        indptr : array-like
            Converted to int64; must be 1-dimensional.
        indices : array-like
            Converted to int64; must be 1-dimensional.
        shape : iterable of int
            Shape of the matrix.
        dim_names : iterable of str, optional
            Dimension names.

        Raises
        ------
        ValueError
            If ``indptr`` or ``indices`` is not 1-dimensional.
        """
        cdef:
            shared_ptr[CSparseCSCMatrix] csparse_tensor
            vector[int64_t] c_shape
            vector[c_string] c_dim_names

        for extent in shape:
            c_shape.push_back(extent)
        if dim_names is not None:
            for name in dim_names:
                c_dim_names.push_back(tobytes(name))

        # Enforce precondition for SparseCSCMatrix indices
        indptr = np.require(indptr, dtype='i8')
        indices = np.require(indices, dtype='i8')
        if indptr.ndim != 1:
            raise ValueError("Expected 1-dimensional array for "
                             "SparseCSCMatrix indptr")
        if indices.ndim != 1:
            raise ValueError("Expected 1-dimensional array for "
                             "SparseCSCMatrix indices")

        check_status(NdarraysToSparseCSCMatrix(c_default_memory_pool(),
                                               data, indptr, indices, c_shape,
                                               c_dim_names, &csparse_tensor))
        return pyarrow_wrap_sparse_csc_matrix(csparse_tensor)

    @staticmethod
    def from_scipy(obj, dim_names=None):
        """
        Convert scipy.sparse.csc_matrix to arrow::SparseCSCMatrix

        Raises
        ------
        TypeError
            If ``obj`` is not a scipy.sparse.csc_matrix.
        """
        cdef:
            shared_ptr[CSparseCSCMatrix] csparse_tensor
            vector[int64_t] c_shape
            vector[c_string] c_dim_names

        import scipy.sparse
        if not isinstance(obj, scipy.sparse.csc_matrix):
            raise TypeError(
                "Expected scipy.sparse.csc_matrix, got {}".format(type(obj)))

        for extent in obj.shape:
            c_shape.push_back(extent)
        if dim_names is not None:
            for name in dim_names:
                c_dim_names.push_back(tobytes(name))

        # Enforce precondition for CSparseCSCMatrix indices
        indptr = np.require(obj.indptr, dtype='i8')
        indices = np.require(obj.indices, dtype='i8')

        check_status(NdarraysToSparseCSCMatrix(c_default_memory_pool(),
                                               obj.data, indptr, indices,
                                               c_shape, c_dim_names,
                                               &csparse_tensor))
        return pyarrow_wrap_sparse_csc_matrix(csparse_tensor)

    @staticmethod
    def from_tensor(obj):
        """
        Convert arrow::Tensor to arrow::SparseCSCMatrix
        """
        cdef:
            shared_ptr[CSparseCSCMatrix] csparse_tensor
            shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)

        # The conversion runs without holding the GIL.
        with nogil:
            check_status(TensorToSparseCSCMatrix(ctensor, &csparse_tensor))

        return pyarrow_wrap_sparse_csc_matrix(csparse_tensor)

    def to_numpy(self):
        """
        Convert arrow::SparseCSCMatrix to numpy.ndarrays with zero copy

        Returns
        -------
        tuple
            ``(data, indptr, indices)``.
        """
        cdef:
            PyObject* out_data
            PyObject* out_indptr
            PyObject* out_indices

        check_status(SparseCSCMatrixToNdarray(self.sp_sparse_tensor, self,
                                              &out_data, &out_indptr,
                                              &out_indices))
        return (PyObject_to_object(out_data), PyObject_to_object(out_indptr),
                PyObject_to_object(out_indices))

    def to_scipy(self):
        """
        Convert arrow::SparseCSCMatrix to scipy.sparse.csc_matrix
        """
        cdef:
            PyObject* out_data
            PyObject* out_indptr
            PyObject* out_indices

        from scipy.sparse import csc_matrix

        check_status(SparseCSCMatrixToNdarray(self.sp_sparse_tensor, self,
                                              &out_data, &out_indptr,
                                              &out_indices))
        values = PyObject_to_object(out_data)
        indptr = PyObject_to_object(out_indptr)
        indices = PyObject_to_object(out_indices)

        # Only the first column of the values array is used.
        return csc_matrix((values[:, 0], indices, indptr), shape=self.shape)

    def to_tensor(self):
        """
        Convert arrow::SparseCSCMatrix to arrow::Tensor
        """
        cdef shared_ptr[CTensor] ctensor

        with nogil:
            ctensor = GetResultValue(self.stp.ToTensor())

        return pyarrow_wrap_tensor(ctensor)

    def equals(self, SparseCSCMatrix other):
        """
        Return true if sparse tensors contains exactly equal data
        """
        return self.stp.Equals(deref(other.stp))

    def __eq__(self, other):
        # Only SparseCSCMatrix instances are comparable.
        if not isinstance(other, SparseCSCMatrix):
            return NotImplemented
        return self.equals(other)

    @property
    def is_mutable(self):
        """
        Result of the underlying sparse matrix's ``is_mutable()``.
        """
        return self.stp.is_mutable()

    @property
    def ndim(self):
        """
        Result of the underlying sparse matrix's ``ndim()``.
        """
        return self.stp.ndim()

    @property
    def shape(self):
        """
        Shape of the sparse matrix as a tuple.
        """
        # Cython knows how to convert a vector[T] to a Python list
        return tuple(self.stp.shape())

    @property
    def size(self):
        """
        Result of the underlying sparse matrix's ``size()``.
        """
        return self.stp.size()

    def dim_name(self, i):
        """
        Return the name of dimension ``i`` decoded with ``frombytes``.
        """
        return frombytes(self.stp.dim_name(i))

    @property
    def dim_names(self):
        """
        Tuple of all dimension names, each decoded with ``frombytes``.
        """
        raw_names = tuple(self.stp.dim_names())
        return tuple(frombytes(raw) for raw in raw_names)

    @property
    def non_zero_length(self):
        """
        Result of the underlying sparse matrix's ``non_zero_length()``.
        """
        return self.stp.non_zero_length()
cdef class SparseCSFTensor(_Weakrefable):
    """
    A sparse CSF tensor.

    The constructor always raises TypeError; use one of the ``from_*``
    factory functions to obtain an instance.
    """

    def __init__(self):
        raise TypeError("Do not call SparseCSFTensor's constructor directly, "
                        "use one of the `pyarrow.SparseCSFTensor.from_*` "
                        "functions instead.")

    cdef void init(self, const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor):
        # Keep the shared_ptr, cache its raw pointer and wrap the data type.
        self.sp_sparse_tensor = sp_sparse_tensor
        self.stp = sp_sparse_tensor.get()
        self.type = pyarrow_wrap_data_type(self.stp.type())

    def __repr__(self):
        return """<pyarrow.SparseCSFTensor>
type: {0.type}
shape: {0.shape}""".format(self)

    @classmethod
    def from_dense_numpy(cls, obj, dim_names=None):
        """
        Convert numpy.ndarray to arrow::SparseCSFTensor
        """
        return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))

    @staticmethod
    def from_numpy(data, indptr, indices, shape, axis_order=None,
                   dim_names=None):
        """
        Create arrow::SparseCSFTensor from numpy.ndarrays

        Parameters
        ----------
        data : object
            Handed unchanged to NdarraysToSparseCSFTensor.
        indptr : list or tuple of array-like
            ``len(shape) - 1`` entries, each converted to int64 and
            required to be 1-dimensional.
        indices : list or tuple of array-like
            ``len(shape)`` entries, each converted to int64 and required
            to be 1-dimensional.
        shape : sequence of int
            Shape of the tensor.
        axis_order : sequence of int or numpy.ndarray, optional
            Order of the axes. When None or empty, ``np.argsort(shape)``
            is used.
        dim_names : iterable of str, optional
            Dimension names.

        Raises
        ------
        TypeError
            If ``indptr`` or ``indices`` is not a list or tuple.
        ValueError
            If ``indptr`` or ``indices`` has the wrong number of entries,
            or contains an entry that is not 1-dimensional.
        """
        cdef:
            shared_ptr[CSparseCSFTensor] csparse_tensor
            vector[int64_t] c_axis_order
            vector[int64_t] c_shape
            vector[c_string] c_dim_names

        for x in shape:
            c_shape.push_back(x)

        # The truth value of a multi-element ndarray is ambiguous and
        # raises ValueError, so turn an ndarray into a plain list before
        # testing for "not given / empty".
        if isinstance(axis_order, np.ndarray):
            axis_order = axis_order.tolist()
        if not axis_order:
            axis_order = np.argsort(shape)
        for x in axis_order:
            c_axis_order.push_back(x)

        if dim_names is not None:
            for x in dim_names:
                c_dim_names.push_back(tobytes(x))

        # Enforce preconditions for SparseCSFTensor indices
        if not (isinstance(indptr, (list, tuple)) and
                isinstance(indices, (list, tuple))):
            raise TypeError("Expected list or tuple, got {}, {}"
                            .format(type(indptr), type(indices)))
        if len(indptr) != len(shape) - 1:
            # Report the count that is actually required (ndim - 1).
            raise ValueError("Expected list of {ndim} np.arrays for "
                             "SparseCSFTensor.indptr"
                             .format(ndim=len(shape) - 1))
        if len(indices) != len(shape):
            raise ValueError("Expected list of {ndim} np.arrays for "
                             "SparseCSFTensor.indices".format(ndim=len(shape)))

        # Convert first so that entries given as plain sequences expose
        # ``ndim`` for the dimensionality checks below.
        indptr = [np.require(arr, dtype='i8') for arr in indptr]
        indices = [np.require(arr, dtype='i8') for arr in indices]

        for arr in indptr:
            if arr.ndim != 1:
                raise ValueError("Expected a list of 1-dimensional arrays for "
                                 "SparseCSFTensor.indptr")
        for arr in indices:
            if arr.ndim != 1:
                raise ValueError("Expected a list of 1-dimensional arrays for "
                                 "SparseCSFTensor.indices")

        check_status(NdarraysToSparseCSFTensor(c_default_memory_pool(), data,
                                               indptr, indices, c_shape,
                                               c_axis_order, c_dim_names,
                                               &csparse_tensor))
        return pyarrow_wrap_sparse_csf_tensor(csparse_tensor)

    @staticmethod
    def from_tensor(obj):
        """
        Convert arrow::Tensor to arrow::SparseCSFTensor
        """
        cdef shared_ptr[CSparseCSFTensor] csparse_tensor
        cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)

        # The conversion runs without holding the GIL.
        with nogil:
            check_status(TensorToSparseCSFTensor(ctensor, &csparse_tensor))

        return pyarrow_wrap_sparse_csf_tensor(csparse_tensor)

    def to_numpy(self):
        """
        Convert arrow::SparseCSFTensor to numpy.ndarrays with zero copy

        Returns
        -------
        tuple
            ``(data, indptr, indices)``.
        """
        cdef PyObject* out_data
        cdef PyObject* out_indptr
        cdef PyObject* out_indices

        check_status(SparseCSFTensorToNdarray(self.sp_sparse_tensor, self,
                                              &out_data, &out_indptr,
                                              &out_indices))
        return (PyObject_to_object(out_data), PyObject_to_object(out_indptr),
                PyObject_to_object(out_indices))

    def to_tensor(self):
        """
        Convert arrow::SparseCSFTensor to arrow::Tensor
        """
        cdef shared_ptr[CTensor] ctensor

        with nogil:
            ctensor = GetResultValue(self.stp.ToTensor())

        return pyarrow_wrap_tensor(ctensor)

    def equals(self, SparseCSFTensor other):
        """
        Return true if sparse tensors contains exactly equal data
        """
        return self.stp.Equals(deref(other.stp))

    def __eq__(self, other):
        # Only SparseCSFTensor instances are comparable.
        if isinstance(other, SparseCSFTensor):
            return self.equals(other)
        else:
            return NotImplemented

    @property
    def is_mutable(self):
        """
        Result of the underlying sparse tensor's ``is_mutable()``.
        """
        return self.stp.is_mutable()

    @property
    def ndim(self):
        """
        Result of the underlying sparse tensor's ``ndim()``.
        """
        return self.stp.ndim()

    @property
    def shape(self):
        """
        Shape of the sparse tensor as a tuple.
        """
        # Cython knows how to convert a vector[T] to a Python list
        return tuple(self.stp.shape())

    @property
    def size(self):
        """
        Result of the underlying sparse tensor's ``size()``.
        """
        return self.stp.size()

    def dim_name(self, i):
        """
        Return the name of dimension ``i`` decoded with ``frombytes``.
        """
        return frombytes(self.stp.dim_name(i))

    @property
    def dim_names(self):
        """
        Tuple of all dimension names, each decoded with ``frombytes``.
        """
        return tuple(frombytes(x) for x in tuple(self.stp.dim_names()))

    @property
    def non_zero_length(self):
        """
        Result of the underlying sparse tensor's ``non_zero_length()``.
        """
        return self.stp.non_zero_length()
|