tensor.pxi 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
  17. cdef class Tensor(_Weakrefable):
  18. """
  19. A n-dimensional array a.k.a Tensor.
  20. """
  21. def __init__(self):
  22. raise TypeError("Do not call Tensor's constructor directly, use one "
  23. "of the `pyarrow.Tensor.from_*` functions instead.")
  24. cdef void init(self, const shared_ptr[CTensor]& sp_tensor):
  25. self.sp_tensor = sp_tensor
  26. self.tp = sp_tensor.get()
  27. self.type = pyarrow_wrap_data_type(self.tp.type())
  28. def __repr__(self):
  29. return """<pyarrow.Tensor>
  30. type: {0.type}
  31. shape: {0.shape}
  32. strides: {0.strides}""".format(self)
  33. @staticmethod
  34. def from_numpy(obj, dim_names=None):
  35. cdef:
  36. vector[c_string] c_dim_names
  37. shared_ptr[CTensor] ctensor
  38. if dim_names is not None:
  39. for x in dim_names:
  40. c_dim_names.push_back(tobytes(x))
  41. check_status(NdarrayToTensor(c_default_memory_pool(), obj,
  42. c_dim_names, &ctensor))
  43. return pyarrow_wrap_tensor(ctensor)
  44. def to_numpy(self):
  45. """
  46. Convert arrow::Tensor to numpy.ndarray with zero copy
  47. """
  48. cdef PyObject* out
  49. check_status(TensorToNdarray(self.sp_tensor, self, &out))
  50. return PyObject_to_object(out)
  51. def equals(self, Tensor other):
  52. """
  53. Return true if the tensors contains exactly equal data
  54. """
  55. return self.tp.Equals(deref(other.tp))
  56. def __eq__(self, other):
  57. if isinstance(other, Tensor):
  58. return self.equals(other)
  59. else:
  60. return NotImplemented
  61. def dim_name(self, i):
  62. return frombytes(self.tp.dim_name(i))
  63. @property
  64. def dim_names(self):
  65. return [frombytes(x) for x in tuple(self.tp.dim_names())]
  66. @property
  67. def is_mutable(self):
  68. return self.tp.is_mutable()
  69. @property
  70. def is_contiguous(self):
  71. return self.tp.is_contiguous()
  72. @property
  73. def ndim(self):
  74. return self.tp.ndim()
  75. @property
  76. def size(self):
  77. return self.tp.size()
  78. @property
  79. def shape(self):
  80. # Cython knows how to convert a vector[T] to a Python list
  81. return tuple(self.tp.shape())
  82. @property
  83. def strides(self):
  84. return tuple(self.tp.strides())
  85. def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
  86. buffer.buf = <char *> self.tp.data().get().data()
  87. pep3118_format = self.type.pep3118_format
  88. if pep3118_format is None:
  89. raise NotImplementedError("type %s not supported for buffer "
  90. "protocol" % (self.type,))
  91. buffer.format = pep3118_format
  92. buffer.itemsize = self.type.bit_width // 8
  93. buffer.internal = NULL
  94. buffer.len = self.tp.size() * buffer.itemsize
  95. buffer.ndim = self.tp.ndim()
  96. buffer.obj = self
  97. if self.tp.is_mutable():
  98. buffer.readonly = 0
  99. else:
  100. buffer.readonly = 1
  101. # NOTE: This assumes Py_ssize_t == int64_t, and that the shape
  102. # and strides arrays lifetime is tied to the tensor's
  103. buffer.shape = <Py_ssize_t *> &self.tp.shape()[0]
  104. buffer.strides = <Py_ssize_t *> &self.tp.strides()[0]
  105. buffer.suboffsets = NULL
  106. ctypedef CSparseCOOIndex* _CSparseCOOIndexPtr
  107. cdef class SparseCOOTensor(_Weakrefable):
  108. """
  109. A sparse COO tensor.
  110. """
  111. def __init__(self):
  112. raise TypeError("Do not call SparseCOOTensor's constructor directly, "
  113. "use one of the `pyarrow.SparseCOOTensor.from_*` "
  114. "functions instead.")
  115. cdef void init(self, const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor):
  116. self.sp_sparse_tensor = sp_sparse_tensor
  117. self.stp = sp_sparse_tensor.get()
  118. self.type = pyarrow_wrap_data_type(self.stp.type())
  119. def __repr__(self):
  120. return """<pyarrow.SparseCOOTensor>
  121. type: {0.type}
  122. shape: {0.shape}""".format(self)
  123. @classmethod
  124. def from_dense_numpy(cls, obj, dim_names=None):
  125. """
  126. Convert numpy.ndarray to arrow::SparseCOOTensor
  127. """
  128. return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
  129. @staticmethod
  130. def from_numpy(data, coords, shape, dim_names=None):
  131. """
  132. Create arrow::SparseCOOTensor from numpy.ndarrays
  133. """
  134. cdef shared_ptr[CSparseCOOTensor] csparse_tensor
  135. cdef vector[int64_t] c_shape
  136. cdef vector[c_string] c_dim_names
  137. for x in shape:
  138. c_shape.push_back(x)
  139. if dim_names is not None:
  140. for x in dim_names:
  141. c_dim_names.push_back(tobytes(x))
  142. # Enforce precondition for SparseCOOTensor indices
  143. coords = np.require(coords, dtype='i8', requirements='C')
  144. if coords.ndim != 2:
  145. raise ValueError("Expected 2-dimensional array for "
  146. "SparseCOOTensor indices")
  147. check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(),
  148. data, coords, c_shape,
  149. c_dim_names, &csparse_tensor))
  150. return pyarrow_wrap_sparse_coo_tensor(csparse_tensor)
  151. @staticmethod
  152. def from_scipy(obj, dim_names=None):
  153. """
  154. Convert scipy.sparse.coo_matrix to arrow::SparseCOOTensor
  155. """
  156. import scipy.sparse
  157. if not isinstance(obj, scipy.sparse.coo_matrix):
  158. raise TypeError(
  159. "Expected scipy.sparse.coo_matrix, got {}".format(type(obj)))
  160. cdef shared_ptr[CSparseCOOTensor] csparse_tensor
  161. cdef vector[int64_t] c_shape
  162. cdef vector[c_string] c_dim_names
  163. for x in obj.shape:
  164. c_shape.push_back(x)
  165. if dim_names is not None:
  166. for x in dim_names:
  167. c_dim_names.push_back(tobytes(x))
  168. row = obj.row
  169. col = obj.col
  170. # When SciPy's coo_matrix has canonical format, its indices matrix is
  171. # sorted in column-major order. As Arrow's SparseCOOIndex is sorted
  172. # in row-major order if it is canonical, we must sort indices matrix
  173. # into row-major order to keep its canonicalness, here.
  174. if obj.has_canonical_format:
  175. order = np.lexsort((col, row)) # sort in row-major order
  176. row = row[order]
  177. col = col[order]
  178. coords = np.vstack([row, col]).T
  179. coords = np.require(coords, dtype='i8', requirements='C')
  180. check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(),
  181. obj.data, coords, c_shape,
  182. c_dim_names, &csparse_tensor))
  183. return pyarrow_wrap_sparse_coo_tensor(csparse_tensor)
  184. @staticmethod
  185. def from_pydata_sparse(obj, dim_names=None):
  186. """
  187. Convert pydata/sparse.COO to arrow::SparseCOOTensor.
  188. """
  189. import sparse
  190. if not isinstance(obj, sparse.COO):
  191. raise TypeError(
  192. "Expected sparse.COO, got {}".format(type(obj)))
  193. cdef shared_ptr[CSparseCOOTensor] csparse_tensor
  194. cdef vector[int64_t] c_shape
  195. cdef vector[c_string] c_dim_names
  196. for x in obj.shape:
  197. c_shape.push_back(x)
  198. if dim_names is not None:
  199. for x in dim_names:
  200. c_dim_names.push_back(tobytes(x))
  201. coords = np.require(obj.coords.T, dtype='i8', requirements='C')
  202. check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(),
  203. obj.data, coords, c_shape,
  204. c_dim_names, &csparse_tensor))
  205. return pyarrow_wrap_sparse_coo_tensor(csparse_tensor)
  206. @staticmethod
  207. def from_tensor(obj):
  208. """
  209. Convert arrow::Tensor to arrow::SparseCOOTensor.
  210. """
  211. cdef shared_ptr[CSparseCOOTensor] csparse_tensor
  212. cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)
  213. with nogil:
  214. check_status(TensorToSparseCOOTensor(ctensor, &csparse_tensor))
  215. return pyarrow_wrap_sparse_coo_tensor(csparse_tensor)
  216. def to_numpy(self):
  217. """
  218. Convert arrow::SparseCOOTensor to numpy.ndarrays with zero copy.
  219. """
  220. cdef PyObject* out_data
  221. cdef PyObject* out_coords
  222. check_status(SparseCOOTensorToNdarray(self.sp_sparse_tensor, self,
  223. &out_data, &out_coords))
  224. return PyObject_to_object(out_data), PyObject_to_object(out_coords)
  225. def to_scipy(self):
  226. """
  227. Convert arrow::SparseCOOTensor to scipy.sparse.coo_matrix.
  228. """
  229. from scipy.sparse import coo_matrix
  230. cdef PyObject* out_data
  231. cdef PyObject* out_coords
  232. check_status(SparseCOOTensorToNdarray(self.sp_sparse_tensor, self,
  233. &out_data, &out_coords))
  234. data = PyObject_to_object(out_data)
  235. coords = PyObject_to_object(out_coords)
  236. row, col = coords[:, 0], coords[:, 1]
  237. result = coo_matrix((data[:, 0], (row, col)), shape=self.shape)
  238. # As the description in from_scipy above, we sorted indices matrix
  239. # in row-major order if SciPy's coo_matrix has canonical format.
  240. # So, we must call sum_duplicates() to make the result coo_matrix
  241. # has canonical format.
  242. if self.has_canonical_format:
  243. result.sum_duplicates()
  244. return result
  245. def to_pydata_sparse(self):
  246. """
  247. Convert arrow::SparseCOOTensor to pydata/sparse.COO.
  248. """
  249. from sparse import COO
  250. cdef PyObject* out_data
  251. cdef PyObject* out_coords
  252. check_status(SparseCOOTensorToNdarray(self.sp_sparse_tensor, self,
  253. &out_data, &out_coords))
  254. data = PyObject_to_object(out_data)
  255. coords = PyObject_to_object(out_coords)
  256. result = COO(data=data[:, 0], coords=coords.T, shape=self.shape)
  257. return result
  258. def to_tensor(self):
  259. """
  260. Convert arrow::SparseCOOTensor to arrow::Tensor.
  261. """
  262. cdef shared_ptr[CTensor] ctensor
  263. with nogil:
  264. ctensor = GetResultValue(self.stp.ToTensor())
  265. return pyarrow_wrap_tensor(ctensor)
  266. def equals(self, SparseCOOTensor other):
  267. """
  268. Return true if sparse tensors contains exactly equal data.
  269. """
  270. return self.stp.Equals(deref(other.stp))
  271. def __eq__(self, other):
  272. if isinstance(other, SparseCOOTensor):
  273. return self.equals(other)
  274. else:
  275. return NotImplemented
  276. @property
  277. def is_mutable(self):
  278. return self.stp.is_mutable()
  279. @property
  280. def ndim(self):
  281. return self.stp.ndim()
  282. @property
  283. def shape(self):
  284. # Cython knows how to convert a vector[T] to a Python list
  285. return tuple(self.stp.shape())
  286. @property
  287. def size(self):
  288. return self.stp.size()
  289. def dim_name(self, i):
  290. return frombytes(self.stp.dim_name(i))
  291. @property
  292. def dim_names(self):
  293. return tuple(frombytes(x) for x in tuple(self.stp.dim_names()))
  294. @property
  295. def non_zero_length(self):
  296. return self.stp.non_zero_length()
  297. @property
  298. def has_canonical_format(self):
  299. cdef:
  300. _CSparseCOOIndexPtr csi
  301. csi = <_CSparseCOOIndexPtr>(self.stp.sparse_index().get())
  302. if csi != nullptr:
  303. return csi.is_canonical()
  304. return True
  305. cdef class SparseCSRMatrix(_Weakrefable):
  306. """
  307. A sparse CSR matrix.
  308. """
  309. def __init__(self):
  310. raise TypeError("Do not call SparseCSRMatrix's constructor directly, "
  311. "use one of the `pyarrow.SparseCSRMatrix.from_*` "
  312. "functions instead.")
  313. cdef void init(self, const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor):
  314. self.sp_sparse_tensor = sp_sparse_tensor
  315. self.stp = sp_sparse_tensor.get()
  316. self.type = pyarrow_wrap_data_type(self.stp.type())
  317. def __repr__(self):
  318. return """<pyarrow.SparseCSRMatrix>
  319. type: {0.type}
  320. shape: {0.shape}""".format(self)
  321. @classmethod
  322. def from_dense_numpy(cls, obj, dim_names=None):
  323. """
  324. Convert numpy.ndarray to arrow::SparseCSRMatrix
  325. """
  326. return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
  327. @staticmethod
  328. def from_numpy(data, indptr, indices, shape, dim_names=None):
  329. """
  330. Create arrow::SparseCSRMatrix from numpy.ndarrays
  331. """
  332. cdef shared_ptr[CSparseCSRMatrix] csparse_tensor
  333. cdef vector[int64_t] c_shape
  334. cdef vector[c_string] c_dim_names
  335. for x in shape:
  336. c_shape.push_back(x)
  337. if dim_names is not None:
  338. for x in dim_names:
  339. c_dim_names.push_back(tobytes(x))
  340. # Enforce precondition for SparseCSRMatrix indices
  341. indptr = np.require(indptr, dtype='i8')
  342. indices = np.require(indices, dtype='i8')
  343. if indptr.ndim != 1:
  344. raise ValueError("Expected 1-dimensional array for "
  345. "SparseCSRMatrix indptr")
  346. if indices.ndim != 1:
  347. raise ValueError("Expected 1-dimensional array for "
  348. "SparseCSRMatrix indices")
  349. check_status(NdarraysToSparseCSRMatrix(c_default_memory_pool(),
  350. data, indptr, indices, c_shape,
  351. c_dim_names, &csparse_tensor))
  352. return pyarrow_wrap_sparse_csr_matrix(csparse_tensor)
  353. @staticmethod
  354. def from_scipy(obj, dim_names=None):
  355. """
  356. Convert scipy.sparse.csr_matrix to arrow::SparseCSRMatrix.
  357. """
  358. import scipy.sparse
  359. if not isinstance(obj, scipy.sparse.csr_matrix):
  360. raise TypeError(
  361. "Expected scipy.sparse.csr_matrix, got {}".format(type(obj)))
  362. cdef shared_ptr[CSparseCSRMatrix] csparse_tensor
  363. cdef vector[int64_t] c_shape
  364. cdef vector[c_string] c_dim_names
  365. for x in obj.shape:
  366. c_shape.push_back(x)
  367. if dim_names is not None:
  368. for x in dim_names:
  369. c_dim_names.push_back(tobytes(x))
  370. # Enforce precondition for CSparseCSRMatrix indices
  371. indptr = np.require(obj.indptr, dtype='i8')
  372. indices = np.require(obj.indices, dtype='i8')
  373. check_status(NdarraysToSparseCSRMatrix(c_default_memory_pool(),
  374. obj.data, indptr, indices,
  375. c_shape, c_dim_names,
  376. &csparse_tensor))
  377. return pyarrow_wrap_sparse_csr_matrix(csparse_tensor)
  378. @staticmethod
  379. def from_tensor(obj):
  380. """
  381. Convert arrow::Tensor to arrow::SparseCSRMatrix.
  382. """
  383. cdef shared_ptr[CSparseCSRMatrix] csparse_tensor
  384. cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)
  385. with nogil:
  386. check_status(TensorToSparseCSRMatrix(ctensor, &csparse_tensor))
  387. return pyarrow_wrap_sparse_csr_matrix(csparse_tensor)
  388. def to_numpy(self):
  389. """
  390. Convert arrow::SparseCSRMatrix to numpy.ndarrays with zero copy.
  391. """
  392. cdef PyObject* out_data
  393. cdef PyObject* out_indptr
  394. cdef PyObject* out_indices
  395. check_status(SparseCSRMatrixToNdarray(self.sp_sparse_tensor, self,
  396. &out_data, &out_indptr,
  397. &out_indices))
  398. return (PyObject_to_object(out_data), PyObject_to_object(out_indptr),
  399. PyObject_to_object(out_indices))
  400. def to_scipy(self):
  401. """
  402. Convert arrow::SparseCSRMatrix to scipy.sparse.csr_matrix.
  403. """
  404. from scipy.sparse import csr_matrix
  405. cdef PyObject* out_data
  406. cdef PyObject* out_indptr
  407. cdef PyObject* out_indices
  408. check_status(SparseCSRMatrixToNdarray(self.sp_sparse_tensor, self,
  409. &out_data, &out_indptr,
  410. &out_indices))
  411. data = PyObject_to_object(out_data)
  412. indptr = PyObject_to_object(out_indptr)
  413. indices = PyObject_to_object(out_indices)
  414. result = csr_matrix((data[:, 0], indices, indptr), shape=self.shape)
  415. return result
  416. def to_tensor(self):
  417. """
  418. Convert arrow::SparseCSRMatrix to arrow::Tensor.
  419. """
  420. cdef shared_ptr[CTensor] ctensor
  421. with nogil:
  422. ctensor = GetResultValue(self.stp.ToTensor())
  423. return pyarrow_wrap_tensor(ctensor)
  424. def equals(self, SparseCSRMatrix other):
  425. """
  426. Return true if sparse tensors contains exactly equal data.
  427. """
  428. return self.stp.Equals(deref(other.stp))
  429. def __eq__(self, other):
  430. if isinstance(other, SparseCSRMatrix):
  431. return self.equals(other)
  432. else:
  433. return NotImplemented
  434. @property
  435. def is_mutable(self):
  436. return self.stp.is_mutable()
  437. @property
  438. def ndim(self):
  439. return self.stp.ndim()
  440. @property
  441. def shape(self):
  442. # Cython knows how to convert a vector[T] to a Python list
  443. return tuple(self.stp.shape())
  444. @property
  445. def size(self):
  446. return self.stp.size()
  447. def dim_name(self, i):
  448. return frombytes(self.stp.dim_name(i))
  449. @property
  450. def dim_names(self):
  451. return tuple(frombytes(x) for x in tuple(self.stp.dim_names()))
  452. @property
  453. def non_zero_length(self):
  454. return self.stp.non_zero_length()
  455. cdef class SparseCSCMatrix(_Weakrefable):
  456. """
  457. A sparse CSC matrix.
  458. """
  459. def __init__(self):
  460. raise TypeError("Do not call SparseCSCMatrix's constructor directly, "
  461. "use one of the `pyarrow.SparseCSCMatrix.from_*` "
  462. "functions instead.")
  463. cdef void init(self, const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor):
  464. self.sp_sparse_tensor = sp_sparse_tensor
  465. self.stp = sp_sparse_tensor.get()
  466. self.type = pyarrow_wrap_data_type(self.stp.type())
  467. def __repr__(self):
  468. return """<pyarrow.SparseCSCMatrix>
  469. type: {0.type}
  470. shape: {0.shape}""".format(self)
  471. @classmethod
  472. def from_dense_numpy(cls, obj, dim_names=None):
  473. """
  474. Convert numpy.ndarray to arrow::SparseCSCMatrix
  475. """
  476. return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
  477. @staticmethod
  478. def from_numpy(data, indptr, indices, shape, dim_names=None):
  479. """
  480. Create arrow::SparseCSCMatrix from numpy.ndarrays
  481. """
  482. cdef shared_ptr[CSparseCSCMatrix] csparse_tensor
  483. cdef vector[int64_t] c_shape
  484. cdef vector[c_string] c_dim_names
  485. for x in shape:
  486. c_shape.push_back(x)
  487. if dim_names is not None:
  488. for x in dim_names:
  489. c_dim_names.push_back(tobytes(x))
  490. # Enforce precondition for SparseCSCMatrix indices
  491. indptr = np.require(indptr, dtype='i8')
  492. indices = np.require(indices, dtype='i8')
  493. if indptr.ndim != 1:
  494. raise ValueError("Expected 1-dimensional array for "
  495. "SparseCSCMatrix indptr")
  496. if indices.ndim != 1:
  497. raise ValueError("Expected 1-dimensional array for "
  498. "SparseCSCMatrix indices")
  499. check_status(NdarraysToSparseCSCMatrix(c_default_memory_pool(),
  500. data, indptr, indices, c_shape,
  501. c_dim_names, &csparse_tensor))
  502. return pyarrow_wrap_sparse_csc_matrix(csparse_tensor)
  503. @staticmethod
  504. def from_scipy(obj, dim_names=None):
  505. """
  506. Convert scipy.sparse.csc_matrix to arrow::SparseCSCMatrix
  507. """
  508. import scipy.sparse
  509. if not isinstance(obj, scipy.sparse.csc_matrix):
  510. raise TypeError(
  511. "Expected scipy.sparse.csc_matrix, got {}".format(type(obj)))
  512. cdef shared_ptr[CSparseCSCMatrix] csparse_tensor
  513. cdef vector[int64_t] c_shape
  514. cdef vector[c_string] c_dim_names
  515. for x in obj.shape:
  516. c_shape.push_back(x)
  517. if dim_names is not None:
  518. for x in dim_names:
  519. c_dim_names.push_back(tobytes(x))
  520. # Enforce precondition for CSparseCSCMatrix indices
  521. indptr = np.require(obj.indptr, dtype='i8')
  522. indices = np.require(obj.indices, dtype='i8')
  523. check_status(NdarraysToSparseCSCMatrix(c_default_memory_pool(),
  524. obj.data, indptr, indices,
  525. c_shape, c_dim_names,
  526. &csparse_tensor))
  527. return pyarrow_wrap_sparse_csc_matrix(csparse_tensor)
  528. @staticmethod
  529. def from_tensor(obj):
  530. """
  531. Convert arrow::Tensor to arrow::SparseCSCMatrix
  532. """
  533. cdef shared_ptr[CSparseCSCMatrix] csparse_tensor
  534. cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)
  535. with nogil:
  536. check_status(TensorToSparseCSCMatrix(ctensor, &csparse_tensor))
  537. return pyarrow_wrap_sparse_csc_matrix(csparse_tensor)
  538. def to_numpy(self):
  539. """
  540. Convert arrow::SparseCSCMatrix to numpy.ndarrays with zero copy
  541. """
  542. cdef PyObject* out_data
  543. cdef PyObject* out_indptr
  544. cdef PyObject* out_indices
  545. check_status(SparseCSCMatrixToNdarray(self.sp_sparse_tensor, self,
  546. &out_data, &out_indptr,
  547. &out_indices))
  548. return (PyObject_to_object(out_data), PyObject_to_object(out_indptr),
  549. PyObject_to_object(out_indices))
  550. def to_scipy(self):
  551. """
  552. Convert arrow::SparseCSCMatrix to scipy.sparse.csc_matrix
  553. """
  554. from scipy.sparse import csc_matrix
  555. cdef PyObject* out_data
  556. cdef PyObject* out_indptr
  557. cdef PyObject* out_indices
  558. check_status(SparseCSCMatrixToNdarray(self.sp_sparse_tensor, self,
  559. &out_data, &out_indptr,
  560. &out_indices))
  561. data = PyObject_to_object(out_data)
  562. indptr = PyObject_to_object(out_indptr)
  563. indices = PyObject_to_object(out_indices)
  564. result = csc_matrix((data[:, 0], indices, indptr), shape=self.shape)
  565. return result
  566. def to_tensor(self):
  567. """
  568. Convert arrow::SparseCSCMatrix to arrow::Tensor
  569. """
  570. cdef shared_ptr[CTensor] ctensor
  571. with nogil:
  572. ctensor = GetResultValue(self.stp.ToTensor())
  573. return pyarrow_wrap_tensor(ctensor)
  574. def equals(self, SparseCSCMatrix other):
  575. """
  576. Return true if sparse tensors contains exactly equal data
  577. """
  578. return self.stp.Equals(deref(other.stp))
  579. def __eq__(self, other):
  580. if isinstance(other, SparseCSCMatrix):
  581. return self.equals(other)
  582. else:
  583. return NotImplemented
  584. @property
  585. def is_mutable(self):
  586. return self.stp.is_mutable()
  587. @property
  588. def ndim(self):
  589. return self.stp.ndim()
  590. @property
  591. def shape(self):
  592. # Cython knows how to convert a vector[T] to a Python list
  593. return tuple(self.stp.shape())
  594. @property
  595. def size(self):
  596. return self.stp.size()
  597. def dim_name(self, i):
  598. return frombytes(self.stp.dim_name(i))
  599. @property
  600. def dim_names(self):
  601. return tuple(frombytes(x) for x in tuple(self.stp.dim_names()))
  602. @property
  603. def non_zero_length(self):
  604. return self.stp.non_zero_length()
  605. cdef class SparseCSFTensor(_Weakrefable):
  606. """
  607. A sparse CSF tensor.
  608. """
  609. def __init__(self):
  610. raise TypeError("Do not call SparseCSFTensor's constructor directly, "
  611. "use one of the `pyarrow.SparseCSFTensor.from_*` "
  612. "functions instead.")
  613. cdef void init(self, const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor):
  614. self.sp_sparse_tensor = sp_sparse_tensor
  615. self.stp = sp_sparse_tensor.get()
  616. self.type = pyarrow_wrap_data_type(self.stp.type())
  617. def __repr__(self):
  618. return """<pyarrow.SparseCSFTensor>
  619. type: {0.type}
  620. shape: {0.shape}""".format(self)
  621. @classmethod
  622. def from_dense_numpy(cls, obj, dim_names=None):
  623. """
  624. Convert numpy.ndarray to arrow::SparseCSFTensor
  625. """
  626. return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
  627. @staticmethod
  628. def from_numpy(data, indptr, indices, shape, axis_order=None,
  629. dim_names=None):
  630. """
  631. Create arrow::SparseCSFTensor from numpy.ndarrays
  632. """
  633. cdef shared_ptr[CSparseCSFTensor] csparse_tensor
  634. cdef vector[int64_t] c_axis_order
  635. cdef vector[int64_t] c_shape
  636. cdef vector[c_string] c_dim_names
  637. for x in shape:
  638. c_shape.push_back(x)
  639. if not axis_order:
  640. axis_order = np.argsort(shape)
  641. for x in axis_order:
  642. c_axis_order.push_back(x)
  643. if dim_names is not None:
  644. for x in dim_names:
  645. c_dim_names.push_back(tobytes(x))
  646. # Enforce preconditions for SparseCSFTensor indices
  647. if not (isinstance(indptr, (list, tuple)) and
  648. isinstance(indices, (list, tuple))):
  649. raise TypeError("Expected list or tuple, got {}, {}"
  650. .format(type(indptr), type(indices)))
  651. if len(indptr) != len(shape) - 1:
  652. raise ValueError("Expected list of {ndim} np.arrays for "
  653. "SparseCSFTensor.indptr".format(ndim=len(shape)))
  654. if len(indices) != len(shape):
  655. raise ValueError("Expected list of {ndim} np.arrays for "
  656. "SparseCSFTensor.indices".format(ndim=len(shape)))
  657. if any([x.ndim != 1 for x in indptr]):
  658. raise ValueError("Expected a list of 1-dimensional arrays for "
  659. "SparseCSFTensor.indptr")
  660. if any([x.ndim != 1 for x in indices]):
  661. raise ValueError("Expected a list of 1-dimensional arrays for "
  662. "SparseCSFTensor.indices")
  663. indptr = [np.require(arr, dtype='i8') for arr in indptr]
  664. indices = [np.require(arr, dtype='i8') for arr in indices]
  665. check_status(NdarraysToSparseCSFTensor(c_default_memory_pool(), data,
  666. indptr, indices, c_shape,
  667. c_axis_order, c_dim_names,
  668. &csparse_tensor))
  669. return pyarrow_wrap_sparse_csf_tensor(csparse_tensor)
  670. @staticmethod
  671. def from_tensor(obj):
  672. """
  673. Convert arrow::Tensor to arrow::SparseCSFTensor
  674. """
  675. cdef shared_ptr[CSparseCSFTensor] csparse_tensor
  676. cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)
  677. with nogil:
  678. check_status(TensorToSparseCSFTensor(ctensor, &csparse_tensor))
  679. return pyarrow_wrap_sparse_csf_tensor(csparse_tensor)
  680. def to_numpy(self):
  681. """
  682. Convert arrow::SparseCSFTensor to numpy.ndarrays with zero copy
  683. """
  684. cdef PyObject* out_data
  685. cdef PyObject* out_indptr
  686. cdef PyObject* out_indices
  687. check_status(SparseCSFTensorToNdarray(self.sp_sparse_tensor, self,
  688. &out_data, &out_indptr,
  689. &out_indices))
  690. return (PyObject_to_object(out_data), PyObject_to_object(out_indptr),
  691. PyObject_to_object(out_indices))
  692. def to_tensor(self):
  693. """
  694. Convert arrow::SparseCSFTensor to arrow::Tensor
  695. """
  696. cdef shared_ptr[CTensor] ctensor
  697. with nogil:
  698. ctensor = GetResultValue(self.stp.ToTensor())
  699. return pyarrow_wrap_tensor(ctensor)
  700. def equals(self, SparseCSFTensor other):
  701. """
  702. Return true if sparse tensors contains exactly equal data
  703. """
  704. return self.stp.Equals(deref(other.stp))
  705. def __eq__(self, other):
  706. if isinstance(other, SparseCSFTensor):
  707. return self.equals(other)
  708. else:
  709. return NotImplemented
  710. @property
  711. def is_mutable(self):
  712. return self.stp.is_mutable()
  713. @property
  714. def ndim(self):
  715. return self.stp.ndim()
  716. @property
  717. def shape(self):
  718. # Cython knows how to convert a vector[T] to a Python list
  719. return tuple(self.stp.shape())
  720. @property
  721. def size(self):
  722. return self.stp.size()
  723. def dim_name(self, i):
  724. return frombytes(self.stp.dim_name(i))
  725. @property
  726. def dim_names(self):
  727. return tuple(frombytes(x) for x in tuple(self.stp.dim_names()))
  728. @property
  729. def non_zero_length(self):
  730. return self.stp.non_zero_length()