py_lazy_mkql_dict.cpp 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705
  1. #include "py_cast.h"
  2. #include "py_errors.h"
  3. #include "py_gil.h"
  4. #include "py_utils.h"
  5. #include <yql/essentials/public/udf/udf_value.h>
  6. #include <yql/essentials/public/udf/udf_value_builder.h>
  7. #include <yql/essentials/public/udf/udf_type_inspection.h>
  8. #include <yql/essentials/public/udf/udf_terminator.h>
  9. #include <util/generic/maybe.h>
  10. #include <util/string/builder.h>
  11. using namespace NKikimr;
  12. namespace NPython {
  13. namespace {
  14. //////////////////////////////////////////////////////////////////////////////
  15. // TLazyDictBase
  16. //////////////////////////////////////////////////////////////////////////////
  17. class TLazyDictBase: public NUdf::TBoxedValue
  18. {
  19. protected:
  20. class TIterator: public NUdf::TBoxedValue {
  21. public:
  22. TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter)
  23. : CastCtx_(ctx), ItemType_(type), PyIter_(std::move(pyIter))
  24. {}
  25. ~TIterator() {
  26. const TPyGilLocker lock;
  27. PyIter_.Reset();
  28. }
  29. private:
  30. bool Skip() override try {
  31. const TPyGilLocker lock;
  32. const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
  33. if (next) {
  34. return true;
  35. }
  36. if (PyErr_Occurred()) {
  37. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  38. }
  39. return false;
  40. } catch (const yexception& e) {
  41. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  42. }
  43. bool Next(NUdf::TUnboxedValue& value) override try {
  44. const TPyGilLocker lock;
  45. const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
  46. if (next) {
  47. value = FromPyObject(CastCtx_, ItemType_, next.Get());
  48. return true;
  49. }
  50. if (PyErr_Occurred()) {
  51. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  52. }
  53. return false;
  54. } catch (const yexception& e) {
  55. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  56. }
  57. bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override {
  58. payload = NUdf::TUnboxedValuePod::Void();
  59. return Next(key);
  60. }
  61. private:
  62. const TPyCastContext::TPtr CastCtx_;
  63. const NUdf::TType* ItemType_;
  64. TPyObjectPtr PyIter_;
  65. };
  66. class TPairIterator: public NUdf::TBoxedValue {
  67. public:
  68. TPairIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, TPyObjectPtr&& pyIter)
  69. : CastCtx_(ctx), KeyType_(keyType), PayType_(payType), PyIter_(std::move(pyIter))
  70. {}
  71. ~TPairIterator() {
  72. const TPyGilLocker lock;
  73. PyIter_.Reset();
  74. }
  75. private:
  76. bool Skip() override try {
  77. const TPyGilLocker lock;
  78. const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
  79. if (next) {
  80. return true;
  81. }
  82. if (PyErr_Occurred()) {
  83. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  84. }
  85. return false;
  86. } catch (const yexception& e) {
  87. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  88. }
  89. bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try {
  90. const TPyGilLocker lock;
  91. const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
  92. if (next) {
  93. key = FromPyObject(CastCtx_, KeyType_, PyTuple_GET_ITEM(next.Get(), 0));
  94. pay = FromPyObject(CastCtx_, PayType_, PyTuple_GET_ITEM(next.Get(), 1));
  95. return true;
  96. }
  97. if (PyErr_Occurred()) {
  98. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  99. }
  100. return false;
  101. } catch (const yexception& e) {
  102. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  103. }
  104. private:
  105. const TPyCastContext::TPtr CastCtx_;
  106. const NUdf::TType* KeyType_;
  107. const NUdf::TType* PayType_;
  108. TPyObjectPtr PyIter_;
  109. };
  110. TLazyDictBase(const TPyCastContext::TPtr& castCtx, const NUdf::TType* itemType, PyObject* pyObject)
  111. : CastCtx_(castCtx), ItemType_(itemType), PyObject_(pyObject, TPyObjectPtr::AddRef())
  112. {}
  113. ~TLazyDictBase() {
  114. const TPyGilLocker lock;
  115. PyObject_.Reset();
  116. }
  117. bool HasDictItems() const override try {
  118. const TPyGilLocker lock;
  119. const auto has = PyObject_IsTrue(PyObject_.Get());
  120. if (has < 0) {
  121. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  122. }
  123. return bool(has);
  124. }
  125. catch (const yexception& e) {
  126. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  127. }
  128. const TPyCastContext::TPtr CastCtx_;
  129. const NUdf::TType* ItemType_;
  130. TPyObjectPtr PyObject_;
  131. };
  132. //////////////////////////////////////////////////////////////////////////////
  133. // TLazyMapping
  134. //////////////////////////////////////////////////////////////////////////////
  135. class TLazyMapping: public TLazyDictBase
  136. {
  137. public:
  138. TLazyMapping(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict)
  139. : TLazyDictBase(ctx, keyType, dict), PayType_(payType)
  140. {}
  141. private:
  142. bool IsSortedDict() const override { return false; }
  143. ui64 GetDictLength() const override try {
  144. const TPyGilLocker lock;
  145. const auto len = PyMapping_Size(PyObject_.Get());
  146. if (len < 0) {
  147. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  148. }
  149. return ui64(len);
  150. } catch (const yexception& e) {
  151. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  152. }
  153. NUdf::TUnboxedValue GetKeysIterator() const override try {
  154. const TPyGilLocker lock;
  155. if (const TPyObjectPtr pyList = PyMapping_Keys(PyObject_.Get())) {
  156. if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
  157. return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
  158. }
  159. }
  160. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  161. } catch (const yexception& e) {
  162. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  163. }
  164. NUdf::TUnboxedValue GetPayloadsIterator() const override try {
  165. const TPyGilLocker lock;
  166. if (const TPyObjectPtr pyList = PyMapping_Values(PyObject_.Get())) {
  167. if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
  168. return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(pyIter)));
  169. }
  170. }
  171. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  172. } catch (const yexception& e) {
  173. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  174. }
  175. NUdf::TUnboxedValue GetDictIterator() const override try {
  176. const TPyGilLocker lock;
  177. if (const TPyObjectPtr pyList = PyMapping_Items(PyObject_.Get())) {
  178. if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
  179. return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(pyIter)));
  180. }
  181. }
  182. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  183. } catch (const yexception& e) {
  184. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  185. }
  186. NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try {
  187. const TPyGilLocker lock;
  188. if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
  189. if (const auto item = PyObject_GetItem(PyObject_.Get(), pyKey.Get())) {
  190. return FromPyObject(CastCtx_, PayType_, item).Release().MakeOptional();
  191. }
  192. if (PyErr_Occurred()) {
  193. PyErr_Clear();
  194. }
  195. return NUdf::TUnboxedValue();
  196. }
  197. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  198. } catch (const yexception& e) {
  199. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  200. }
  201. bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
  202. const TPyGilLocker lock;
  203. if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
  204. const auto map = PyObject_.Get();
  205. const auto has = map->ob_type->tp_as_sequence && map->ob_type->tp_as_sequence->sq_contains ?
  206. (map->ob_type->tp_as_sequence->sq_contains)(map, pyKey.Get()) :
  207. PyMapping_HasKey(map, pyKey.Get());
  208. if (has >= 0) {
  209. return bool(has);
  210. }
  211. }
  212. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  213. } catch (const yexception& e) {
  214. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  215. }
  216. private:
  217. const NUdf::TType* PayType_;
  218. };
  219. //////////////////////////////////////////////////////////////////////////////
  220. // TLazyDict
  221. //////////////////////////////////////////////////////////////////////////////
  222. class TLazyDict: public TLazyDictBase
  223. {
  224. public:
  225. TLazyDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict)
  226. : TLazyDictBase(ctx, keyType, dict), PayType_(payType)
  227. {}
  228. private:
  229. bool IsSortedDict() const override { return false; }
  230. ui64 GetDictLength() const override try {
  231. const TPyGilLocker lock;
  232. const auto len = PyDict_Size(PyObject_.Get());
  233. if (len < 0) {
  234. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  235. }
  236. return ui64(len);
  237. } catch (const yexception& e) {
  238. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  239. }
  240. NUdf::TUnboxedValue GetKeysIterator() const override try {
  241. const TPyGilLocker lock;
  242. if (const TPyObjectPtr pyList = PyDict_Keys(PyObject_.Get())) {
  243. if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
  244. return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
  245. }
  246. }
  247. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  248. } catch (const yexception& e) {
  249. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  250. }
  251. NUdf::TUnboxedValue GetPayloadsIterator() const override try {
  252. const TPyGilLocker lock;
  253. if (const TPyObjectPtr pyList = PyDict_Values(PyObject_.Get())) {
  254. if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
  255. return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(pyIter)));
  256. }
  257. }
  258. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  259. } catch (const yexception& e) {
  260. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  261. }
  262. NUdf::TUnboxedValue GetDictIterator() const override try {
  263. const TPyGilLocker lock;
  264. if (const TPyObjectPtr pyList = PyDict_Items(PyObject_.Get())) {
  265. if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
  266. return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(pyIter)));
  267. }
  268. }
  269. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  270. } catch (const yexception& e) {
  271. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  272. }
  273. NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try {
  274. const TPyGilLocker lock;
  275. if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
  276. if (const auto item = PyDict_GetItem(PyObject_.Get(), pyKey.Get())) {
  277. return FromPyObject(CastCtx_, PayType_, item).Release().MakeOptional();
  278. } else if (!PyErr_Occurred()) {
  279. return NUdf::TUnboxedValue();
  280. }
  281. }
  282. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  283. } catch (const yexception& e) {
  284. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  285. }
  286. bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
  287. const TPyGilLocker lock;
  288. if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
  289. const auto has = PyDict_Contains(PyObject_.Get(), pyKey.Get());
  290. if (has >= 0) {
  291. return bool(has);
  292. }
  293. }
  294. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  295. } catch (const yexception& e) {
  296. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  297. }
  298. private:
  299. const NUdf::TType* PayType_;
  300. };
  301. //////////////////////////////////////////////////////////////////////////////
  302. // TLazySet
  303. //////////////////////////////////////////////////////////////////////////////
  304. class TLazySet: public TLazyDictBase
  305. {
  306. public:
  307. TLazySet(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, PyObject* set)
  308. : TLazyDictBase(ctx, itemType, set)
  309. {}
  310. private:
  311. bool IsSortedDict() const override { return false; }
  312. ui64 GetDictLength() const override try {
  313. const TPyGilLocker lock;
  314. const auto len = PySet_Size(PyObject_.Get());
  315. if (len < 0) {
  316. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  317. }
  318. return ui64(len);
  319. } catch (const yexception& e) {
  320. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  321. }
  322. NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
  323. return Contains(key) ? NUdf::TUnboxedValuePod::Void() : NUdf::TUnboxedValuePod();
  324. }
  325. bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
  326. const TPyGilLocker lock;
  327. if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
  328. const auto has = PySet_Contains(PyObject_.Get(), pyKey.Get());
  329. if (has >= 0) {
  330. return bool(has);
  331. }
  332. }
  333. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  334. } catch (const yexception& e) {
  335. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  336. }
  337. NUdf::TUnboxedValue GetKeysIterator() const override try {
  338. const TPyGilLocker lock;
  339. if (TPyObjectPtr pyIter = PyObject_GetIter(PyObject_.Get())) {
  340. return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
  341. }
  342. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  343. } catch (const yexception& e) {
  344. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  345. }
  346. NUdf::TUnboxedValue GetPayloadsIterator() const override {
  347. return GetKeysIterator();
  348. }
  349. NUdf::TUnboxedValue GetDictIterator() const override {
  350. return GetKeysIterator();
  351. }
  352. NUdf::TUnboxedValue GetListIterator() const override {
  353. return GetKeysIterator();
  354. }
  355. ui64 GetListLength() const override {
  356. return GetDictLength();
  357. }
  358. bool HasListItems() const override {
  359. return HasDictItems();
  360. }
  361. bool HasFastListLength() const override {
  362. return true;
  363. }
  364. };
  365. //////////////////////////////////////////////////////////////////////////////
  366. // TLazySequenceAsSet
  367. //////////////////////////////////////////////////////////////////////////////
  368. class TLazySequenceAsSet: public TLazyDictBase
  369. {
  370. public:
  371. TLazySequenceAsSet(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, PyObject* sequence)
  372. : TLazyDictBase(ctx, keyType, sequence)
  373. {}
  374. private:
  375. bool IsSortedDict() const override { return false; }
  376. ui64 GetDictLength() const override try {
  377. const TPyGilLocker lock;
  378. const auto len = PySequence_Size(PyObject_.Get());
  379. if (len < 0) {
  380. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  381. }
  382. return ui64(len);
  383. } catch (const yexception& e) {
  384. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  385. }
  386. NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
  387. return Contains(key) ? NUdf::TUnboxedValuePod::Void() : NUdf::TUnboxedValuePod();
  388. }
  389. bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
  390. const TPyGilLocker lock;
  391. if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
  392. const auto has = PySequence_Contains(PyObject_.Get(), pyKey.Get());
  393. if (has >= 0) {
  394. return bool(has);
  395. }
  396. }
  397. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  398. } catch (const yexception& e) {
  399. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  400. }
  401. NUdf::TUnboxedValue GetKeysIterator() const override try {
  402. const TPyGilLocker lock;
  403. if (TPyObjectPtr pyIter = PyObject_GetIter(PyObject_.Get())) {
  404. return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
  405. }
  406. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  407. } catch (const yexception& e) {
  408. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  409. }
  410. NUdf::TUnboxedValue GetPayloadsIterator() const override {
  411. return GetKeysIterator();
  412. }
  413. NUdf::TUnboxedValue GetDictIterator() const override {
  414. return GetKeysIterator();
  415. }
  416. NUdf::TUnboxedValue GetListIterator() const override {
  417. return GetKeysIterator();
  418. }
  419. ui64 GetListLength() const override {
  420. return GetDictLength();
  421. }
  422. bool HasListItems() const override {
  423. return HasDictItems();
  424. }
  425. bool HasFastListLength() const override {
  426. return true;
  427. }
  428. };
  429. //////////////////////////////////////////////////////////////////////////////
  430. // TLazySequenceAsDict
  431. //////////////////////////////////////////////////////////////////////////////
  432. template<typename KeyType>
  433. class TLazySequenceAsDict: public NUdf::TBoxedValue
  434. {
  435. private:
  436. class TKeyIterator: public NUdf::TBoxedValue {
  437. public:
  438. TKeyIterator(Py_ssize_t size)
  439. : Size(size), Index(0)
  440. {}
  441. private:
  442. bool Skip() override {
  443. if (Index >= Size)
  444. return false;
  445. ++Index;
  446. return true;
  447. }
  448. bool Next(NUdf::TUnboxedValue& value) override {
  449. if (Index >= Size)
  450. return false;
  451. value = NUdf::TUnboxedValuePod(KeyType(Index++));
  452. return true;
  453. }
  454. private:
  455. const Py_ssize_t Size;
  456. Py_ssize_t Index;
  457. };
  458. class TIterator: public NUdf::TBoxedValue {
  459. public:
  460. TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, Py_ssize_t size, const TPyObjectPtr& pySeq)
  461. : CastCtx_(ctx), ItemType_(itemType), PySeq_(pySeq), Size(size), Index(0)
  462. {}
  463. ~TIterator() {
  464. const TPyGilLocker lock;
  465. PySeq_.Reset();
  466. }
  467. private:
  468. bool Skip() override {
  469. if (Index >= Size)
  470. return false;
  471. ++Index;
  472. return true;
  473. }
  474. bool Next(NUdf::TUnboxedValue& value) override try {
  475. if (Index >= Size)
  476. return false;
  477. const TPyGilLocker lock;
  478. value = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++));
  479. return true;
  480. } catch (const yexception& e) {
  481. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  482. }
  483. bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try {
  484. if (Index >= Size)
  485. return false;
  486. const TPyGilLocker lock;
  487. key = NUdf::TUnboxedValuePod(KeyType(Index));
  488. pay = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++));
  489. return true;
  490. } catch (const yexception& e) {
  491. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  492. }
  493. private:
  494. const TPyCastContext::TPtr CastCtx_;
  495. const NUdf::TType* ItemType_;
  496. TPyObjectPtr PySeq_;
  497. const Py_ssize_t Size;
  498. Py_ssize_t Index;
  499. };
  500. public:
  501. TLazySequenceAsDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, TPyObjectPtr&& sequence, Py_ssize_t size)
  502. : CastCtx_(ctx), ItemType_(itemType), Size(size), PySeq_(std::move(sequence))
  503. {}
  504. ~TLazySequenceAsDict()
  505. {
  506. const TPyGilLocker lock;
  507. PySeq_.Reset();
  508. }
  509. private:
  510. bool IsSortedDict() const override { return true; }
  511. bool HasDictItems() const override {
  512. return Size > 0;
  513. }
  514. ui64 GetDictLength() const override {
  515. return Size;
  516. }
  517. NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
  518. const Py_ssize_t index = key.Get<KeyType>();
  519. if (index >= -Size && index < Size) try {
  520. const TPyGilLocker lock;
  521. if (const auto item = PySequence_Fast_GET_ITEM(PySeq_.Get(), index >= 0 ? index : Size + index)) {
  522. return FromPyObject(CastCtx_, ItemType_, item).Release().MakeOptional();
  523. } else if (PyErr_Occurred()) {
  524. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
  525. }
  526. } catch (const yexception& e) {
  527. UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
  528. }
  529. return NUdf::TUnboxedValue();
  530. }
  531. bool Contains(const NUdf::TUnboxedValuePod& key) const override {
  532. const Py_ssize_t index = key.Get<KeyType>();
  533. return index >= -Size && index < Size;
  534. }
  535. NUdf::TUnboxedValue GetKeysIterator() const override {
  536. return NUdf::TUnboxedValuePod(new TKeyIterator(Size));
  537. }
  538. NUdf::TUnboxedValue GetPayloadsIterator() const override {
  539. return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_));
  540. }
  541. NUdf::TUnboxedValue GetDictIterator() const override {
  542. return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_));
  543. }
  544. const TPyCastContext::TPtr CastCtx_;
  545. const NUdf::TType* ItemType_;
  546. const Py_ssize_t Size;
  547. TPyObjectPtr PySeq_;
  548. };
  549. } // namespace
  550. NUdf::TUnboxedValue FromPyDict(
  551. const TPyCastContext::TPtr& castCtx,
  552. const NUdf::TType* keyType,
  553. const NUdf::TType* payType,
  554. PyObject* dict)
  555. {
  556. return NUdf::TUnboxedValuePod(new TLazyDict(castCtx, keyType, payType, dict));
  557. }
  558. NUdf::TUnboxedValue FromPyMapping(
  559. const TPyCastContext::TPtr& castCtx,
  560. const NUdf::TType* keyType,
  561. const NUdf::TType* payType,
  562. PyObject* map)
  563. {
  564. return NUdf::TUnboxedValuePod(new TLazyMapping(castCtx, keyType, payType, map));
  565. }
  566. NUdf::TUnboxedValue FromPySet(
  567. const TPyCastContext::TPtr& castCtx,
  568. const NUdf::TType* itemType,
  569. PyObject* set)
  570. {
  571. return NUdf::TUnboxedValuePod(new TLazySet(castCtx, itemType, set));
  572. }
  573. NUdf::TUnboxedValue FromPySequence(
  574. const TPyCastContext::TPtr& castCtx,
  575. const NUdf::TType* keyType,
  576. PyObject* set)
  577. {
  578. return NUdf::TUnboxedValuePod(new TLazySequenceAsSet(castCtx, keyType, set));
  579. }
  580. NUdf::TUnboxedValue FromPySequence(
  581. const TPyCastContext::TPtr& castCtx,
  582. const NUdf::TType* itemType,
  583. const NUdf::TDataTypeId keyType,
  584. PyObject* sequence)
  585. {
  586. if (TPyObjectPtr fast = PySequence_Fast(sequence, "Can't get fast sequence.")) {
  587. const auto size = PySequence_Fast_GET_SIZE(fast.Get());
  588. if (size >= 0) {
  589. switch (keyType) {
  590. #define MAKE_PRIMITIVE_TYPE_SIZE(type) \
  591. case NUdf::TDataType<type>::Id: \
  592. return NUdf::TUnboxedValuePod(new TLazySequenceAsDict<type>(castCtx, itemType, std::move(fast), size));
  593. INTEGRAL_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_SIZE)
  594. #undef MAKE_PRIMITIVE_TYPE_SIZE
  595. }
  596. Y_ABORT("Invalid key type.");
  597. }
  598. }
  599. UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
  600. }
  601. } // namespace NPython