py_cast.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978
  1. #include "py_cast.h"
  2. #include "py_ptr.h"
  3. #include "py_errors.h"
  4. #include "py_callable.h"
  5. #include "py_dict.h"
  6. #include "py_list.h"
  7. #include "py_gil.h"
  8. #include "py_utils.h"
  9. #include "py_void.h"
  10. #include "py_resource.h"
  11. #include "py_stream.h"
  12. #include "py_struct.h"
  13. #include "py_tuple.h"
  14. #include "py_variant.h"
  15. #include "py_decimal.h"
  16. #include <yql/essentials/public/udf/udf_value_builder.h>
  17. #include <yql/essentials/public/udf/udf_type_inspection.h>
  18. #include <yql/essentials/public/udf/udf_type_printer.h>
  19. #include <yql/essentials/public/udf/udf_terminator.h>
  20. #include <yql/essentials/utils/utf8.h>
  21. #include <library/cpp/containers/stack_vector/stack_vec.h>
  22. #include <util/string/join.h>
  23. #include <util/string/builder.h>
  // Select the CPython PyLong conversion functions that the integer cast
  // macros below reach through the YQL_PyLong_As<suffix> names: the
  // "long long" variants when HAVE_LONG_LONG is defined, the plain "long"
  // variants otherwise.
  24. #ifdef HAVE_LONG_LONG
  25. # define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask
  26. # define YQL_PyLong_Asi64 PyLong_AsLongLong
  27. # define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong
  28. #else
  29. # define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask
  30. # define YQL_PyLong_Asi64 PyLong_AsLong
  31. # define YQL_PyLong_Asui64 PyLong_AsUnsignedLong
  32. #endif
  // Defines the PyCast<Type>(Type) specialization that turns a C++ value into
  // a Python object by handing it to Py_BuildValue with the given format string.
  #define TO_PYTHON(Format, Type) \
      template <> \
      ::NPython::TPyObjectPtr PyCast<Type>(Type value) { \
          PyObject* const built = Py_BuildValue(Format, value); \
          return built; \
      }
  // Defines the PyCast<Type>(const Type&) specialization for string-like types.
  // The value is viewed as a TStringBuf; a null data pointer yields Python None,
  // anything else yields a Python bytes object built from the viewed range.
  #define TO_PYTHON_BYTES(Type) \
      template <> \
      ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \
          const TStringBuf view = val; \
          if (!view.data()) { \
              Py_RETURN_NONE; \
          } \
          return PyBytes_FromStringAndSize( \
              view.data(), static_cast<Py_ssize_t>(view.size())); \
      }
  // Defines the ToPyUnicode<Type>(const Type&) specialization for string-like
  // types. The value is viewed as a TStringBuf; a null data pointer yields
  // Python None, anything else is passed to PyUnicode_FromStringAndSize and its
  // result is returned unchanged.
  #define TO_PYTHON_UNICODE(Type) \
      template <> \
      ::NPython::TPyObjectPtr ToPyUnicode<Type>(const Type& val) { \
          const TStringBuf view = val; \
          if (!view.data()) { \
              Py_RETURN_NONE; \
          } \
          return PyUnicode_FromStringAndSize( \
              view.data(), static_cast<Py_ssize_t>(view.size())); \
      }
  // Throws yexception unless Py<Type>_Check(Value) succeeds. The message is
  // Message followed by the stringized type name and the object's repr.
  #define PY_ENSURE_TYPE(Type, Value, Message) \
      do { \
          if (Py##Type##_Check(Value)) { \
              break; /* type matches: nothing to report */ \
          } \
          throw yexception() << Message << " " #Type "; Object repr: " \
                             << PyObjectRepr(Value); \
      } while (0)
  // Defines PyCast<Type>(PyObject*) for floating point types.
  // The object is converted with PyFloat_AsDouble; a result of -1.0 together
  // with a pending Python error is treated as a failure: the error is cleared
  // and ThrowCastException is called. Otherwise the double is narrowed to Type.
  #define FROM_PYTHON_FLOAT(Type) \
      template <> \
      Type PyCast<Type>(PyObject* value) { \
          const double converted = PyFloat_AsDouble(value); \
          const bool failed = (converted == -1.0) && PyErr_Occurred(); \
          if (failed) { \
              PyErr_Clear(); \
              ThrowCastException(value, "Float"); \
          } \
          return static_cast<Type>(converted); \
      }
  // Defines PyCast<Type>(PyObject*) for Python integers (PyLong only).
  //
  // The object must pass PyLong_Check. It is converted through
  // YQL_PyLong_As<BigType> and the result is range-checked against Type.
  //
  // Throws yexception when the object is not a PyLong, when the conversion
  // reports a Python error, or when the value does not fit into Type.
  //
  // The error sentinel is compared in the type returned by the conversion
  // function. Comparing against static_cast<Type>(-1L) missed the sentinel
  // for unsigned Types narrower than the conversion result, so the pending
  // Python error was never cleared on those paths.
  //
  // NOTE(review): PyErr_Clear() runs before ThrowCastException, which itself
  // calls GetLastErrorAsString(); presumably the error text is already gone by
  // then - confirm against GetLastErrorAsString.
  #define FROM_PYTHON_LONG(Type, BigType) \
      template <> \
      Type PyCast<Type>(PyObject* value) { \
          if (!PyLong_Check(value)) { \
              ThrowCastTypeException(value, "Long"); \
          } \
          auto result = YQL_PyLong_As##BigType(value); \
          if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
              PyErr_Clear(); \
              ThrowCastException(value, "Long"); \
          } \
          if (result < Min<Type>() || result > Max<Type>()) { \
              throw yexception() << "Python object " << PyObjectRepr(value) \
                                 << " is out of range for " << #Type; \
          } \
          return static_cast<Type>(result); \
      }
  // Defines PyCast<Type>(PyObject*) for interpreters that have both PyInt and
  // PyLong objects.
  //
  // PyInt objects are read with PyInt_AsLong and range-checked against Type;
  // each bound is only tested when `long` can actually exceed it, so the
  // static_cast<long>(Min/Max<Type>()) never has to hold an unrepresentable
  // value. PyLong objects go through YQL_PyLong_As<BigType> followed by a
  // range check.
  //
  // Throws yexception when the object is neither PyInt nor PyLong, when the
  // conversion reports a Python error, or when the value does not fit into Type.
  //
  // In the PyLong branch the error sentinel is compared in the type returned by
  // the conversion function. Comparing against static_cast<Type>(-1L) missed it
  // for unsigned Types narrower than the result, leaving the Python error pending.
  #define FROM_PYTHON_INT_OR_LONG(Type, BigType) \
      template <> \
      Type PyCast<Type>(PyObject* value) { \
          if (PyInt_Check(value)) { \
              const long result = PyInt_AsLong(value); \
              if (result == -1L && PyErr_Occurred()) { \
                  PyErr_Clear(); \
                  ThrowCastException(value, "Long"); \
              } \
              const bool belowMin = \
                  static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) \
                  && result < static_cast<long>(Min<Type>()); \
              const bool aboveMax = \
                  static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) \
                  && result > static_cast<long>(Max<Type>()); \
              if (belowMin || aboveMax) { \
                  throw yexception() << "Python object " << PyObjectRepr(value) \
                                     << " is out of range for " << #Type; \
              } \
              return static_cast<Type>(result); \
          } \
          if (!PyLong_Check(value)) { \
              ThrowCastTypeException(value, "Long"); \
          } \
          auto result = YQL_PyLong_As##BigType(value); \
          if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
              PyErr_Clear(); \
              ThrowCastException(value, "Long"); \
          } \
          if (result < Min<Type>() || result > Max<Type>()) { \
              throw yexception() << "Python object " << PyObjectRepr(value) \
                                 << " is out of range for " << #Type; \
          } \
          return static_cast<Type>(result); \
      }
  // Defines PyCast<Type>(PyObject*) for string-like types, accepting either a
  // Python unicode object (read via PyUnicode_AsUTF8AndSize) or a Python bytes
  // object (read via PyBytes_AsStringAndSize).
  //
  // Type is constructed from the (pointer, size) pair handed back by CPython.
  // NOTE(review): for non-owning Types the result presumably stays valid only
  // while `value` is alive - confirm against callers.
  //
  // Throws yexception (through ThrowCastTypeException) when the object is of
  // another type or the conversion fails. A failed conversion also clears the
  // pending Python error before throwing, matching the numeric cast macros.
  #define FROM_PYTHON_BYTES_OR_UTF(Type) \
      template <> \
      Type PyCast<Type>(PyObject* value) { \
          if (PyUnicode_Check(value)) { \
              Py_ssize_t size = 0; \
              const auto str = PyUnicode_AsUTF8AndSize(value, &size); \
              if (!str || size < 0) { \
                  PyErr_Clear(); \
                  ThrowCastTypeException(value, "String"); \
              } \
              return Type(str, size_t(size)); \
          } \
          if (!PyBytes_Check(value)) { \
              ThrowCastTypeException(value, "String"); \
          } \
          Py_ssize_t size = 0; \
          char* str = nullptr; \
          const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
          if (rc == -1 || size < 0) { \
              PyErr_Clear(); \
              ThrowCastTypeException(value, "String"); \
          } \
          return Type(str, size_t(size)); \
      }
  // Defines PyCast<Type>(PyObject*) for string-like types, accepting Python
  // bytes objects only. PY_ENSURE_TYPE rejects anything else; the bytes buffer
  // is then read with PyBytes_AsStringAndSize and Type is constructed from the
  // returned (pointer, size) pair.
  #define FROM_PYTHON_BYTES(Type) \
      template <> \
      Type PyCast<Type>(PyObject* value) { \
          PY_ENSURE_TYPE(Bytes, value, "Expected"); \
          Py_ssize_t length = 0; \
          char* buffer = nullptr; \
          const auto status = PyBytes_AsStringAndSize(value, &buffer, &length); \
          const bool failed = (status == -1) || (length < 0); \
          if (failed) { \
              ThrowCastTypeException(value, "String"); \
          } \
          return Type(buffer, size_t(length)); \
      }
  // Defines the non-throwing TryPyCast<Type>(PyObject*, Type&) for floating
  // point types. On failure (PyFloat_AsDouble returned -1.0 with a Python error
  // pending) the error is cleared, `result` is left untouched and false is
  // returned. On success `result` receives the narrowed value.
  #define TRY_FROM_PYTHON_FLOAT(Type) \
      template <> \
      bool TryPyCast<Type>(PyObject* value, Type& result) { \
          const double converted = PyFloat_AsDouble(value); \
          const bool failed = (converted == -1.0) && PyErr_Occurred(); \
          if (failed) { \
              PyErr_Clear(); \
              return false; \
          } \
          result = static_cast<Type>(converted); \
          return true; \
      }
  // Defines the non-throwing TryPyCast<Type>(PyObject*, Type&) for Python
  // integers (PyLong only).
  //
  // Returns false, leaving `res` untouched, when the object is not a PyLong,
  // when YQL_PyLong_As<BigType> reports an error, or when the value does not
  // fit into Type. Returns true and stores the value otherwise.
  //
  // The error sentinel is compared in the type returned by the conversion
  // function. Comparing against static_cast<Type>(-1L) missed it for unsigned
  // Types narrower than the result, so false was returned with the Python
  // error still pending.
  #define TRY_FROM_PYTHON_LONG(Type, BigType) \
      template <> \
      bool TryPyCast<Type>(PyObject* value, Type& res) { \
          if (!PyLong_Check(value)) { \
              return false; \
          } \
          auto result = YQL_PyLong_As##BigType(value); \
          if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
              PyErr_Clear(); \
              return false; \
          } \
          if (result < Min<Type>() || result > Max<Type>()) { \
              return false; \
          } \
          res = static_cast<Type>(result); \
          return true; \
      }
  // Defines the non-throwing TryPyCast<Type>(PyObject*, Type&) for interpreters
  // that have both PyInt and PyLong objects.
  //
  // Returns false, leaving `res` untouched, when the object is neither PyInt
  // nor PyLong, when the conversion reports a Python error, or when the value
  // does not fit into Type. Returns true and stores the value otherwise.
  //
  // Changes relative to the previous revision:
  //  * the PyInt range check uses the same guarded bounds as
  //    FROM_PYTHON_INT_OR_LONG, so Min<Type>() is only narrowed to `long` when
  //    `long` can represent it;
  //  * `res` is written only after the range check has passed;
  //  * the PyLong error sentinel is compared in the conversion result type, so
  //    a failed conversion to a narrow unsigned Type no longer leaves the
  //    Python error pending.
  #define TRY_FROM_PYTHON_INT_OR_LONG(Type, BigType) \
      template <> \
      bool TryPyCast<Type>(PyObject* value, Type& res) { \
          if (PyInt_Check(value)) { \
              const long result = PyInt_AsLong(value); \
              if (result == -1L && PyErr_Occurred()) { \
                  PyErr_Clear(); \
                  return false; \
              } \
              const bool belowMin = \
                  static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) \
                  && result < static_cast<long>(Min<Type>()); \
              const bool aboveMax = \
                  static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) \
                  && result > static_cast<long>(Max<Type>()); \
              if (belowMin || aboveMax) { \
                  return false; \
              } \
              res = static_cast<Type>(result); \
              return true; \
          } \
          if (!PyLong_Check(value)) { \
              return false; \
          } \
          auto result = YQL_PyLong_As##BigType(value); \
          if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
              PyErr_Clear(); \
              return false; \
          } \
          if (result < Min<Type>() || result > Max<Type>()) { \
              return false; \
          } \
          res = static_cast<Type>(result); \
          return true; \
      }
  // Defines the non-throwing TryPyCast(PyObject*, Type&) for string-like types,
  // accepting either a Python unicode object (read via PyUnicode_AsUTF8AndSize)
  // or a Python bytes object (read via PyBytes_AsStringAndSize).
  //
  // Returns false, leaving `result` untouched, when the object is of another
  // type or the conversion fails. Every failure path clears the pending Python
  // error and reports through the return value; the bytes branch used to throw
  // instead of returning false.
  #define TRY_FROM_PYTHON_BYTES_OR_UTF(Type) \
      template <> \
      bool TryPyCast(PyObject* value, Type& result) { \
          if (PyUnicode_Check(value)) { \
              Py_ssize_t size = 0; \
              const auto str = PyUnicode_AsUTF8AndSize(value, &size); \
              if (!str || size < 0) { \
                  PyErr_Clear(); \
                  return false; \
              } \
              result = Type(str, size_t(size)); \
              return true; \
          } \
          if (!PyBytes_Check(value)) { \
              return false; \
          } \
          Py_ssize_t size = 0; \
          char* str = nullptr; \
          const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
          if (rc == -1 || size < 0) { \
              PyErr_Clear(); \
              return false; \
          } \
          result = Type(str, size_t(size)); \
          return true; \
      }
  // Defines the non-throwing TryPyCast(PyObject*, Type&) for string-like types,
  // accepting either a Python unicode object (encoded with
  // PyUnicode_AsUTF8String) or a Python bytes/str object.
  //
  // Returns false, leaving `result` untouched, when the object is of another
  // type or a conversion step fails; the pending Python error is cleared on
  // those paths. The encoder result is checked for NULL before it is handed to
  // PyBytes_AsStringAndSize.
  //
  // NOTE(review): in the unicode branch `result` is built from the buffer of
  // the temporary `utf8` object; for a non-owning Type such as NUdf::TStringRef
  // it presumably dangles once `utf8` is released at the end of the branch -
  // confirm and fix at the call sites if so.
  #define TRY_FROM_PYTHON_STR_OR_UTF(Type) \
      template <> \
      bool TryPyCast(PyObject* value, Type& result) { \
          if (PyUnicode_Check(value)) { \
              const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); \
              if (!utf8) { \
                  PyErr_Clear(); \
                  return false; \
              } \
              char* str = nullptr; \
              Py_ssize_t size = 0; \
              const int rc = PyBytes_AsStringAndSize(utf8.Get(), &str, &size); \
              if (rc == -1 || size < 0) { \
                  PyErr_Clear(); \
                  return false; \
              } \
              result = Type(str, size_t(size)); \
              return true; \
          } \
          if (!PyBytes_Check(value)) { \
              return false; \
          } \
          char* str = nullptr; \
          Py_ssize_t size = 0; \
          const int rc = PyBytes_AsStringAndSize(value, &str, &size); \
          if (rc == -1 || size < 0) { \
              PyErr_Clear(); \
              return false; \
          } \
          result = Type(str, size_t(size)); \
          return true; \
      }
  259. namespace NPython {
  260. using namespace NKikimr;
  261. inline void ThrowCastTypeException(PyObject* value, TStringBuf toType) {
  262. throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to " << toType
  263. << "; Object repr: " << PyObjectRepr(value);
  264. }
  265. inline void ThrowCastException(PyObject* value, TStringBuf toType) {
  266. throw yexception() << "Cast error object " << PyObjectRepr(value) << " to " << toType << ": "
  267. << GetLastErrorAsString();
  268. }
  269. template <>
  270. bool TryPyCast<bool>(PyObject* value, bool& result)
  271. {
  272. int isTrue = PyObject_IsTrue(value);
  273. if (isTrue == -1) {
  274. return false;
  275. }
  276. result = (isTrue == 1);
  277. return true;
  278. }
  // Instantiate the non-throwing TryPyCast specializations.
  // For PY_MAJOR_VERSION >= 3 the integer types use the PyLong-only macro and
  // the string types use the bytes-or-unicode macro; otherwise the integer
  // types use the PyInt-or-PyLong macro and the string types use the
  // str-or-unicode macro. Signed types convert through the i64 function,
  // unsigned types through the ui64 function. float and double are
  // instantiated for both configurations.
  279. #if PY_MAJOR_VERSION >= 3
  280. TRY_FROM_PYTHON_LONG(i8, i64)
  281. TRY_FROM_PYTHON_LONG(ui8, ui64)
  282. TRY_FROM_PYTHON_LONG(i16, i64)
  283. TRY_FROM_PYTHON_LONG(ui16, ui64)
  284. TRY_FROM_PYTHON_LONG(i32, i64)
  285. TRY_FROM_PYTHON_LONG(ui32, ui64)
  286. TRY_FROM_PYTHON_LONG(i64, i64)
  287. TRY_FROM_PYTHON_LONG(ui64, ui64)
  288. TRY_FROM_PYTHON_BYTES_OR_UTF(TString)
  289. TRY_FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef)
  290. #else
  291. TRY_FROM_PYTHON_INT_OR_LONG(i8, i64)
  292. TRY_FROM_PYTHON_INT_OR_LONG(ui8, ui64)
  293. TRY_FROM_PYTHON_INT_OR_LONG(i16, i64)
  294. TRY_FROM_PYTHON_INT_OR_LONG(ui16, ui64)
  295. TRY_FROM_PYTHON_INT_OR_LONG(i32, i64)
  296. TRY_FROM_PYTHON_INT_OR_LONG(ui32, ui64)
  297. TRY_FROM_PYTHON_INT_OR_LONG(i64, i64)
  298. TRY_FROM_PYTHON_INT_OR_LONG(ui64, ui64)
  299. TRY_FROM_PYTHON_STR_OR_UTF(TString)
  300. TRY_FROM_PYTHON_STR_OR_UTF(NUdf::TStringRef)
  301. #endif
  302. TRY_FROM_PYTHON_FLOAT(float)
  303. TRY_FROM_PYTHON_FLOAT(double)
  304. template <>
  305. bool PyCast<bool>(PyObject* value)
  306. {
  307. int res = PyObject_IsTrue(value);
  308. if (res == -1) {
  309. throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to bool. "
  310. << GetLastErrorAsString();
  311. }
  312. return res == 1;
  313. }
  // Instantiate the throwing PyCast<T>(PyObject*) specializations.
  // For PY_MAJOR_VERSION >= 3 the integer types use the PyLong-only macro and
  // the string types accept bytes or unicode; otherwise the integer types use
  // the PyInt-or-PyLong macro and the string types accept bytes only. Signed
  // types convert through the i64 function, unsigned types through the ui64
  // function. float and double are instantiated for both configurations.
  314. #if PY_MAJOR_VERSION >= 3
  315. FROM_PYTHON_LONG(i8, i64)
  316. FROM_PYTHON_LONG(ui8, ui64)
  317. FROM_PYTHON_LONG(i16, i64)
  318. FROM_PYTHON_LONG(ui16, ui64)
  319. FROM_PYTHON_LONG(i32, i64)
  320. FROM_PYTHON_LONG(ui32, ui64)
  321. FROM_PYTHON_LONG(i64, i64)
  322. FROM_PYTHON_LONG(ui64, ui64)
  323. FROM_PYTHON_BYTES_OR_UTF(TString)
  324. FROM_PYTHON_BYTES_OR_UTF(TStringBuf)
  325. FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef)
  326. #else
  327. FROM_PYTHON_INT_OR_LONG(i8, i64)
  328. FROM_PYTHON_INT_OR_LONG(ui8, ui64)
  329. FROM_PYTHON_INT_OR_LONG(i16, i64)
  330. FROM_PYTHON_INT_OR_LONG(ui16, ui64)
  331. FROM_PYTHON_INT_OR_LONG(i32, i64)
  332. FROM_PYTHON_INT_OR_LONG(ui32, ui64)
  333. FROM_PYTHON_INT_OR_LONG(i64, i64)
  334. FROM_PYTHON_INT_OR_LONG(ui64, ui64)
  335. FROM_PYTHON_BYTES(TString)
  336. FROM_PYTHON_BYTES(TStringBuf)
  337. FROM_PYTHON_BYTES(NUdf::TStringRef)
  338. #endif
  339. FROM_PYTHON_FLOAT(float)
  340. FROM_PYTHON_FLOAT(double)
  341. template <>
  342. TPyObjectPtr PyCast<bool>(bool value)
  343. {
  344. PyObject* res = value ? Py_True : Py_False;
  345. return TPyObjectPtr(res, TPyObjectPtr::ADD_REF);
  346. }
  // Instantiate the C++ -> Python conversions: one PyCast per integer type with
  // the Py_BuildValue format code given as the first macro argument (the 64-bit
  // codes depend on HAVE_LONG_LONG), followed by the bytes (PyCast) and unicode
  // (ToPyUnicode) conversions for the three string-like types.
  347. TO_PYTHON("b", i8)
  348. TO_PYTHON("B", ui8)
  349. TO_PYTHON("h", i16)
  350. TO_PYTHON("H", ui16)
  351. TO_PYTHON("i", i32)
  352. TO_PYTHON("I", ui32)
  353. #ifdef HAVE_LONG_LONG
  354. TO_PYTHON("L", i64)
  355. TO_PYTHON("K", ui64)
  356. #else
  357. TO_PYTHON("l", i64)
  358. TO_PYTHON("k", ui64)
  359. #endif
  360. TO_PYTHON_BYTES(TString)
  361. TO_PYTHON_BYTES(TStringBuf)
  362. TO_PYTHON_BYTES(NUdf::TStringRef)
  363. TO_PYTHON_UNICODE(TString)
  364. TO_PYTHON_UNICODE(TStringBuf)
  365. TO_PYTHON_UNICODE(NUdf::TStringRef)
  366. template <typename T>
  367. NUdf::TUnboxedValuePod FromPyTz(PyObject* value, T limit, TStringBuf typeName, const TPyCastContext::TPtr& ctx) {
  368. PY_ENSURE(PyTuple_Check(value),
  369. "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name);
  370. Py_ssize_t tupleSize = PyTuple_GET_SIZE(value);
  371. PY_ENSURE(tupleSize == 2,
  372. "Expected to get Tuple with 2 elements, but got "
  373. << tupleSize << " elements");
  374. PyObject* el0 = PyTuple_GET_ITEM(value, 0);
  375. PyObject* el1 = PyTuple_GET_ITEM(value, 1);
  376. auto num = PyCast<T>(el0);
  377. if (num >= limit) {
  378. throw yexception() << "Python object " << PyObjectRepr(el0) \
  379. << " is out of range for " << typeName;
  380. }
  381. auto name = PyCast<NUdf::TStringRef>(el1);
  382. auto ret = NUdf::TUnboxedValuePod(num);
  383. ui32 tzId;
  384. if (!ctx->ValueBuilder->GetDateBuilder().FindTimezoneId(name, tzId)) {
  385. throw yexception() << "Unknown timezone: " << TStringBuf(name);
  386. }
  387. ret.SetTimezoneId(tzId);
  388. return ret;
  389. }
  // Instantiate the C++ -> Python conversions for float and double, using the
  // Py_BuildValue format codes "f" and "d" respectively.
  390. TO_PYTHON("f", float)
  391. TO_PYTHON("d", double)
  392. namespace {
  393. TPyObjectPtr ToPyData(const TPyCastContext::TPtr& ctx,
  394. const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
  395. {
  396. const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
  397. const auto typeId = inspector.GetTypeId();
  398. switch (typeId) {
  399. case NUdf::TDataType<i8>::Id: return PyCast<i8>(value.Get<i8>());
  400. case NUdf::TDataType<ui8>::Id: return PyCast<ui8>(value.Get<ui8>());
  401. case NUdf::TDataType<i16>::Id: return PyCast<i16>(value.Get<i16>());
  402. case NUdf::TDataType<ui16>::Id: return PyCast<ui16>(value.Get<ui16>());
  403. case NUdf::TDataType<i32>::Id: return PyCast<i32>(value.Get<i32>());
  404. case NUdf::TDataType<ui32>::Id: return PyCast<ui32>(value.Get<ui32>());
  405. case NUdf::TDataType<i64>::Id: return PyCast<i64>(value.Get<i64>());
  406. case NUdf::TDataType<ui64>::Id: return PyCast<ui64>(value.Get<ui64>());
  407. case NUdf::TDataType<bool>::Id: return PyCast<bool>(value.Get<bool>());
  408. case NUdf::TDataType<float>::Id: return PyCast<float>(value.Get<float>());
  409. case NUdf::TDataType<double>::Id: return PyCast<double>(value.Get<double>());
  410. case NUdf::TDataType<NUdf::TDecimal>::Id: return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale());
  411. case NUdf::TDataType<const char*>::Id: {
  412. if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) {
  413. return PyCast<NUdf::TStringRef>(value.AsStringRef());
  414. } else {
  415. auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef());
  416. if (!pyObj) {
  417. UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos <<
  418. "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" <<
  419. GetLastErrorAsString()).data()
  420. );
  421. }
  422. return pyObj;
  423. }
  424. }
  425. case NUdf::TDataType<NUdf::TYson>::Id: {
  426. auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef());
  427. if (ctx->YsonConverterIn) {
  428. TPyObjectPtr pyArgs(PyTuple_New(1));
  429. PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release());
  430. pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get());
  431. if (!pyObj) {
  432. UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
  433. }
  434. }
  435. return pyObj;
  436. }
  437. case NUdf::TDataType<NUdf::TUuid>::Id:
  438. return PyCast<NUdf::TStringRef>(value.AsStringRef());
  439. case NUdf::TDataType<NUdf::TJson>::Id:
  440. case NUdf::TDataType<NUdf::TUtf8>::Id:
  441. return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef());
  442. case NUdf::TDataType<NUdf::TDate>::Id: return PyCast<ui16>(value.Get<ui16>());
  443. case NUdf::TDataType<NUdf::TDatetime>::Id: return PyCast<ui32>(value.Get<ui32>());
  444. case NUdf::TDataType<NUdf::TTimestamp>::Id: return PyCast<ui64>(value.Get<ui64>());
  445. case NUdf::TDataType<NUdf::TInterval>::Id: return PyCast<i64>(value.Get<i64>());
  446. case NUdf::TDataType<NUdf::TTzDate>::Id: {
  447. TPyObjectPtr pyValue = PyCast<ui16>(value.Get<ui16>());
  448. auto tzId = value.GetTimezoneId();
  449. auto tzName = ctx->GetTimezoneName(tzId);
  450. return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
  451. }
  452. case NUdf::TDataType<NUdf::TTzDatetime>::Id: {
  453. TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>());
  454. auto tzId = value.GetTimezoneId();
  455. auto tzName = ctx->GetTimezoneName(tzId);
  456. return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
  457. }
  458. case NUdf::TDataType<NUdf::TTzTimestamp>::Id: {
  459. TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>());
  460. auto tzId = value.GetTimezoneId();
  461. auto tzName = ctx->GetTimezoneName(tzId);
  462. return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
  463. }
  464. }
  465. throw yexception()
  466. << "Unsupported type " << typeId;
  467. }
  468. NUdf::TUnboxedValue FromPyData(
  469. const TPyCastContext::TPtr& ctx,
  470. const NUdf::TType* type, PyObject* value)
  471. {
  472. const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
  473. const auto typeId = inspector.GetTypeId();
  474. switch (typeId) {
  475. case NUdf::TDataType<i8>::Id: return NUdf::TUnboxedValuePod(PyCast<i8>(value));
  476. case NUdf::TDataType<ui8>::Id: return NUdf::TUnboxedValuePod(PyCast<ui8>(value));
  477. case NUdf::TDataType<i16>::Id: return NUdf::TUnboxedValuePod(PyCast<i16>(value));
  478. case NUdf::TDataType<ui16>::Id: return NUdf::TUnboxedValuePod(PyCast<ui16>(value));
  479. case NUdf::TDataType<i32>::Id: return NUdf::TUnboxedValuePod(PyCast<i32>(value));
  480. case NUdf::TDataType<ui32>::Id: return NUdf::TUnboxedValuePod(PyCast<ui32>(value));
  481. case NUdf::TDataType<i64>::Id: return NUdf::TUnboxedValuePod(PyCast<i64>(value));
  482. case NUdf::TDataType<ui64>::Id: return NUdf::TUnboxedValuePod(PyCast<ui64>(value));
  483. case NUdf::TDataType<bool>::Id: return NUdf::TUnboxedValuePod(PyCast<bool>(value));
  484. case NUdf::TDataType<float>::Id: return NUdf::TUnboxedValuePod(PyCast<float>(value));
  485. case NUdf::TDataType<double>::Id: return NUdf::TUnboxedValuePod(PyCast<double>(value));
  486. case NUdf::TDataType<NUdf::TDecimal>::Id: return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale());
  487. case NUdf::TDataType<NUdf::TYson>::Id: {
  488. if (ctx->YsonConverterOut) {
  489. TPyObjectPtr input(value, TPyObjectPtr::ADD_REF);
  490. TPyObjectPtr pyArgs(PyTuple_New(1));
  491. // PyTuple_SET_ITEM steals reference, so pass ownership to it
  492. PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release());
  493. input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get()));
  494. if (!input) {
  495. UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
  496. }
  497. return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get()));
  498. }
  499. }
  500. #if PY_MAJOR_VERSION >= 3
  501. case NUdf::TDataType<const char*>::Id:
  502. return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
  503. case NUdf::TDataType<NUdf::TUtf8>::Id:
  504. case NUdf::TDataType<NUdf::TJson>::Id:
  505. if (PyUnicode_Check(value)) {
  506. const TPyObjectPtr uif8(PyUnicode_AsUTF8String(value));
  507. return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(uif8.Get()));
  508. }
  509. throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode";
  510. #else
  511. case NUdf::TDataType<const char*>::Id:
  512. case NUdf::TDataType<NUdf::TJson>::Id:
  513. case NUdf::TDataType<NUdf::TUtf8>::Id: {
  514. if (PyUnicode_Check(value)) {
  515. const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value));
  516. return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get()));
  517. }
  518. if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) &&
  519. PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), static_cast<size_t>(PyBytes_GET_SIZE(value))))) {
  520. throw yexception() << "Python string " << PyObjectRepr(value) << " is invalid for Utf8/Json";
  521. }
  522. return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
  523. }
  524. #endif
  525. case NUdf::TDataType<NUdf::TUuid>::Id: {
  526. const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
  527. if (ret.AsStringRef().Size() != 16) {
  528. throw yexception() << "Python object " << PyObjectRepr(value) \
  529. << " has invalid value for Uuid";
  530. }
  531. return ret;
  532. }
  533. case NUdf::TDataType<NUdf::TDate>::Id: {
  534. auto num = PyCast<ui16>(value);
  535. if (num >= NUdf::MAX_DATE) {
  536. throw yexception() << "Python object " << PyObjectRepr(value) \
  537. << " is out of range for Date";
  538. }
  539. return NUdf::TUnboxedValuePod(num);
  540. }
  541. case NUdf::TDataType<NUdf::TDatetime>::Id: {
  542. auto num = PyCast<ui32>(value);
  543. if (num >= NUdf::MAX_DATETIME) {
  544. throw yexception() << "Python object " << PyObjectRepr(value) \
  545. << " is out of range for Datetime";
  546. }
  547. return NUdf::TUnboxedValuePod(num);
  548. }
  549. case NUdf::TDataType<NUdf::TTimestamp>::Id: {
  550. auto num = PyCast<ui64>(value);
  551. if (num >= NUdf::MAX_TIMESTAMP) {
  552. throw yexception() << "Python object " << PyObjectRepr(value) \
  553. << " is out of range for Timestamp";
  554. }
  555. return NUdf::TUnboxedValuePod(num);
  556. }
  557. case NUdf::TDataType<NUdf::TInterval>::Id: {
  558. auto num = PyCast<i64>(value);
  559. if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) {
  560. throw yexception() << "Python object " << PyObjectRepr(value) \
  561. << " is out of range for Interval";
  562. }
  563. return NUdf::TUnboxedValuePod(num);
  564. }
  565. case NUdf::TDataType<NUdf::TTzDate>::Id:
  566. return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx);
  567. case NUdf::TDataType<NUdf::TTzDatetime>::Id:
  568. return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx);
  569. case NUdf::TDataType<NUdf::TTzTimestamp>::Id:
  570. return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, TStringBuf("TzTimestamp"), ctx);
  571. }
  572. throw yexception()
  573. << "Unsupported type " << typeId;
  574. }
  575. TPyObjectPtr ToPyList(
  576. const TPyCastContext::TPtr& ctx,
  577. const NUdf::TType* type,
  578. const NUdf::TUnboxedValuePod& value)
  579. {
  580. const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
  581. const auto itemType = inspector.GetItemType();
  582. if (ctx->LazyInputObjects) {
  583. return ToPyLazyList(ctx, itemType, value);
  584. }
  585. TPyObjectPtr list(PyList_New(0));
  586. const auto iterator = value.GetListIterator();
  587. for (NUdf::TUnboxedValue item; iterator.Next(item);) {
  588. auto pyItem = ToPyObject(ctx, itemType, item);
  589. if (PyList_Append(list.Get(), pyItem.Get()) < 0) {
  590. throw yexception() << "Can't append item to list"
  591. << GetLastErrorAsString();
  592. }
  593. }
  594. return list;
  595. }
  596. NUdf::TUnboxedValue FromPyList(
  597. const TPyCastContext::TPtr& ctx,
  598. const NUdf::TType* type, PyObject* value)
  599. {
  600. const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
  601. if (PyList_Check(value)) {
  602. // eager list to list conversion
  603. auto itemType = inspector.GetItemType();
  604. Py_ssize_t cnt = PyList_GET_SIZE(value);
  605. NUdf::TUnboxedValue *items = nullptr;
  606. const auto list = ctx->ValueBuilder->NewArray(cnt, items);
  607. for (Py_ssize_t i = 0; i < cnt; ++i) {
  608. PyObject *item = PyList_GET_ITEM(value, i);
  609. *items++ = FromPyObject(ctx, itemType, item);
  610. }
  611. return list;
  612. }
  613. if (PyTuple_Check(value)) {
  614. // eager tuple to list conversion
  615. auto itemType = inspector.GetItemType();
  616. Py_ssize_t cnt = PyTuple_GET_SIZE(value);
  617. NUdf::TUnboxedValue *items = nullptr;
  618. const auto list = ctx->ValueBuilder->NewArray(cnt, items);
  619. for (Py_ssize_t i = 0; i < cnt; ++i) {
  620. PyObject *item = PyTuple_GET_ITEM(value, i);
  621. *items++ = FromPyObject(ctx, itemType, item);
  622. }
  623. return list;
  624. }
  625. if (PyGen_Check(value)) {
  626. TPyObjectPtr valuePtr(PyObject_GetIter(value));
  627. return FromPyLazyIterator(ctx, type, std::move(valuePtr));
  628. }
  629. if (PyIter_Check(value)
  630. #if PY_MAJOR_VERSION < 3
  631. // python 2 iterators must also implement "next" method
  632. && 1 == PyObject_HasAttrString(value, "next")
  633. #endif
  634. ) {
  635. TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
  636. return FromPyLazyIterator(ctx, type, std::move(valuePtr));
  637. }
  638. // assume that this function will returns generator
  639. if (PyCallable_Check(value)) {
  640. TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
  641. return FromPyLazyGenerator(ctx, type, std::move(valuePtr));
  642. }
  643. if (PySequence_Check(value) || PyObject_HasAttrString(value, "__iter__")) {
  644. TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
  645. return FromPyLazyIterable(ctx, type, std::move(valuePtr));
  646. }
  647. throw yexception() << "Expected list, tuple, generator, generator factory, "
  648. "iterator or iterable object, but got: " << PyObjectRepr(value);
  649. }
  650. TPyObjectPtr ToPyOptional(
  651. const TPyCastContext::TPtr& ctx,
  652. const NUdf::TType* type,
  653. const NUdf::TUnboxedValuePod& value)
  654. {
  655. if (!value) {
  656. return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF);
  657. }
  658. const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
  659. return ToPyObject(ctx, inspector.GetItemType(), value);
  660. }
  661. NUdf::TUnboxedValue FromPyOptional(
  662. const TPyCastContext::TPtr& ctx,
  663. const NUdf::TType* type, PyObject* value)
  664. {
  665. if (value == Py_None) {
  666. return NUdf::TUnboxedValue();
  667. }
  668. const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
  669. return FromPyObject(ctx, inspector.GetItemType(), value).Release().MakeOptional();
  670. }
  671. TPyObjectPtr ToPyDict(
  672. const TPyCastContext::TPtr& ctx,
  673. const NUdf::TType* type,
  674. const NUdf::TUnboxedValuePod& value)
  675. {
  676. const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
  677. const auto keyType = inspector.GetKeyType();
  678. const auto valueType = inspector.GetValueType();
  679. if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) {
  680. if (ctx->LazyInputObjects) { // TODO
  681. return ToPyLazySet(ctx, keyType, value);
  682. }
  683. const TPyObjectPtr set(PyFrozenSet_New(nullptr));
  684. const auto iterator = value.GetKeysIterator();
  685. for (NUdf::TUnboxedValue key; iterator.Next(key);) {
  686. auto pyKey = ToPyObject(ctx, keyType, key);
  687. if (PySet_Add(set.Get(), pyKey.Get()) < 0) {
  688. throw yexception() << "Can't add item to set" << GetLastErrorAsString();
  689. }
  690. }
  691. return set;
  692. } else {
  693. if (ctx->LazyInputObjects) {
  694. return ToPyLazyDict(ctx, keyType, valueType, value);
  695. }
  696. const TPyObjectPtr dict(PyDict_New());
  697. const auto iterator = value.GetDictIterator();
  698. for (NUdf::TUnboxedValue key, valueObj; iterator.NextPair(key, valueObj);) {
  699. auto pyKey = ToPyObject(ctx, keyType, key);
  700. auto pyValue = ToPyObject(ctx, valueType, valueObj);
  701. if (PyDict_SetItem(dict.Get(), pyKey.Get(), pyValue.Get()) < 0) {
  702. throw yexception() << "Can't add item to dict" << GetLastErrorAsString();
  703. }
  704. }
  705. return dict;
  706. }
  707. }
  708. NUdf::TUnboxedValue FromPyDict(
  709. const TPyCastContext::TPtr& ctx,
  710. const NUdf::TType* type, PyObject* value)
  711. {
  712. const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
  713. const auto keyType = inspector.GetKeyType();
  714. const auto valueType = inspector.GetValueType();
  715. if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType)
  716. && ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) {
  717. const NUdf::TDataTypeInspector keiIns(*ctx->PyCtx->TypeInfoHelper, keyType);
  718. if (NUdf::GetDataTypeInfo(NUdf::GetDataSlot(keiIns.GetTypeId())).Features & NUdf::EDataTypeFeatures::IntegralType) {
  719. return FromPySequence(ctx, valueType, keiIns.GetTypeId(), value);
  720. }
  721. } else if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) {
  722. if (PyAnySet_Check(value)) {
  723. return FromPySet(ctx, keyType, value);
  724. } else if (value->ob_type->tp_as_sequence && value->ob_type->tp_as_sequence->sq_contains) {
  725. return FromPySequence(ctx, keyType, value);
  726. }
  727. } else if (PyDict_Check(value)) {
  728. return FromPyDict(ctx, keyType, valueType, value);
  729. } else if (PyMapping_Check(value)) {
  730. return FromPyMapping(ctx, keyType, valueType, value);
  731. }
  732. throw yexception() << "Can't cast "<< PyObjectRepr(value) << " to dict.";
  733. }
  734. TPyObjectPtr ToPyNull(
  735. const TPyCastContext::TPtr& ctx,
  736. const NUdf::TType* type,
  737. const NUdf::TUnboxedValuePod& value)
  738. {
  739. if (!value.HasValue()) {
  740. return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF);
  741. }
  742. throw yexception() << "Value is not null";
  743. }
  744. NUdf::TUnboxedValue FromPyNull(
  745. const TPyCastContext::TPtr& ctx,
  746. const NUdf::TType* type, PyObject* value)
  747. {
  748. if (value == Py_None) {
  749. return NYql::NUdf::TUnboxedValuePod();
  750. }
  751. throw yexception() << "Can't cast " << PyObjectRepr(value) << " to null.";
  752. }
  753. } // namespace
  754. TPyObjectPtr ToPyObject(
  755. const TPyCastContext::TPtr& ctx,
  756. const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
  757. {
  758. switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) {
  759. case NUdf::ETypeKind::Data: return ToPyData(ctx, type, value);
  760. case NUdf::ETypeKind::Tuple: return ToPyTuple(ctx, type, value);
  761. case NUdf::ETypeKind::Struct: return ToPyStruct(ctx, type, value);
  762. case NUdf::ETypeKind::List: return ToPyList(ctx, type, value);
  763. case NUdf::ETypeKind::Optional: return ToPyOptional(ctx, type, value);
  764. case NUdf::ETypeKind::Dict: return ToPyDict(ctx, type, value);
  765. case NUdf::ETypeKind::Callable: return ToPyCallable(ctx, type, value);
  766. case NUdf::ETypeKind::Resource: return ToPyResource(ctx, type, value);
  767. case NUdf::ETypeKind::Void: return ToPyVoid(ctx, type, value);
  768. case NUdf::ETypeKind::Stream: return ToPyStream(ctx, type, value);
  769. case NUdf::ETypeKind::Variant: return ToPyVariant(ctx, type, value);
  770. case NUdf::ETypeKind::Null: return ToPyNull(ctx, type, value);
  771. default: {
  772. ::TStringBuilder sb;
  773. sb << "Failed to export: ";
  774. NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out);
  775. throw yexception() << sb;
  776. }
  777. }
  778. }
  779. NUdf::TUnboxedValue FromPyObject(
  780. const TPyCastContext::TPtr& ctx,
  781. const NUdf::TType* type, PyObject* value)
  782. {
  783. switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) {
  784. case NUdf::ETypeKind::Data: return FromPyData(ctx, type, value);
  785. case NUdf::ETypeKind::Tuple: return FromPyTuple(ctx, type, value);
  786. case NUdf::ETypeKind::Struct: return FromPyStruct(ctx, type, value);
  787. case NUdf::ETypeKind::List: return FromPyList(ctx, type, value);
  788. case NUdf::ETypeKind::Optional: return FromPyOptional(ctx, type, value);
  789. case NUdf::ETypeKind::Dict: return FromPyDict(ctx, type, value);
  790. case NUdf::ETypeKind::Callable: return FromPyCallable(ctx, type, value);
  791. case NUdf::ETypeKind::Resource: return FromPyResource(ctx, type, value);
  792. case NUdf::ETypeKind::Void: return FromPyVoid(ctx, type, value);
  793. case NUdf::ETypeKind::Stream: return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr);
  794. case NUdf::ETypeKind::Variant: return FromPyVariant(ctx, type, value);
  795. case NUdf::ETypeKind::Null: return FromPyNull(ctx, type, value);
  796. default: {
  797. ::TStringBuilder sb;
  798. sb << "Failed to import: ";
  799. NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out);
  800. throw yexception() << sb;
  801. }
  802. }
  803. }
  804. TPyObjectPtr ToPyArgs(
  805. const TPyCastContext::TPtr& ctx,
  806. const NUdf::TType* type,
  807. const NUdf::TUnboxedValuePod* args,
  808. const NUdf::TCallableTypeInspector& inspector)
  809. {
  810. const auto argsCount = inspector.GetArgsCount();
  811. TPyObjectPtr tuple(PyTuple_New(argsCount));
  812. for (ui32 i = 0; i < argsCount; i++) {
  813. auto arg = ToPyObject(ctx, inspector.GetArgType(i), args[i]);
  814. PyTuple_SET_ITEM(tuple.Get(), i, arg.Release());
  815. }
  816. return tuple;
  817. }
  818. void FromPyArgs(
  819. const TPyCastContext::TPtr& ctx,
  820. const NUdf::TType* type,
  821. PyObject* pyArgs,
  822. NUdf::TUnboxedValue* cArgs,
  823. const NUdf::TCallableTypeInspector& inspector)
  824. {
  825. PY_ENSURE_TYPE(Tuple, pyArgs, "Expected");
  826. const auto argsCount = inspector.GetArgsCount();
  827. const auto optArgsCount = inspector.GetOptionalArgsCount();
  828. ui32 pyArgsCount = static_cast<ui32>(PyTuple_GET_SIZE(pyArgs));
  829. PY_ENSURE(argsCount - optArgsCount <= pyArgsCount && pyArgsCount <= argsCount,
  830. "arguments count missmatch: "
  831. "min " << (argsCount - optArgsCount) << ", max " << argsCount
  832. << ", got " << pyArgsCount);
  833. for (ui32 i = 0; i < pyArgsCount; i++) {
  834. PyObject* item = PyTuple_GET_ITEM(pyArgs, i);
  835. cArgs[i] = FromPyObject(ctx, inspector.GetArgType(i), item);
  836. }
  837. for (ui32 i = pyArgsCount; i < argsCount; i++) {
  838. cArgs[i] = NUdf::TUnboxedValuePod();
  839. }
  840. }
  841. class TDummyMemoryLock : public IMemoryLock {
  842. public:
  843. void Acquire() override {}
  844. void Release() override {}
  845. };
  846. TPyCastContext::TPyCastContext(
  847. const NKikimr::NUdf::IValueBuilder* builder,
  848. TPyContext::TPtr pyCtx,
  849. THolder<IMemoryLock> memoryLock)
  850. : ValueBuilder(builder)
  851. , PyCtx(std::move(pyCtx))
  852. , MemoryLock(std::move(memoryLock))
  853. {
  854. if (!MemoryLock) {
  855. MemoryLock = MakeHolder<TDummyMemoryLock>();
  856. }
  857. }
  858. TPyCastContext::~TPyCastContext() {
  859. TPyGilLocker locker;
  860. StructTypes.clear();
  861. YsonConverterIn.Reset();
  862. YsonConverterOut.Reset();
  863. TimezoneNames.clear();
  864. }
  865. const TPyObjectPtr& TPyCastContext::GetTimezoneName(ui32 id) {
  866. auto& x = TimezoneNames[id];
  867. if (!x) {
  868. NKikimr::NUdf::TStringRef ref;
  869. if (!ValueBuilder->GetDateBuilder().FindTimezoneName(id, ref)) {
  870. throw yexception() << "Unknown timezone id: " << id;
  871. }
  872. x = PyRepr(ref);
  873. }
  874. return x;
  875. }
  876. } // namespace NPython