loads.cpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. #include "loads.h"
  2. #include <Python.h>
  3. #include <library/cpp/json/fast_sax/parser.h>
  4. #include <util/generic/algorithm.h>
  5. #include <util/generic/stack.h>
  6. #include <util/generic/vector.h>
  7. #include <util/generic/ylimits.h>
  8. #include <util/string/ascii.h>
  9. using namespace NJson;
  10. namespace {
  // Role of an entry on the parser stack (see TVal and TContext::S).
  enum EKind {
      Undefined = 0, // kind of a default-constructed TVal (the entry TContext pushes first)
      Array = 1,     // list being filled; pushed by OnOpenArray
      Dict = 2,      // dict being filled; pushed by OnOpenMap
      Value = 3,     // value stored by Consume when the top entry is neither Array nor Key
      Key = 4,       // map key waiting for its value; pushed by OnMapKey
  };
  18. static inline TStringBuf ToStr(EKind kind) noexcept {
  19. switch (kind) {
  20. case Undefined:
  21. return TStringBuf("Undefined");
  22. case Array:
  23. return TStringBuf("Array");
  24. case Dict:
  25. return TStringBuf("Dict");
  26. case Value:
  27. return TStringBuf("Value");
  28. case Key:
  29. return TStringBuf("Key");
  30. }
  31. Y_UNREACHABLE();
  32. }
  33. struct TUnref {
  34. static inline void Destroy(PyObject* o) noexcept {
  35. Py_XDECREF(o);
  36. }
  37. };
  38. using TObjectPtr = TAutoPtr<PyObject, TUnref>;
  39. static inline TObjectPtr BuildBool(bool val) noexcept {
  40. if (val) {
  41. Py_RETURN_TRUE;
  42. }
  43. Py_RETURN_FALSE;
  44. }
  45. // Translate python exceptions from object-creating functions into c++ exceptions
  46. // Such errors are reported by returning nullptr
  47. // When a python error is set and C++ exception is caught by Cython wrapper,
  48. // Python exception is propagated, while C++ exception is discarded.
  49. PyObject* CheckNewObject(PyObject* obj) {
  50. Y_ENSURE(obj != nullptr, "got python exception");
  51. return obj;
  52. }
  53. void CheckRetcode(int retcode) {
  54. Y_ENSURE(retcode == 0, "got python exception");
  55. }
  56. static inline TObjectPtr BuildSmall(long val) {
  57. #if PY_VERSION_HEX >= 0x03000000
  58. return CheckNewObject(PyLong_FromLong(val));
  59. #else
  60. return CheckNewObject(PyInt_FromLong(val));
  61. #endif
  62. }
  63. PyObject* CreatePyString(TStringBuf str, bool intern, bool mayUnicode) {
  64. #if PY_VERSION_HEX >= 0x03000000
  65. Y_UNUSED(mayUnicode);
  66. PyObject* pyStr = PyUnicode_FromStringAndSize(str.data(), str.size());
  67. if (intern) {
  68. PyUnicode_InternInPlace(&pyStr);
  69. }
  70. #else
  71. const bool needUnicode = mayUnicode && !AllOf(str, IsAscii);
  72. PyObject* pyStr = needUnicode ? PyUnicode_FromStringAndSize(str.data(), str.size())
  73. : PyString_FromStringAndSize(str.data(), str.size());
  74. if (intern && !needUnicode) {
  75. PyString_InternInPlace(&pyStr);
  76. }
  77. #endif
  78. return pyStr;
  79. }
  80. struct TVal {
  81. EKind Kind = Undefined;
  82. TObjectPtr Val;
  83. inline TVal() noexcept
  84. : Kind(Undefined)
  85. {
  86. }
  87. inline TVal(EKind kind, TObjectPtr val) noexcept
  88. : Kind(kind)
  89. , Val(val)
  90. {
  91. }
  92. };
  93. static inline TObjectPtr NoneRef() noexcept {
  94. Py_RETURN_NONE;
  95. }
  96. struct TContext: public TJsonCallbacks {
  97. const bool InternKeys;
  98. const bool InternVals;
  99. const bool MayUnicode;
  100. TStack<TVal, TVector<TVal>> S;
  101. inline TContext(bool internKeys, bool internVals, bool mayUnicode)
  102. : TJsonCallbacks(true)
  103. , InternKeys(internKeys)
  104. , InternVals(internVals)
  105. , MayUnicode(mayUnicode)
  106. {
  107. S.emplace();
  108. }
  109. inline bool Consume(TObjectPtr o) {
  110. auto& t = S.top();
  111. if (t.Kind == Array) {
  112. CheckRetcode(PyList_Append(t.Val.Get(), o.Get()));
  113. } else if (t.Kind == Key) {
  114. auto key = S.top().Val;
  115. S.pop();
  116. CheckRetcode(PyDict_SetItem(S.top().Val.Get(), key.Get(), o.Get()));
  117. } else {
  118. t = TVal(Value, o);
  119. }
  120. return true;
  121. }
  122. inline TObjectPtr Pop(EKind expect) {
  123. auto res = S.top();
  124. S.pop();
  125. if (res.Kind != expect) {
  126. ythrow yexception() << "unexpected kind(expect " << ToStr(expect) << ", got " << ToStr(res.Kind) << ")";
  127. }
  128. return res.Val;
  129. }
  130. inline void Push(EKind kind, TObjectPtr object) {
  131. S.push(TVal(kind, object));
  132. }
  133. virtual bool OnNull() {
  134. return Consume(NoneRef());
  135. }
  136. virtual bool OnBoolean(bool v) {
  137. return Consume(BuildBool(v));
  138. }
  139. virtual bool OnInteger(long long v) {
  140. if (v >= (long long)Min<long>()) {
  141. return Consume(BuildSmall((long)v));
  142. }
  143. return Consume(CheckNewObject(PyLong_FromLongLong(v)));
  144. }
  145. virtual bool OnUInteger(unsigned long long v) {
  146. if (v <= (unsigned long long)Max<long>()) {
  147. return Consume(BuildSmall((long)v));
  148. }
  149. return Consume(CheckNewObject(PyLong_FromUnsignedLongLong(v)));
  150. }
  151. virtual bool OnDouble(double v) {
  152. return Consume(CheckNewObject(PyFloat_FromDouble(v)));
  153. }
  154. virtual bool OnString(const TStringBuf& v) {
  155. return Consume(CheckNewObject(CreatePyString(v, InternVals, MayUnicode)));
  156. }
  157. virtual bool OnOpenMap() {
  158. Push(Dict, CheckNewObject(PyDict_New()));
  159. return true;
  160. }
  161. virtual bool OnCloseMap() {
  162. return Consume(Pop(Dict));
  163. }
  164. virtual bool OnMapKey(const TStringBuf& k) {
  165. Push(Key, CheckNewObject(CreatePyString(k, InternKeys, MayUnicode)));
  166. return true;
  167. }
  168. virtual bool OnOpenArray() {
  169. Push(Array, CheckNewObject(PyList_New(0)));
  170. return true;
  171. }
  172. virtual bool OnCloseArray() {
  173. return Consume(Pop(Array));
  174. }
  175. };
  176. }
  177. PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys, bool internVals, bool mayUnicode) {
  178. TContext ctx(internKeys, internVals, mayUnicode);
  179. if (!len) {
  180. ythrow yexception() << "parse error: zero length input string";
  181. }
  182. if (!NJson::ReadJsonFast(TStringBuf(data, len), &ctx)) {
  183. ythrow yexception() << "parse error";
  184. }
  185. auto& s = ctx.S;
  186. if (!s || s.top().Kind != Value) {
  187. ythrow yexception() << "shit happen";
  188. }
  189. return s.top().Val.Release();
  190. }