123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246 |
- #include "loads.h"
- #include <Python.h>
- #include <library/cpp/json/fast_sax/parser.h>
- #include <util/generic/algorithm.h>
- #include <util/generic/stack.h>
- #include <util/generic/vector.h>
- #include <util/generic/ylimits.h>
- #include <util/string/ascii.h>
- using namespace NJson;
- namespace {
// Role of an entry on the parser's working stack (see TVal / TContext::S).
enum EKind {
    Undefined = 0, // default-constructed slot; nothing has been stored in it yet
    Array,         // list under construction (pushed by OnOpenArray)
    Dict,          // dict under construction (pushed by OnOpenMap)
    Value,         // finished value stored by Consume() when there is no open container
    Key,           // dict key waiting for its value (pushed by OnMapKey)
};
- static inline TStringBuf ToStr(EKind kind) noexcept {
- switch (kind) {
- case Undefined:
- return TStringBuf("Undefined");
- case Array:
- return TStringBuf("Array");
- case Dict:
- return TStringBuf("Dict");
- case Value:
- return TStringBuf("Value");
- case Key:
- return TStringBuf("Key");
- }
- Y_UNREACHABLE();
- }
- struct TUnref {
- static inline void Destroy(PyObject* o) noexcept {
- Py_XDECREF(o);
- }
- };
- using TObjectPtr = TAutoPtr<PyObject, TUnref>;
- static inline TObjectPtr BuildBool(bool val) noexcept {
- if (val) {
- Py_RETURN_TRUE;
- }
- Py_RETURN_FALSE;
- }
- // Translate python exceptions from object-creating functions into c++ exceptions
- // Such errors are reported by returning nullptr
- // When a python error is set and C++ exception is caught by Cython wrapper,
- // Python exception is propagated, while C++ exception is discarded.
- PyObject* CheckNewObject(PyObject* obj) {
- Y_ENSURE(obj != nullptr, "got python exception");
- return obj;
- }
- void CheckRetcode(int retcode) {
- Y_ENSURE(retcode == 0, "got python exception");
- }
- static inline TObjectPtr BuildSmall(long val) {
- #if PY_VERSION_HEX >= 0x03000000
- return CheckNewObject(PyLong_FromLong(val));
- #else
- return CheckNewObject(PyInt_FromLong(val));
- #endif
- }
- PyObject* CreatePyString(TStringBuf str, bool intern, bool mayUnicode) {
- #if PY_VERSION_HEX >= 0x03000000
- Y_UNUSED(mayUnicode);
- PyObject* pyStr = PyUnicode_FromStringAndSize(str.data(), str.size());
- if (intern) {
- PyUnicode_InternInPlace(&pyStr);
- }
- #else
- const bool needUnicode = mayUnicode && !AllOf(str, IsAscii);
- PyObject* pyStr = needUnicode ? PyUnicode_FromStringAndSize(str.data(), str.size())
- : PyString_FromStringAndSize(str.data(), str.size());
- if (intern && !needUnicode) {
- PyString_InternInPlace(&pyStr);
- }
- #endif
- return pyStr;
- }
- struct TVal {
- EKind Kind = Undefined;
- TObjectPtr Val;
- inline TVal() noexcept
- : Kind(Undefined)
- {
- }
- inline TVal(EKind kind, TObjectPtr val) noexcept
- : Kind(kind)
- , Val(val)
- {
- }
- };
- static inline TObjectPtr NoneRef() noexcept {
- Py_RETURN_NONE;
- }
- struct TContext: public TJsonCallbacks {
- const bool InternKeys;
- const bool InternVals;
- const bool MayUnicode;
- TStack<TVal, TVector<TVal>> S;
- inline TContext(bool internKeys, bool internVals, bool mayUnicode)
- : TJsonCallbacks(true)
- , InternKeys(internKeys)
- , InternVals(internVals)
- , MayUnicode(mayUnicode)
- {
- S.emplace();
- }
- inline bool Consume(TObjectPtr o) {
- auto& t = S.top();
- if (t.Kind == Array) {
- CheckRetcode(PyList_Append(t.Val.Get(), o.Get()));
- } else if (t.Kind == Key) {
- auto key = S.top().Val;
- S.pop();
- CheckRetcode(PyDict_SetItem(S.top().Val.Get(), key.Get(), o.Get()));
- } else {
- t = TVal(Value, o);
- }
- return true;
- }
- inline TObjectPtr Pop(EKind expect) {
- auto res = S.top();
- S.pop();
- if (res.Kind != expect) {
- ythrow yexception() << "unexpected kind(expect " << ToStr(expect) << ", got " << ToStr(res.Kind) << ")";
- }
- return res.Val;
- }
- inline void Push(EKind kind, TObjectPtr object) {
- S.push(TVal(kind, object));
- }
- virtual bool OnNull() {
- return Consume(NoneRef());
- }
- virtual bool OnBoolean(bool v) {
- return Consume(BuildBool(v));
- }
- virtual bool OnInteger(long long v) {
- if (v >= (long long)Min<long>()) {
- return Consume(BuildSmall((long)v));
- }
- return Consume(CheckNewObject(PyLong_FromLongLong(v)));
- }
- virtual bool OnUInteger(unsigned long long v) {
- if (v <= (unsigned long long)Max<long>()) {
- return Consume(BuildSmall((long)v));
- }
- return Consume(CheckNewObject(PyLong_FromUnsignedLongLong(v)));
- }
- virtual bool OnDouble(double v) {
- return Consume(CheckNewObject(PyFloat_FromDouble(v)));
- }
- virtual bool OnString(const TStringBuf& v) {
- return Consume(CheckNewObject(CreatePyString(v, InternVals, MayUnicode)));
- }
- virtual bool OnOpenMap() {
- Push(Dict, CheckNewObject(PyDict_New()));
- return true;
- }
- virtual bool OnCloseMap() {
- return Consume(Pop(Dict));
- }
- virtual bool OnMapKey(const TStringBuf& k) {
- Push(Key, CheckNewObject(CreatePyString(k, InternKeys, MayUnicode)));
- return true;
- }
- virtual bool OnOpenArray() {
- Push(Array, CheckNewObject(PyList_New(0)));
- return true;
- }
- virtual bool OnCloseArray() {
- return Consume(Pop(Array));
- }
- };
- }
- PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys, bool internVals, bool mayUnicode) {
- TContext ctx(internKeys, internVals, mayUnicode);
- if (!len) {
- ythrow yexception() << "parse error: zero length input string";
- }
- if (!NJson::ReadJsonFast(TStringBuf(data, len), &ctx)) {
- ythrow yexception() << "parse error";
- }
- auto& s = ctx.S;
- if (!s || s.top().Kind != Value) {
- ythrow yexception() << "shit happen";
- }
- return s.top().Val.Release();
- }
|