Browse Source

library/yson: Add NodeFromJsonStringIterative

AI для
ionagamed 1 year ago
parent
commit
bbdd76a966

+ 11 - 1
library/cpp/yson/json/yson2json_adapter.cpp

@@ -1,9 +1,13 @@
 #include "yson2json_adapter.h"
 
 namespace NYT {
-    TYson2JsonCallbacksAdapter::TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException)
+    TYson2JsonCallbacksAdapter::TYson2JsonCallbacksAdapter(
+        ::NYson::TYsonConsumerBase* impl, // stored as Impl_
+        bool throwException, // forwarded to the NJson::TJsonCallbacks base
+        ui64 maxDepth) // stored as MaxDepth_; OnOpenArray/OnOpenMap return false when the context stack grows past it
         : NJson::TJsonCallbacks(throwException)
         , Impl_(impl)
+        , MaxDepth_(maxDepth)
     {
     }
 
@@ -46,6 +50,9 @@ namespace NYT {
     bool TYson2JsonCallbacksAdapter::OnOpenArray() {
+        // Enforce the nesting limit before WrapIfListItem() runs and before the
+        // context stack is modified, so a rejected array leaves the adapter
+        // state untouched. `size() >= MaxDepth_` here is the same condition as
+        // `size() > MaxDepth_` evaluated after the push.
+        if (State_.ContextStack.size() >= MaxDepth_) {
+            return false;
+        }
         WrapIfListItem();
         State_.ContextStack.push(true);
         Impl_->OnBeginList();
         return true;
     }
@@ -59,6 +66,9 @@ namespace NYT {
     bool TYson2JsonCallbacksAdapter::OnOpenMap() {
+        // Enforce the nesting limit before WrapIfListItem() runs and before the
+        // context stack is modified, so a rejected map leaves the adapter
+        // state untouched. `size() >= MaxDepth_` here is the same condition as
+        // `size() > MaxDepth_` evaluated after the push.
+        if (State_.ContextStack.size() >= MaxDepth_) {
+            return false;
+        }
         WrapIfListItem();
         State_.ContextStack.push(false);
         Impl_->OnBeginMap();
         return true;
     }

+ 5 - 1
library/cpp/yson/json/yson2json_adapter.h

@@ -21,7 +21,10 @@ namespace NYT {
         };
 
     public:
-        TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException = false);
+        TYson2JsonCallbacksAdapter(
+            ::NYson::TYsonConsumerBase* impl,
+            bool throwException = false,
+            ui64 maxDepth = std::numeric_limits<ui64>::max());
 
         bool OnNull() override;
         bool OnBoolean(bool val) override;
@@ -49,5 +52,6 @@ namespace NYT {
     private:
         ::NYson::TYsonConsumerBase* Impl_;
         TState State_;
+        ui64 MaxDepth_;
     };
 }

+ 16 - 0
library/cpp/yson/node/node_io.cpp

@@ -151,6 +151,22 @@ TNode NodeFromJsonString(const TStringBuf input)
     return result;
 }
 
+TNode NodeFromJsonStringIterative(const TStringBuf input, ui64 maxDepth)
+{
+    // Reader settings: iterative parser, no UTF-8 validation, nesting bounded by maxDepth.
+    NJson::TJsonReaderConfig readerConfig;
+    readerConfig.UseIterativeParser = true;
+    readerConfig.MaxDepth = maxDepth;
+    readerConfig.DontValidateUtf8 = true;
+
+    // The builder fills `node` from the events forwarded by the adapter; the
+    // adapter is given the same depth limit and is created with throwException = true.
+    TNode node;
+    TNodeBuilder nodeBuilder(&node);
+    TYson2JsonCallbacksAdapter adapter(&nodeBuilder, /*throwException*/ true, maxDepth);
+
+    TMemoryInput source(input);
+    NJson::ReadJson(&source, &readerConfig, &adapter);
+    return node;
+}
+
 TNode NodeFromJsonValue(const NJson::TJsonValue& input)
 {
     TNode result;

+ 6 - 0
library/cpp/yson/node/node_io.h

@@ -33,6 +33,12 @@ void NodeToCanonicalYsonStream(const TNode& node, IOutputStream* output, ::NYson
 TNode NodeFromJsonString(const TStringBuf input);
 bool TryNodeFromJsonString(const TStringBuf input, TNode& dst);
 
+// Parse TNode from a string in JSON format using an iterative JSON parser.
+// An iterative parser still needs a stack, but keeps it on the heap instead of on the system call stack.
+// This mitigates stack overflows on deeply nested JSON strings when the thread stack is small
+//  (e.g. with 256 KB of stack, parsing "[[[[[[...]]]]]]" crashes the whole binary).
+TNode NodeFromJsonStringIterative(const TStringBuf input, ui64 maxDepth = 1024);
+
 // Convert TJsonValue to TNode
 TNode NodeFromJsonValue(const ::NJson::TJsonValue& input);
 

+ 53 - 0
library/cpp/yson/node/node_io_ut.cpp

@@ -0,0 +1,53 @@
+#include "node_io.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/stream/mem.h>
+
+using namespace NYson;
+
+namespace {
+    // Writes to `stream` a JSON document made of one enclosing map whose only
+    // value is a chain of `depth - 1` empty nested arrays, i.e. `depth` levels
+    // of nesting in total. Example: depth == 3 produces {"key":[[]]}.
+    // For depth <= 1 there are no arrays to emit, so the value is `null`
+    // to keep the document valid JSON.
+    void GenerateDeepJson(TStringStream& stream, ui64 depth) {
+        // `depth - 1` on an unsigned zero would wrap around to 2^64 - 1, so
+        // saturate at zero instead. The loop index is ui64 to match `depth`.
+        const ui64 arrayCount = depth > 0 ? depth - 1 : 0;
+
+        stream << "{\"key\":";
+        if (arrayCount == 0) {
+            stream << "null";
+        }
+        for (ui64 i = 0; i < arrayCount; ++i) {
+            stream << "[";
+        }
+        for (ui64 i = 0; i < arrayCount; ++i) {
+            stream << "]";
+        }
+        stream << "}";
+    }
+}
+
+Y_UNIT_TEST_SUITE(TestNodeFromJsonStringIterativeTest) {
+    // Every case calls NodeFromJsonStringIterative with its default maxDepth.
+
+    // Nesting far beyond the limit must surface as an exception.
+    Y_UNIT_TEST(NoCrashOn1e5Brackets) {
+        TStringStream deepJson;
+        GenerateDeepJson(deepJson, static_cast<ui32>(1e5));
+
+        UNIT_ASSERT_EXCEPTION(
+            NYT::NodeFromJsonStringIterative(deepJson.Str()),
+            std::exception);
+    }
+
+    // A depth of 1025 is still expected to be rejected with an exception.
+    Y_UNIT_TEST(NoCrashOn1025Brackets) {
+        constexpr ui32 depth = 1025;
+
+        TStringStream deepJson;
+        GenerateDeepJson(deepJson, depth);
+
+        UNIT_ASSERT_EXCEPTION(
+            NYT::NodeFromJsonStringIterative(deepJson.Str()),
+            std::exception);
+    }
+
+    // A depth of 1024 is expected to parse without throwing.
+    Y_UNIT_TEST(NoErrorOn1024Brackets) {
+        constexpr ui32 depth = 1024;
+
+        TStringStream deepJson;
+        GenerateDeepJson(deepJson, depth);
+
+        UNIT_ASSERT_NO_EXCEPTION(NYT::NodeFromJsonStringIterative(deepJson.Str()));
+    }
+}

+ 1 - 0
library/cpp/yson/node/ut/ya.make

@@ -2,6 +2,7 @@ UNITTEST_FOR(library/cpp/yson/node)
 
 SRCS(
     node_ut.cpp
+    node_io_ut.cpp
 )
 
 END()