Browse Source

parse line on deploy by option

ivanmorozov 2 years ago
parent
commit
f6fd83a9bc

+ 1 - 0
library/cpp/string_utils/CMakeLists.txt

@@ -7,6 +7,7 @@
 
 
 add_subdirectory(base64)
+add_subdirectory(csv)
 add_subdirectory(indent_text)
 add_subdirectory(levenshtein_diff)
 add_subdirectory(parse_size)

+ 17 - 0
library/cpp/string_utils/csv/CMakeLists.darwin.txt

@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target for the CSV splitting helpers (macOS variant).
+add_library(cpp-string_utils-csv)
+# Translation units compiled into the library.
+target_sources(cpp-string_utils-csv PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp
+)
+# PUBLIC: these dependencies are also propagated to targets that link this library.
+target_link_libraries(cpp-string_utils-csv PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+)

+ 18 - 0
library/cpp/string_utils/csv/CMakeLists.linux-aarch64.txt

@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target for the CSV splitting helpers (Linux aarch64 variant).
+add_library(cpp-string_utils-csv)
+# Translation units compiled into the library.
+target_sources(cpp-string_utils-csv PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp
+)
+# PUBLIC: these dependencies are also propagated to targets that link this library.
+target_link_libraries(cpp-string_utils-csv PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+)

+ 18 - 0
library/cpp/string_utils/csv/CMakeLists.linux.txt

@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+# Library target for the CSV splitting helpers (Linux x86_64 variant).
+add_library(cpp-string_utils-csv)
+# Translation units compiled into the library.
+target_sources(cpp-string_utils-csv PRIVATE
+  ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp
+)
+# PUBLIC: these dependencies are also propagated to targets that link this library.
+target_link_libraries(cpp-string_utils-csv PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+)

+ 15 - 0
library/cpp/string_utils/csv/CMakeLists.txt

@@ -0,0 +1,15 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+# Include the platform-specific definition of the target. The three conditions
+# are mutually exclusive; on any other platform nothing is included and the
+# target is not defined.
+if (APPLE)
+  include(CMakeLists.darwin.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+  include(CMakeLists.linux.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+  include(CMakeLists.linux-aarch64.txt)
+endif()

+ 82 - 0
library/cpp/string_utils/csv/csv.cpp

@@ -0,0 +1,82 @@
+#include "csv.h"
+
+// Extracts the field that starts at Begin and returns a view of it.
+//
+// Returns an empty buffer when the input is exhausted (Begin == End). On return
+// Begin points at the delimiter that ended the field, or at End; Step() moves
+// past that delimiter.
+//
+// With Quote == '\0' the input is split on Delimeter only. Otherwise a field
+// whose first character is Quote is treated as escaped: it may contain
+// delimiters, and a doubled quote stands for one literal quote. A field with
+// doubled quotes is assembled in CustomString, so the returned view stays valid
+// only until the next field is assembled.
+//
+// Y_ENSURE fails on the RFC 4180 violations named in the messages below. An
+// escaped field that reaches the end of input without a closing quote is
+// accepted as-is.
+TStringBuf NCsvFormat::CsvSplitter::Consume() {
+    if (Begin == End) {
+        return nullptr;
+    }
+    TString::iterator TokenStart = Begin;
+    TString::iterator TokenEnd = Begin;
+    if (Quote == '\0') {
+        // Quoting is disabled: the field runs up to the next delimiter or End.
+        while (TokenEnd != End && *TokenEnd != Delimeter) {
+            ++TokenEnd;
+        }
+        Begin = TokenEnd;
+        return TStringBuf(TokenStart, TokenEnd);
+    }
+
+    // Drop pieces left behind by a previous call that ended with a failed Y_ENSURE.
+    CustomStringBufs.clear();
+
+    // Builds the result from the pieces collected at doubled quotes plus the
+    // final fragment [first, last). Without collected pieces the fragment is
+    // returned directly as a view into the input.
+    auto finishToken = [this](TString::iterator first, TString::iterator last) -> TStringBuf {
+        if (CustomStringBufs.empty()) {
+            return TStringBuf(first, last);
+        }
+        CustomString.clear();
+        for (const auto& piece : CustomStringBufs) {
+            CustomString += TString{ piece };
+        }
+        CustomString += TString{ TStringBuf(first, last) };
+        CustomStringBufs.clear();
+        return TStringBuf(CustomString);
+    };
+
+    bool Escape = false;
+    if (*Begin == Quote) {
+        // Opening quote: the field content starts right after it.
+        Escape = true;
+        ++TokenStart;
+        ++TokenEnd;
+        Y_ENSURE(TokenStart != End, TStringBuf("RFC4180 violation: quotation mark must be followed by something"));
+    }
+    while (1) {
+        if (TokenEnd == End || (!Escape && *TokenEnd == Delimeter)) {
+            Begin = TokenEnd;
+            // Also reached by an escaped field that never closes; pieces collected
+            // at doubled quotes must be included here instead of being dropped.
+            return finishToken(TokenStart, TokenEnd);
+        } else if (*TokenEnd == Quote) {
+            Y_ENSURE(Escape, TStringBuf("RFC4180 violation: quotation mark must be in the escaped string only"));
+            if (TokenEnd + 1 == End) {
+                Begin = TokenEnd + 1;
+            } else if (*(TokenEnd + 1) == Delimeter) {
+                Begin = TokenEnd + 1;
+            } else if (*(TokenEnd + 1) == Quote) {
+                // Doubled quote: keep the text up to and including the first quote,
+                // skip the second one and continue with a fresh fragment.
+                CustomStringBufs.push_back(TStringBuf(TokenStart, (TokenEnd + 1)));
+                TokenEnd += 2;
+                TokenStart = TokenEnd;
+                continue;
+            } else {
+                Y_ENSURE(false, TStringBuf("RFC4180 violation: in escaped string quotation mark must be followed by a delimiter, EOL or another quotation mark"));
+            }
+            // Closing quote: the field ends just before it.
+            return finishToken(TokenStart, TokenEnd);
+        }
+        ++TokenEnd;
+    }
+}
+
+// Reads one logical CSV record from Input and returns it.
+//
+// Physical lines are read with ReadLine and appended to the record. Every Quote
+// character toggles the "inside quotes" state; while that state is set at the end
+// of a physical line, "\n" is appended and reading continues with the next line.
+// Returns whatever has been accumulated (possibly an empty string) when the
+// stream runs out.
+TString NCsvFormat::TLinesSplitter::ConsumeLine() {
+    TString record;
+    TString chunk;
+    bool insideQuotes = false;
+    while (Input.ReadLine(chunk)) {
+        for (const char ch : chunk) {
+            if (ch == Quote) {
+                insideQuotes = !insideQuotes;
+            }
+        }
+        record += chunk;
+        if (!insideQuotes) {
+            return record;
+        }
+        // The record continues on the next physical line.
+        record += "\n";
+    }
+    return record;
+}

+ 64 - 0
library/cpp/string_utils/csv/csv.h

@@ -0,0 +1,64 @@
+#pragma once
+
+#include <util/generic/yexception.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/vector.h>
+#include <util/stream/input.h>
+
+/*
+    Split string by rfc4180
+*/
+
+namespace NCsvFormat {
+    // Reads CSV records from a stream, joining physical lines that belong to one
+    // quoted value. The stream is held by reference, so it must outlive the splitter.
+    class TLinesSplitter {
+    public:
+        // input: stream to read from; quote: character that toggles quoted mode.
+        TLinesSplitter(IInputStream& input, const char quote = '"')
+            : Input(input)
+            , Quote(quote)
+        {
+        }
+
+        // Returns the next record; see the definition in csv.cpp.
+        TString ConsumeLine();
+
+    private:
+        IInputStream& Input;
+        const char Quote;
+    };
+
+    // Splits a single CSV record into fields.
+    // Only iterators into `data` are stored, so the string must stay alive and
+    // unmodified while the splitter is in use.
+    class CsvSplitter {
+    public:
+        // quote = '\0' ignores quoting in values and works like a simple split.
+        CsvSplitter(TString& data, const char delimeter = ',', const char quote = '"')
+            : Delimeter(delimeter)
+            , Quote(quote)
+            , Begin(data.begin())
+            , End(data.end())
+        {
+        }
+
+        // Advances past the current character (the delimiter left by Consume()).
+        // Returns false, without moving, when the input is already exhausted.
+        bool Step() {
+            const bool hasMore = Begin != End;
+            if (hasMore) {
+                ++Begin;
+            }
+            return hasMore;
+        }
+
+        // Returns the next field; see the definition in csv.cpp.
+        TStringBuf Consume();
+
+        // Consumes the remaining input and returns every field as an owned string.
+        // At least one field is always produced, even for empty input.
+        explicit operator TVector<TString>() {
+            TVector<TString> fields;
+
+            for (;;) {
+                fields.push_back(TString{Consume()});
+                if (!Step()) {
+                    break;
+                }
+            }
+
+            return fields;
+        }
+
+    private:
+        const char Delimeter;
+        const char Quote;
+        TString::iterator Begin;
+        const TString::const_iterator End;
+        TString CustomString;
+        TVector<TStringBuf> CustomStringBufs;
+    };
+}

+ 1 - 0
ydb/public/lib/ydb_cli/import/CMakeLists.darwin.txt

@@ -16,6 +16,7 @@ target_link_libraries(lib-ydb_cli-import PUBLIC
   cpp-client-ydb_proto
   public-lib-json_value
   libs-apache-arrow
+  cpp-string_utils-csv
 )
 target_sources(lib-ydb_cli-import PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/import/import.cpp

+ 1 - 0
ydb/public/lib/ydb_cli/import/CMakeLists.linux-aarch64.txt

@@ -17,6 +17,7 @@ target_link_libraries(lib-ydb_cli-import PUBLIC
   cpp-client-ydb_proto
   public-lib-json_value
   libs-apache-arrow
+  cpp-string_utils-csv
 )
 target_sources(lib-ydb_cli-import PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/import/import.cpp

+ 1 - 0
ydb/public/lib/ydb_cli/import/CMakeLists.linux.txt

@@ -17,6 +17,7 @@ target_link_libraries(lib-ydb_cli-import PUBLIC
   cpp-client-ydb_proto
   public-lib-json_value
   libs-apache-arrow
+  cpp-string_utils-csv
 )
 target_sources(lib-ydb_cli-import PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/import/import.cpp

Some files were not shown because too many files changed in this diff