Browse Source

enum_parser: support digit separators in numeric literals
commit_hash:9791d25e9ea02f73329f9755f7c70f335c612121

swarmer 5 months ago
parent
commit
803c95f77d

+ 34 - 0
library/cpp/cppparser/parser.cpp

@@ -1,4 +1,5 @@
 #include <util/generic/hash.h>
+#include <util/string/ascii.h>
 #include <util/string/cast.h>
 #include <util/generic/hash_set.h>
 #include <util/generic/yexception.h>
@@ -127,6 +128,10 @@ private:
                             break;
 
                         case '\'':
+                            if (QuoteCharIsADigitSeparator()) {
+                                Text_.Data += ch;
+                                break;
+                            }
                             Action(ch);
                             State_ = Character;
 
@@ -356,6 +361,35 @@ private:
         }
     }
 
+    /* Tells whether a quote character that follows the text accumulated so far in Text_.Data
+     * should be kept as a digit separator of a numeric literal (ex. 73'709'550'592) instead of
+     * being treated as the start of a character literal.
+     *
+     * Returns false when Text_.Data is empty, when its last character does not pass IsAsciiHex,
+     * or when it ends with a character literal prefix (ex. u8'$') that is not the tail of a
+     * longer identifier. Returns true otherwise.
+     *
+     * NOTE(review): the IsAsciiHex check runs before the prefix check, so presumably only the
+     * "u8" entry can still match at that point ('u', 'U' and 'L' should not pass IsAsciiHex);
+     * the other entries look like a safety net - confirm.
+     */
+    bool QuoteCharIsADigitSeparator() const {
+        const TStringBuf data = Text_.Data;
+        if (data.empty()) {
+            return false;
+        }
+        if (!IsAsciiHex(data.back())) {
+            return false;
+        }
+        // the text ends with a digit-like character: rule out a char literal prefix (ex. `u8'$'`)
+        static constexpr TStringBuf literalPrefixes[] {
+            "u8",
+            "u",
+            "U",
+            "L",
+        };
+        for (const TStringBuf& literalPrefix : literalPrefixes) {
+            if (TStringBuf prev; data.BeforeSuffix(literalPrefix, prev)) { // prev: presumably the text before the matched prefix
+                if (!prev.empty() && (IsAsciiAlnum(prev.back()) || prev.back() == '_' || prev.back() == '$')) {
+                    // the match is only the tail of a longer identifier (ex. a macro name ending with `u8`), try the next prefix
+                    continue;
+                }
+                // it is a prefixed character literal
+                return false;
+            }
+        }
+        return true;
+    }
+
     inline void Action(char ch) {
         Action();
         Text_.Data += ch;

+ 14 - 0
tools/enum_parser/parse_enum/parse_enum.cpp

@@ -146,6 +146,20 @@ public:
         InEnumState = AfterCppName;
     }
 
+    // Keyword tokens are forwarded to AppendValue only while the parser state is
+    // InValue or InValueCall; in every other state they are ignored.
+    void DoKeyword(const TText& text) override {
+        if (InEnumState != InValue && InEnumState != InValueCall) {
+            return;
+        }
+        AppendValue(text.Data);
+    }
+
+    // Character literal tokens are forwarded to AppendValue only while the parser state is
+    // InValue or InValueCall; in every other state they are ignored.
+    void DoCharacter(const TText& text) override {
+        if (InEnumState != InValue && InEnumState != InValueCall) {
+            return;
+        }
+        AppendValue(text.Data);
+    }
+
     void DoMultiLineComment(const TText& text) override {
         Y_ENSURE(text.Data.size() >= 4, "Invalid multiline comment " << text.Data.Quote() << ". ");
         TString commentText = text.Data.substr(2, text.Data.size() - 4);

+ 57 - 0
tools/enum_parser/parse_enum/parse_enum_ut.cpp

@@ -3,10 +3,30 @@
 
 #include <tools/enum_parser/parse_enum/parse_enum.h>
 
+#include <util/generic/array_ref.h>
+#include <util/generic/maybe.h>
+
 typedef TEnumParser::TEnum TEnum;
 typedef TEnumParser::TEnums TEnums;
 typedef TEnumParser::TItems TItems;
 
+namespace {
+    // Expected enumerator: its C++ name and, optionally, the expected text of its value.
+    using TNameValuePair = std::pair<TStringBuf, TMaybe<TStringBuf>>;
+
+    // Compares the items of `e` with `ref` position by position: names must match, both sides
+    // must agree on whether a value is defined, and defined values must be equal. The item
+    // counts are compared last, after the pairwise checks over the common prefix.
+    // `e.CppName` is attached to every assertion as context.
+    void CompareNameValueItems(TConstArrayRef<TNameValuePair> ref, const TEnum& e) {
+        const TItems& items = e.Items;
+        const size_t commonCount = Min(ref.size(), items.size());
+        for (size_t idx = 0; idx != commonCount; ++idx) {
+            const TNameValuePair& expected = ref[idx];
+            const auto& actual = items[idx];
+            UNIT_ASSERT_VALUES_EQUAL_C(actual.CppName, expected.first, e.CppName);
+            UNIT_ASSERT_EQUAL_C(actual.Value.Defined(), expected.second.Defined(), e.CppName);
+            if (!expected.second.Defined() || !actual.Value.Defined()) {
+                // nothing to compare unless both values are present
+                continue;
+            }
+            UNIT_ASSERT_VALUES_EQUAL_C(*actual.Value, *expected.second, e.CppName);
+        }
+        UNIT_ASSERT_VALUES_EQUAL_C(items.size(), ref.size(), e.CppName);
+    }
+}
+
 Y_UNIT_TEST_SUITE(TEnumParserTest) {
 
     Y_UNIT_TEST(MainTest) {
@@ -312,4 +332,41 @@ Y_UNIT_TEST_SUITE(TEnumParserTest) {
             UNIT_ASSERT(CurrentExceptionMessage().Contains("https://clubs.at.yandex-team.ru/stackoverflow/2603"));
         }
     }
+
+    // Parses the /digit_separator resource and checks that quote characters used as digit
+    // separators stay inside the enumerator value text (integer, floating and hex-float
+    // literals), while a prefixed character literal such as u8'.' is kept intact.
+    Y_UNIT_TEST(DigitSeparatorTest) {
+        TString text = NResource::Find("/digit_separator");
+        TMemoryInput input(text.data(), text.size());
+        TEnumParser parser(input);
+        const TEnums& enums = parser.Enums;
+        UNIT_ASSERT_VALUES_EQUAL(enums.size(), 2u);
+        {
+            const TEnum& e = enums[0];
+            UNIT_ASSERT_VALUES_EQUAL(e.CppName, "ELiterals");
+            static constexpr TNameValuePair ref[]{
+                {"Char", "sizeof(u8'.')"},
+                {"Int", "123'456'789"},
+                {"Float1", "int(456'789.123'456)"},
+                {"Float2", "int(1'2e0'1)"},
+                {"Float3", "int(0x1'2p4)"},
+            };
+            CompareNameValueItems(ref, e);
+            UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u);
+        }
+        {
+            // same order of checks as for the first enum; the scope is asserted once
+            const TEnum& e = enums[1];
+            UNIT_ASSERT_VALUES_EQUAL(e.CppName, "ETimePrecision");
+            static constexpr TNameValuePair ref[]{
+                {"MicroSeconds", "1"},
+                {"MilliSeconds", "1'000"},
+                {"Seconds", "1'000'000"},
+                {"Minutes", "60'000'000"},
+                {"Hours", "3'600'000'000"},
+                {"Days", "86'400'000'000"},
+                {"Weeks", "604'800'000'000"},
+            };
+            CompareNameValueItems(ref, e);
+            UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u);
+        }
+    }
 }

+ 19 - 0
tools/enum_parser/parse_enum/ut/digit_separator.h

@@ -0,0 +1,19 @@
+#pragma once
+
+enum class ELiterals {
+    Char = sizeof(u8'.'),
+    Int = 123'456'789,
+    Float1 = int(456'789.123'456),
+    Float2 = int(1'2e0'1),
+    Float3 = int(0x1'2p4),
+};
+
+enum class ETimePrecision : unsigned long long {
+    MicroSeconds    =               1   /* "us" */,
+    MilliSeconds    =           1'000   /* "ms" */,
+    Seconds         =       1'000'000   /* "s" */,
+    Minutes         =      60'000'000   /* "m" */,
+    Hours           =   3'600'000'000   /* "h" */,
+    Days            =  86'400'000'000   /* "d" */,
+    Weeks           = 604'800'000'000   /* "w" */,
+};

+ 1 - 0
tools/enum_parser/parse_enum/ut/ya.make

@@ -8,6 +8,7 @@ PEERDIR(
 SRCDIR(tools/enum_parser/parse_enum)
 
 RESOURCE(
+    digit_separator.h /digit_separator
     enums.h /enums
     badcode.h /badcode
     unbalanced.h /unbalanced