Browse Source

[relaxed_escaper] Speed up

sobols 2 years ago
parent
commit
230d573d67
1 changed files with 19 additions and 15 deletions
  1. 19 15
      library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h

+ 19 - 15
library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h

@@ -41,14 +41,14 @@ namespace NEscJ {
     }
 
     struct TEscapeUtil {
-        static const size_t ESCAPE_C_BUFFER_SIZE = 6;
+        static constexpr size_t ESCAPE_C_BUFFER_SIZE = 6;
 
-        template <bool asunicode>
+        template <bool asunicode, bool hasCustomSafeUnsafe>
         static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
             // (1) Printable characters go as-is, except backslash and double quote.
             // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible).
             // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal.
-            if (safe.find(c) != TStringBuf::npos) {
+            if (hasCustomSafeUnsafe && safe.find(c) != TStringBuf::npos) {
                 r[0] = c;
                 return 1;
             }
@@ -60,7 +60,7 @@ namespace NEscJ {
                 r[0] = '\\';
                 r[1] = '\\';
                 return 2;
-            } else if (IsPrintable(c) && unsafe.find(c) == TStringBuf::npos) {
+            } else if (IsPrintable(c) && (!hasCustomSafeUnsafe || unsafe.find(c) == TStringBuf::npos)) {
                 r[0] = c;
                 return 1;
             } else if (c == '\b') {
@@ -109,46 +109,50 @@ namespace NEscJ {
                 return 4;
             }
         }
-
-        static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
-            return EscapeJ<false>(c, next, r, safe, unsafe);
-        }
     };
 
     inline size_t SuggestBuffer(size_t len) {
         return len * TEscapeUtil::ESCAPE_C_BUFFER_SIZE;
     }
 
-    template <bool tounicode>
-    inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+    template <bool tounicode, bool hasCustomSafeUnsafe>
+    inline size_t EscapeJImpl(const char* str, size_t len, char* out, TStringBuf safe, TStringBuf unsafe) {
         char* out0 = out;
         char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE];
 
         size_t i, j;
         for (i = 0, j = 0; i < len; ++i) {
-            size_t rlen = TEscapeUtil::EscapeJ<tounicode>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe);
+            size_t rlen = TEscapeUtil::EscapeJ<tounicode, hasCustomSafeUnsafe>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe);
 
             if (rlen > 1) {
-                strncpy(out, str + j, i - j);
+                memcpy(out, str + j, i - j);
                 out += i - j;
                 j = i + 1;
 
-                strncpy(out, buffer, rlen);
+                memcpy(out, buffer, rlen);
                 out += rlen;
             }
         }
 
         if (j > 0) {
-            strncpy(out, str + j, len - j);
+            memcpy(out, str + j, len - j);
             out += len - j;
         } else {
-            strncpy(out, str, len);
+            memcpy(out, str, len);
             out += len;
         }
 
         return out - out0;
     }
 
+    template <bool tounicode>
+    inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+        if (Y_LIKELY(safe.empty() && unsafe.empty())) {
+            return EscapeJImpl<tounicode, false>(str, len, out, safe, unsafe);
+        }
+        return EscapeJImpl<tounicode, true>(str, len, out, safe, unsafe);
+    }
+
     template <bool quote, bool tounicode>
     inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
         TTempBuf b(SuggestBuffer(in.size()) + 2);