#pragma once #include #include #include #include namespace NEscJ { // almost copypaste from util/string/escape.h // todo: move there (note difference in IsPrintable and handling of string) inline char HexDigit(char value) { if (value < 10) return '0' + value; else return 'A' + value - 10; } inline char OctDigit(char value) { return '0' + value; } inline bool IsUTF8(ui8 c) { return c < 0xf5 && c != 0xC0 && c != 0xC1; } inline bool IsControl(ui8 c) { return c < 0x20 || c == 0x7f; } inline bool IsPrintable(ui8 c) { return IsUTF8(c) && !IsControl(c); } inline bool IsHexDigit(ui8 c) { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); } inline bool IsOctDigit(ui8 c) { return c >= '0' && c <= '7'; } struct TEscapeUtil { static constexpr size_t ESCAPE_C_BUFFER_SIZE = 6; template static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) { // (1) Printable characters go as-is, except backslash and double quote. // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. if (hasCustomSafeUnsafe && safe.find(c) != TStringBuf::npos) { r[0] = c; return 1; } if (c == '\"') { r[0] = '\\'; r[1] = '\"'; return 2; } else if (c == '\\') { r[0] = '\\'; r[1] = '\\'; return 2; } else if (IsPrintable(c) && (!hasCustomSafeUnsafe || unsafe.find(c) == TStringBuf::npos)) { r[0] = c; return 1; } else if (c == '\b') { r[0] = '\\'; r[1] = 'b'; return 2; } else if (c == '\f') { r[0] = '\\'; r[1] = 'f'; return 2; } else if (c == '\r') { r[0] = '\\'; r[1] = 'r'; return 2; } else if (c == '\n') { r[0] = '\\'; r[1] = 'n'; return 2; } else if (c == '\t') { r[0] = '\\'; r[1] = 't'; return 2; } else if (asunicode && IsUTF8(c)) { // utf8 controls escape for json r[0] = '\\'; r[1] = 'u'; r[2] = '0'; r[3] = '0'; r[4] = HexDigit((c & 0xF0) >> 4); r[5] = HexDigit((c & 0x0F) >> 0); return 6; } else if (c < 8 && !IsOctDigit(next)) { r[0] = '\\'; r[1] = OctDigit(c); return 2; } else if (!IsHexDigit(next)) { r[0] = '\\'; r[1] = 'x'; r[2] = HexDigit((c & 0xF0) >> 4); r[3] = HexDigit((c & 0x0F) >> 0); return 4; } else { r[0] = '\\'; r[1] = OctDigit((c & 0700) >> 6); r[2] = OctDigit((c & 0070) >> 3); r[3] = OctDigit((c & 0007) >> 0); return 4; } } }; inline size_t SuggestBuffer(size_t len) { return len * TEscapeUtil::ESCAPE_C_BUFFER_SIZE; } template inline size_t EscapeJImpl(const char* str, size_t len, char* out, TStringBuf safe, TStringBuf unsafe) { char* out0 = out; char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE]; size_t i, j; for (i = 0, j = 0; i < len; ++i) { size_t rlen = TEscapeUtil::EscapeJ(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe); if (rlen > 1) { memcpy(out, str + j, i - j); out += i - j; j = i + 1; memcpy(out, buffer, rlen); out += rlen; } } if (j > 0) { memcpy(out, str + j, len - j); out += len - j; } else { memcpy(out, str, len); out += len; } return out - out0; } template inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { if (Y_LIKELY(safe.empty() && unsafe.empty())) { return EscapeJImpl(str, len, out, safe, unsafe); } return EscapeJImpl(str, len, out, safe, unsafe); } template inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { TTempBuf b(SuggestBuffer(in.size()) + 2); if (quote) b.Append("\"", 1); b.Proceed(EscapeJ(in.data(), in.size(), b.Current(), safe, unsafe)); if (quote) b.Append("\"", 1); out.Write(b.Data(), b.Filled()); } template inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { TTempBuf b(SuggestBuffer(in.size()) + 2); if (quote) b.Append("\"", 1); b.Proceed(EscapeJ(in.data(), in.size(), b.Current(), safe, unsafe)); if (quote) b.Append("\"", 1); out.append(b.Data(), b.Filled()); } template inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { TString s; EscapeJ(in, s, safe, unsafe); return s; } // If the template parameter "tounicode" is ommited, then use the default value false inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { return EscapeJ(str, len, out, safe, unsafe); } template inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { EscapeJ(in, out, safe, unsafe); } template inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { EscapeJ(in, out, safe, unsafe); } template inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { return EscapeJ(in, safe, unsafe); } }