cescape.h 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #pragma once
  2. #include "byte_writer.h"
  3. #include "cescape_decode.h"
  4. #include "cescape_encode.h"
  5. #include "macros.h"
  6. #include <util/generic/strbuf.h>
  7. #include <util/generic/string.h>
  8. #include <util/generic/vector.h>
  9. /* REFERENCES FOR ESCAPE SEQUENCE INTERPRETATION:
  10. * C99 p. 6.4.3 Universal character names.
  11. * C99 p. 6.4.4.4 Character constants.
  12. *
  13. * <simple-escape-sequence> ::= {
  14. * \' , \" , \? , \\ ,
  15. * \a , \b , \f , \n , \r , \t , \v
  16. * }
  17. *
  18. * <octal-escape-sequence> ::= \ <octal-digit> {1, 3}
  19. * <hexadecimal-escape-sequence> ::= \x <hexadecimal-digit> +
  20. * <universal-character-name> ::= \u <hexadecimal-digit> {4}
  21. * || \U <hexadecimal-digit> {8}
  22. *
  23. * NOTE (6.4.4.4.7):
  24. * Each octal or hexadecimal escape sequence is the longest sequence of characters that can
  25. * constitute the escape sequence.
  26. *
  27. * THEREFORE:
  28. * - Octal escape sequence spans up to (but not including) the first non-octal-digit character.
  29. * - Octal escape sequence always terminates after three octal digits.
  30. * - Hexadecimal escape sequence spans up to (but not including) the first non-hexadecimal-digit character.
  31. * - Universal character name consists of exactly 4 or 8 hexadecimal digits.
  32. *
  33. */
  34. namespace NYsonPull {
  35. namespace NDetail {
  36. namespace NCEscape {
  37. inline void encode(TString& dest, TStringBuf data) {
  38. NImpl::escape_impl(
  39. reinterpret_cast<const ui8*>(data.data()),
  40. data.size(),
  41. [&](const ui8* str, size_t size) {
  42. dest.append(
  43. reinterpret_cast<const char*>(str),
  44. size);
  45. });
  46. }
  47. // dest must have at least 4*data.size() bytes available
  48. inline size_t encode(ui8* dest, TStringBuf data) {
  49. auto* dest_begin = dest;
  50. NImpl::escape_impl(
  51. reinterpret_cast<const ui8*>(data.data()),
  52. data.size(),
  53. [&](const ui8* str, size_t size) {
  54. ::memcpy(dest, str, size);
  55. dest += size;
  56. });
  57. return dest - dest_begin;
  58. }
  59. template <typename U>
  60. void encode(byte_writer<U>& dest, TStringBuf data) {
  61. auto& buffer = dest.stream().buffer();
  62. if (Y_LIKELY(buffer.available() >= data.size() * 4)) {
  63. auto size = encode(buffer.pos(), data);
  64. dest.advance(size);
  65. } else {
  66. NImpl::escape_impl(
  67. reinterpret_cast<const ui8*>(data.data()),
  68. data.size(),
  69. [&](const ui8* str, size_t size) {
  70. dest.write(str, size);
  71. });
  72. }
  73. }
  74. inline TString encode(TStringBuf data) {
  75. TString result;
  76. result.reserve(data.size());
  77. encode(result, data);
  78. return result;
  79. }
  80. inline void decode(TString& dest, TStringBuf data) {
  81. NImpl::unescape_impl(
  82. reinterpret_cast<const ui8*>(data.begin()),
  83. reinterpret_cast<const ui8*>(data.end()),
  84. [&](ui8 c) {
  85. dest += c;
  86. },
  87. [&](const ui8* p, size_t len) {
  88. dest.append(reinterpret_cast<const char*>(p), len);
  89. });
  90. }
  91. inline void decode_inplace(TVector<ui8>& data) {
  92. auto* out = static_cast<ui8*>(
  93. ::memchr(data.data(), '\\', data.size()));
  94. if (out == nullptr) {
  95. return;
  96. }
  97. NImpl::unescape_impl(
  98. out,
  99. data.data() + data.size(),
  100. [&](ui8 c) {
  101. *out++ = c;
  102. },
  103. [&](const ui8* p, size_t len) {
  104. ::memmove(out, p, len);
  105. out += len;
  106. });
  107. data.resize(out - &data[0]);
  108. }
  109. inline TString decode(TStringBuf data) {
  110. TString result;
  111. result.reserve(data.size());
  112. decode(result, data);
  113. return result;
  114. }
  115. ATTRIBUTE(noinline, cold)
  116. inline TString quote(TStringBuf str) {
  117. TString result;
  118. result.reserve(str.size() + 16);
  119. result += '"';
  120. encode(result, str);
  121. result += '"';
  122. return result;
  123. }
  124. ATTRIBUTE(noinline, cold)
  125. inline TString quote(ui8 ch) {
  126. char c = ch;
  127. return quote(TStringBuf(&c, 1));
  128. }
  129. }
  130. } // namespace NDetail
  131. }