cescape_decode.h 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. #pragma once
  2. #include <util/system/types.h>
  3. #include <algorithm>
  4. #include <cstring>
  5. namespace NYsonPull {
  6. namespace NDetail {
  7. namespace NCEscape {
  8. namespace NImpl {
  9. inline ui8 as_digit(ui8 c) {
  10. return c - ui8{'0'};
  11. }
  12. inline ui8 as_hexdigit(ui8 c) {
  13. static constexpr ui8 hex_decode_map[256] = {
  14. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  15. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  16. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  17. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  18. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255,
  19. 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 255,
  20. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  21. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  22. 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255,
  23. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  24. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  25. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  26. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  27. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  28. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  29. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  30. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  31. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  32. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  33. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  34. 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  35. 255, 255, 255, 255};
  36. return hex_decode_map[c];
  37. }
  38. inline const ui8* read_oct(ui8& result, const ui8* p, ui8 n) {
  39. auto digit = ui8{0};
  40. while (n-- && (digit = as_digit(*p)) < 8) {
  41. result = result * 8 + digit;
  42. ++p;
  43. }
  44. return p;
  45. }
  46. inline const ui8* read_hex(ui8& result, const ui8* p, ui8 n) {
  47. auto digit = ui8{0};
  48. while (n-- && (digit = as_hexdigit(*p)) < 16) {
  49. result = result * 16 + digit;
  50. ++p;
  51. }
  52. return p;
  53. }
  54. inline const ui8* unescape_char_and_advance(
  55. ui8& result,
  56. const ui8* p,
  57. const ui8* end) {
  58. switch (*p) {
  59. default:
  60. result = *p;
  61. ++p;
  62. break;
  63. case 'b':
  64. result = '\b';
  65. ++p;
  66. break;
  67. case 'f':
  68. result = '\f';
  69. ++p;
  70. break;
  71. case 'n':
  72. result = '\n';
  73. ++p;
  74. break;
  75. case 'r':
  76. result = '\r';
  77. ++p;
  78. break;
  79. case 't':
  80. result = '\t';
  81. ++p;
  82. break;
  83. case 'x': {
  84. ++p;
  85. result = 0;
  86. auto* next = read_hex(
  87. result,
  88. p, std::min<ptrdiff_t>(2, end - p));
  89. if (next > p) {
  90. p = next;
  91. } else {
  92. result = 'x';
  93. }
  94. } break;
  95. case '0':
  96. case '1':
  97. case '2':
  98. case '3':
  99. result = 0;
  100. p = read_oct(
  101. result,
  102. p, std::min<ptrdiff_t>(3, end - p));
  103. break;
  104. case '4':
  105. case '5':
  106. case '6':
  107. case '7':
  108. result = 0;
  109. p = read_oct(
  110. result,
  111. p, std::min<ptrdiff_t>(2, end - p));
  112. break;
  113. }
  114. return p;
  115. }
  116. template <typename T, typename U>
  117. inline void unescape_impl(
  118. const ui8* p,
  119. const ui8* end,
  120. T&& consume_one,
  121. U&& consume_span) {
  122. while (p < end) {
  123. auto* escaped = static_cast<const ui8*>(
  124. ::memchr(p, '\\', end - p));
  125. if (escaped == nullptr) {
  126. consume_span(p, end - p);
  127. return;
  128. } else {
  129. consume_span(p, escaped - p);
  130. auto c = ui8{'\\'};
  131. p = escaped + 1;
  132. if (p < end) {
  133. p = unescape_char_and_advance(c, p, end);
  134. }
  135. consume_one(c);
  136. }
  137. }
  138. }
  139. }
  140. } // namespace NCEscape
  141. } // namespace NDetail
  142. }