quote.cpp 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. #include "quote.h"
  2. #include <util/memory/tempbuf.h>
  3. #include <util/string/ascii.h>
  4. #include <util/string/cstriter.h>
  5. /* note: (x & 0xdf) makes x upper case */
  6. #define GETXC \
  7. do { \
  8. c *= 16; \
  9. c += (x[0] >= 'A' ? ((x[0] & 0xdf) - 'A') + 10 : (x[0] - '0')); \
  10. ++x; \
  11. } while (0)
  12. #define GETSBXC \
  13. do { \
  14. c *= 16; \
  15. c += (x[0] >= 'A' ? ((x[0] & 0xdf) - 'A') + 10 : (x[0] - '0')); \
  16. x.Skip(1); \
  17. } while (0)
  18. namespace {
  19. class TFromHexZeroTerm {
  20. public:
  21. static inline char x2c(const char*& x) {
  22. if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1]))
  23. return '%';
  24. ui8 c = 0;
  25. GETXC;
  26. GETXC;
  27. return c;
  28. }
  29. static inline char x2c(TStringBuf& x) {
  30. if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1]))
  31. return '%';
  32. ui8 c = 0;
  33. GETSBXC;
  34. GETSBXC;
  35. return c;
  36. }
  37. };
  38. class TFromHexLenLimited {
  39. public:
  40. explicit TFromHexLenLimited(const char* end)
  41. : End(end)
  42. {
  43. }
  44. inline char x2c(const char*& x) {
  45. if (x + 2 > End)
  46. return '%';
  47. return TFromHexZeroTerm::x2c(x);
  48. }
  49. private:
  50. const char* End;
  51. };
  52. }
  53. static inline char d2x(unsigned x) {
  54. return (char)((x < 10) ? ('0' + x) : ('A' + x - 10));
  55. }
  56. static inline const char* FixZero(const char* s) noexcept {
  57. return s ? s : "";
  58. }
  59. // we escape:
  60. // '\"', '|', '(', ')',
  61. // '%', '&', '+', ',',
  62. // '#', '<', '=', '>',
  63. // '[', '\\',']', '?',
  64. // ':', '{', '}', '^'
  65. // all below ' ' (0x20) and above '~' (0x7E).
  66. // ' ' converted to '+'
  67. static const bool chars_to_url_escape[256] = {
  68. // 0 1 2 3 4 5 6 7 8 9 A B C D E F
  69. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //0
  70. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //1
  71. 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, //2
  72. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, //3
  73. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //4
  74. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, //5
  75. 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //6
  76. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, //7
  77. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //8
  78. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //9
  79. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //A
  80. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //B
  81. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //C
  82. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //D
  83. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //E
  84. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //F
  85. };
  86. template <class It1, class It2, class It3>
  87. static inline It1 Escape(It1 to, It2 from, It3 end, const bool* escape_map = chars_to_url_escape) {
  88. while (from != end) {
  89. if (escape_map[(unsigned char)*from]) {
  90. *to++ = '%';
  91. *to++ = d2x((unsigned char)*from >> 4);
  92. *to++ = d2x((unsigned char)*from & 0xF);
  93. } else {
  94. *to++ = (*from == ' ' ? '+' : *from);
  95. }
  96. ++from;
  97. }
  98. *to = 0;
  99. return to;
  100. }
  101. template <class It1, class It2, class It3, class FromHex>
  102. static inline It1 Unescape(It1 to, It2 from, It3 end, FromHex fromHex) {
  103. (void)fromHex;
  104. while (from != end) {
  105. switch (*from) {
  106. case '%':
  107. ++from;
  108. *to++ = fromHex.x2c(from);
  109. break;
  110. case '+':
  111. *to++ = ' ';
  112. ++from;
  113. break;
  114. default:
  115. *to++ = *from++;
  116. }
  117. }
  118. *to = 0;
  119. return to;
  120. }
  121. // CGIEscape returns pointer to the end of the result string
  122. // so as it could be possible to populate single long buffer
  123. // with several calls to CGIEscape in a row.
  124. char* CGIEscape(char* to, const char* from) {
  125. return Escape(to, FixZero(from), TCStringEndIterator());
  126. }
  127. char* CGIEscape(char* to, const char* from, size_t len) {
  128. return Escape(to, from, from + len);
  129. }
  130. void CGIEscape(TString& url) {
  131. TTempBuf tempBuf(CgiEscapeBufLen(url.size()));
  132. char* to = tempBuf.Data();
  133. url.AssignNoAlias(to, CGIEscape(to, url.data(), url.size()));
  134. }
  135. TString CGIEscapeRet(const TStringBuf url) {
  136. TString to;
  137. to.ReserveAndResize(CgiEscapeBufLen(url.size()));
  138. to.resize(CGIEscape(to.begin(), url.data(), url.size()) - to.data());
  139. return to;
  140. }
  141. TString& AppendCgiEscaped(const TStringBuf value, TString& to) {
  142. const size_t origLength = to.length();
  143. to.ReserveAndResize(origLength + CgiEscapeBufLen(value.size()));
  144. to.resize(CGIEscape(to.begin() + origLength, value.data(), value.size()) - to.data());
  145. return to;
  146. }
  147. // More general version of CGIEscape. The optional safe parameter specifies
  148. // additional characters that should not be quoted — its default value is '/'.
  149. // Also returns pointer to the end of result string.
  150. template <class It1, class It2, class It3>
  151. static inline It1 Quote(It1 to, It2 from, It3 end, const char* safe) {
  152. bool escape_map[256];
  153. memcpy(escape_map, chars_to_url_escape, 256);
  154. // RFC 3986 Uniform Resource Identifiers (URI): Generic Syntax
  155. // lists following reserved characters:
  156. const char* reserved = ":/?#[]@!$&\'()*+,;=";
  157. for (const char* p = reserved; *p; ++p) {
  158. escape_map[(unsigned char)*p] = true;
  159. }
  160. // characters we think are safe at the moment
  161. for (const char* p = safe; *p; ++p) {
  162. escape_map[(unsigned char)*p] = false;
  163. }
  164. return Escape(to, from, end, escape_map);
  165. }
  166. char* Quote(char* to, const char* from, const char* safe) {
  167. return Quote(to, FixZero(from), TCStringEndIterator(), safe);
  168. }
  169. char* Quote(char* to, const TStringBuf s, const char* safe) {
  170. return Quote(to, s.data(), s.data() + s.size(), safe);
  171. }
  172. void Quote(TString& url, const char* safe) {
  173. TTempBuf tempBuf(CgiEscapeBufLen(url.size()));
  174. char* to = tempBuf.Data();
  175. url.AssignNoAlias(to, Quote(to, url, safe));
  176. }
  177. char* CGIUnescape(char* to, const char* from) {
  178. return Unescape(to, FixZero(from), TCStringEndIterator(), TFromHexZeroTerm());
  179. }
  180. char* CGIUnescape(char* to, const char* from, size_t len) {
  181. return Unescape(to, from, from + len, TFromHexLenLimited(from + len));
  182. }
  183. void CGIUnescape(TString& url) {
  184. if (url.empty()) {
  185. return;
  186. }
  187. if (url.IsDetached()) { // in-place when refcount == 1
  188. char* resBegin = url.begin();
  189. const char* resEnd = CGIUnescape(resBegin, resBegin, url.size());
  190. url.resize(resEnd - resBegin);
  191. } else {
  192. url = CGIUnescapeRet(url);
  193. }
  194. }
  195. TString CGIUnescapeRet(const TStringBuf from) {
  196. TString to;
  197. to.ReserveAndResize(CgiUnescapeBufLen(from.size()));
  198. to.resize(CGIUnescape(to.begin(), from.data(), from.size()) - to.data());
  199. return to;
  200. }
  201. char* UrlUnescape(char* to, TStringBuf from) {
  202. while (!from.empty()) {
  203. char ch = from[0];
  204. from.Skip(1);
  205. if ('%' == ch && 2 <= from.length())
  206. ch = TFromHexZeroTerm::x2c(from);
  207. *to++ = ch;
  208. }
  209. *to = 0;
  210. return to;
  211. }
  212. void UrlUnescape(TString& url) {
  213. if (url.empty()) {
  214. return;
  215. }
  216. if (url.IsDetached()) { // in-place when refcount == 1
  217. char* resBegin = url.begin();
  218. const char* resEnd = UrlUnescape(resBegin, url);
  219. url.resize(resEnd - resBegin);
  220. } else {
  221. url = UrlUnescapeRet(url);
  222. }
  223. }
  224. TString UrlUnescapeRet(const TStringBuf from) {
  225. TString to;
  226. to.ReserveAndResize(CgiUnescapeBufLen(from.size()));
  227. to.resize(UrlUnescape(to.begin(), from) - to.data());
  228. return to;
  229. }
  230. char* UrlEscape(char* to, const char* from, bool forceEscape) {
  231. from = FixZero(from);
  232. while (*from) {
  233. const bool escapePercent = (*from == '%') &&
  234. (forceEscape || !((*(from + 1) && IsAsciiHex(*(from + 1)) && *(from + 2) && IsAsciiHex(*(from + 2)))));
  235. if (escapePercent || (unsigned char)*from <= ' ' || (unsigned char)*from > '~') {
  236. *to++ = '%';
  237. *to++ = d2x((unsigned char)*from >> 4);
  238. *to++ = d2x((unsigned char)*from & 0xF);
  239. } else
  240. *to++ = *from;
  241. ++from;
  242. }
  243. *to = 0;
  244. return to;
  245. }
  246. void UrlEscape(TString& url, bool forceEscape) {
  247. TTempBuf tempBuf(CgiEscapeBufLen(url.size()));
  248. char* to = tempBuf.Data();
  249. url.AssignNoAlias(to, UrlEscape(to, url.data(), forceEscape));
  250. }
  251. TString UrlEscapeRet(const TStringBuf from, bool forceEscape) {
  252. TString to;
  253. to.ReserveAndResize(CgiEscapeBufLen(from.size()));
  254. to.resize(UrlEscape(to.begin(), from.begin(), forceEscape) - to.data());
  255. return to;
  256. }