encode.h 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. #pragma once
  2. #include "common.h"
  3. #include <util/stream/output.h>
  4. namespace NUri {
  5. namespace NEncode {
  6. #define CHAR_TYPE_NAME(f) _ECT##f
  7. #define CHAR_TYPE_FLAG(f) ECF##f = 1u << CHAR_TYPE_NAME(f)
  8. enum ECharType {
  9. CHAR_TYPE_NAME(Digit),
  10. CHAR_TYPE_NAME(Lower),
  11. CHAR_TYPE_NAME(Upper),
  12. CHAR_TYPE_NAME(Unres),
  13. CHAR_TYPE_NAME(Stdrd),
  14. };
  15. enum ECharFlag {
  16. CHAR_TYPE_FLAG(Digit),
  17. CHAR_TYPE_FLAG(Lower),
  18. CHAR_TYPE_FLAG(Upper),
  19. CHAR_TYPE_FLAG(Unres),
  20. CHAR_TYPE_FLAG(Stdrd),
  21. // compound group flags
  22. ECGAlpha = ECFUpper | ECFLower,
  23. ECGAlnum = ECGAlpha | ECFDigit,
  24. ECGUnres = ECGAlnum | ECFUnres,
  25. ECGStdrd = ECGUnres | ECFStdrd,
  26. };
  27. #undef CHAR_TYPE_NAME
  28. #undef CHAR_TYPE_FLAG
  29. struct TCharFlags {
  30. ui32 TypeFlags;
  31. ui64 FeatFlags;
  32. ui32 DecodeFld; // decode if FeatureDecodeFieldAllowed
  33. ui32 EncodeFld; // encode if shouldn't be treated as delimiter
  34. TCharFlags(ui64 feat = 0)
  35. : TypeFlags(0)
  36. , FeatFlags(feat)
  37. , DecodeFld(0)
  38. , EncodeFld(0)
  39. {
  40. }
  41. TCharFlags(ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0)
  42. : TypeFlags(type)
  43. , FeatFlags(feat)
  44. , DecodeFld(decmask)
  45. , EncodeFld(encmask)
  46. {
  47. }
  48. TCharFlags& Add(const TCharFlags& val) {
  49. TypeFlags |= val.TypeFlags;
  50. FeatFlags |= val.FeatFlags;
  51. DecodeFld |= val.DecodeFld;
  52. EncodeFld |= val.EncodeFld;
  53. return *this;
  54. }
  55. bool IsAllowed(ui32 fldmask) const {
  56. return (TypeFlags & ECGUnres) || (DecodeFld & ~EncodeFld & fldmask);
  57. }
  58. // should we decode an encoded character
  59. bool IsDecode(ui32 fldmask, ui64 flags) const;
  60. };
  61. class TEncodeMapperBase {
  62. protected:
  63. TEncodeMapperBase()
  64. : Flags(0)
  65. , FldMask(0)
  66. , Q_DecodeAny(false)
  67. {
  68. }
  69. TEncodeMapperBase(ui64 flags, TField::EField fld)
  70. : Flags(flags)
  71. , FldMask(1u << fld)
  72. , Q_DecodeAny(flags & TFeature::FeatureDecodeANY)
  73. {
  74. }
  75. public:
  76. bool Is(TField::EField fld) const {
  77. return FldMask & (1u << fld);
  78. }
  79. protected:
  80. const ui64 Flags;
  81. const ui32 FldMask;
  82. const bool Q_DecodeAny; // this is a special option for username/password
  83. };
  84. // maps a sym or hex character and indicates whether it has to be encoded
  85. class TEncodeMapper
  86. : public TEncodeMapperBase {
  87. public:
  88. TEncodeMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
  89. : TEncodeMapperBase(flags, fld)
  90. , Q_EncodeSpcAsPlus(flags & TFeature::FeatureEncodeSpaceAsPlus)
  91. {
  92. }
  93. // negative=sym, positive=hex, zero=maybesym
  94. int EncodeSym(unsigned char&) const;
  95. int EncodeHex(unsigned char&) const;
  96. protected:
  97. const bool Q_EncodeSpcAsPlus;
  98. };
  99. // indicates whether a character has to be encoded when copying to a field
  100. class TEncodeToMapper
  101. : public TEncodeMapperBase {
  102. public:
  103. TEncodeToMapper()
  104. : TEncodeMapperBase()
  105. {
  106. }
  107. TEncodeToMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
  108. : TEncodeMapperBase(flags, fld)
  109. {
  110. }
  111. bool Enabled() const {
  112. return 0 != FldMask;
  113. }
  114. bool Encode(unsigned char) const;
  115. };
  116. class TEncoder {
  117. public:
  118. TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst = TEncodeToMapper());
  119. ui64 ReEncode(const TStringBuf& url);
  120. ui64 ReEncode(const char* str, size_t len) {
  121. return ReEncode(TStringBuf(str, len));
  122. }
  123. protected:
  124. static bool IsType(unsigned char c, ui64 flags) {
  125. return GetFlags(c).TypeFlags & flags;
  126. }
  127. public:
  128. static bool IsDigit(unsigned char c) {
  129. return IsType(c, ECFDigit);
  130. }
  131. static bool IsUpper(unsigned char c) {
  132. return IsType(c, ECFUpper);
  133. }
  134. static bool IsLower(unsigned char c) {
  135. return IsType(c, ECFLower);
  136. }
  137. static bool IsAlpha(unsigned char c) {
  138. return IsType(c, ECGAlpha);
  139. }
  140. static bool IsAlnum(unsigned char c) {
  141. return IsType(c, ECGAlnum);
  142. }
  143. static bool IsUnres(unsigned char c) {
  144. return IsType(c, ECGUnres);
  145. }
  146. static const TCharFlags& GetFlags(unsigned char c) {
  147. return Grammar().Get(c);
  148. }
  149. public:
  150. // process an encoded string, decoding safe chars and encoding unsafe
  151. static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld) {
  152. TEncoder(out, srcfld).ReEncode(val);
  153. return out;
  154. }
  155. static IOutputStream& ReEncodeTo(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld, const TEncodeToMapper& dstfld) {
  156. TEncoder(out, srcfld, dstfld).ReEncode(val);
  157. return out;
  158. }
  159. // see also UrlUnescape() from string/quote.h
  160. static IOutputStream& Decode(
  161. IOutputStream& out, const TStringBuf& val, ui64 flags) {
  162. return ReEncode(out, val, flags | TFeature::FeatureDecodeANY);
  163. }
  164. public:
  165. // process a raw string or char, encode as needed
  166. static IOutputStream& Hex(IOutputStream& out, unsigned char val);
  167. static IOutputStream& Encode(IOutputStream& out, unsigned char val) {
  168. out << '%';
  169. return Hex(out, val);
  170. }
  171. static IOutputStream& EncodeAll(IOutputStream& out, const TStringBuf& val);
  172. static IOutputStream& EncodeNotAlnum(IOutputStream& out, const TStringBuf& val);
  173. static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld);
  174. static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags);
  175. static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) {
  176. return EncodeField(out, val, TField::FieldAllMAX);
  177. }
  178. static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val, ui64 flags) {
  179. return EncodeField(out, val, TField::FieldAllMAX, flags);
  180. }
  181. public:
  182. class TGrammar {
  183. TCharFlags Map_[256];
  184. public:
  185. TGrammar();
  186. const TCharFlags& Get(unsigned char ch) const {
  187. return Map_[ch];
  188. }
  189. TCharFlags& GetMutable(unsigned char ch) {
  190. return Map_[ch];
  191. }
  192. TCharFlags& Add(unsigned char ch, const TCharFlags& val) {
  193. return GetMutable(ch).Add(val);
  194. }
  195. void AddRng(unsigned char lo, unsigned char hi, const TCharFlags& val) {
  196. for (unsigned i = lo; i <= hi; ++i)
  197. Add(i, val);
  198. }
  199. void AddRng(unsigned char lo, unsigned char hi, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
  200. AddRng(lo, hi, TCharFlags(type, feat, decmask, encmask));
  201. }
  202. void Add(const TStringBuf& set, const TCharFlags& val) {
  203. for (size_t i = 0; i != set.length(); ++i)
  204. Add(set[i], val);
  205. }
  206. void Add(const TStringBuf& set, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
  207. Add(set, TCharFlags(type, feat, decmask, encmask));
  208. }
  209. };
  210. static const TGrammar& Grammar();
  211. protected:
  212. IOutputStream& Out;
  213. const TEncodeMapper FldSrc;
  214. const TEncodeToMapper FldDst;
  215. ui64 OutFlags;
  216. int HexValue;
  217. protected:
  218. void HexReset() {
  219. HexValue = 0;
  220. }
  221. void HexDigit(char c) {
  222. HexAdd(c - '0');
  223. }
  224. void HexUpper(char c) {
  225. HexAdd(c - 'A' + 10);
  226. }
  227. void HexLower(char c) {
  228. HexAdd(c - 'a' + 10);
  229. }
  230. void HexAdd(int val) {
  231. HexValue <<= 4;
  232. HexValue += val;
  233. }
  234. protected:
  235. void DoSym(unsigned char ch) {
  236. const int res = FldSrc.EncodeSym(ch);
  237. Do(ch, res);
  238. }
  239. void DoHex(unsigned char ch) {
  240. const int res = FldSrc.EncodeHex(ch);
  241. Do(ch, res);
  242. }
  243. void DoHex() {
  244. DoHex(HexValue);
  245. HexValue = 0;
  246. }
  247. void Do(unsigned char, int);
  248. };
  249. }
// Expose the encoder directly in NUri so callers can write NUri::TEncoder.
using TEncoder = NEncode::TEncoder;
  251. }