encode.h 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. #pragma once
  2. #include "common.h"
  3. #include <util/stream/output.h>
  4. namespace NUri {
  5. namespace NEncode {
  // Basic character classes known to the encoder's grammar table.
  // Each enumerator is the bit position of the ECharFlag mask with the
  // same suffix (e.g. _ECTDigit is the bit index of ECFDigit).
  // NOTE(review): "Unres" presumably means RFC 3986 "unreserved" and
  // "Stdrd" "standard" -- confirm against the grammar initialisation.
  enum ECharType {
      _ECTDigit,
      _ECTLower,
      _ECTUpper,
      _ECTUnres,
      _ECTStdrd,
  };

  // Bit masks for the classes above (ECF*) and cumulative groups (ECG*).
  enum ECharFlag {
      ECFDigit = 1u << _ECTDigit,
      ECFLower = 1u << _ECTLower,
      ECFUpper = 1u << _ECTUpper,
      ECFUnres = 1u << _ECTUnres,
      ECFStdrd = 1u << _ECTStdrd,
      // compound group flags; every group is a superset of the previous one
      ECGAlpha = ECFLower | ECFUpper,
      ECGAlnum = ECFDigit | ECGAlpha,
      ECGUnres = ECFUnres | ECGAlnum,
      ECGStdrd = ECFStdrd | ECGUnres,
  };
  29. struct TCharFlags {
  30. ui32 TypeFlags;
  31. ui64 FeatFlags;
  32. ui32 DecodeFld; // decode if FeatureDecodeFieldAllowed
  33. ui32 EncodeFld; // encode if shouldn't be treated as delimiter
  34. TCharFlags(ui64 feat = 0)
  35. : TypeFlags(0)
  36. , FeatFlags(feat)
  37. , DecodeFld(0)
  38. , EncodeFld(0)
  39. {
  40. }
  41. TCharFlags(ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0)
  42. : TypeFlags(type)
  43. , FeatFlags(feat)
  44. , DecodeFld(decmask)
  45. , EncodeFld(encmask)
  46. {
  47. }
  48. TCharFlags& Add(const TCharFlags& val) {
  49. TypeFlags |= val.TypeFlags;
  50. FeatFlags |= val.FeatFlags;
  51. DecodeFld |= val.DecodeFld;
  52. EncodeFld |= val.EncodeFld;
  53. return *this;
  54. }
  55. bool IsAllowed(ui32 fldmask) const {
  56. return (TypeFlags & ECGUnres) || (DecodeFld & ~EncodeFld & fldmask);
  57. }
  58. // should we decode an encoded character
  59. bool IsDecode(ui32 fldmask, ui64 flags) const;
  60. };
  61. class TEncodeMapperBase {
  62. protected:
  63. TEncodeMapperBase()
  64. : Flags(0)
  65. , FldMask(0)
  66. , Q_DecodeAny(false)
  67. {
  68. }
  69. TEncodeMapperBase(ui64 flags, TField::EField fld)
  70. : Flags(flags)
  71. , FldMask(1u << fld)
  72. , Q_DecodeAny(flags & TFeature::FeatureDecodeANY)
  73. {
  74. }
  75. protected:
  76. const ui64 Flags;
  77. const ui32 FldMask;
  78. const bool Q_DecodeAny; // this is a special option for username/password
  79. };
  80. // maps a sym or hex character and indicates whether it has to be encoded
  81. class TEncodeMapper
  82. : public TEncodeMapperBase {
  83. public:
  84. TEncodeMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
  85. : TEncodeMapperBase(flags, fld)
  86. , Q_EncodeSpcAsPlus(flags & TFeature::FeatureEncodeSpaceAsPlus)
  87. {
  88. }
  89. // negative=sym, positive=hex, zero=maybesym
  90. int EncodeSym(unsigned char&) const;
  91. int EncodeHex(unsigned char&) const;
  92. protected:
  93. const bool Q_EncodeSpcAsPlus;
  94. };
  95. // indicates whether a character has to be encoded when copying to a field
  96. class TEncodeToMapper
  97. : public TEncodeMapperBase {
  98. public:
  99. TEncodeToMapper()
  100. : TEncodeMapperBase()
  101. {
  102. }
  103. TEncodeToMapper(ui64 flags, TField::EField fld = TField::FieldAllMAX)
  104. : TEncodeMapperBase(flags, fld)
  105. {
  106. }
  107. bool Enabled() const {
  108. return 0 != FldMask;
  109. }
  110. bool Encode(unsigned char) const;
  111. };
  112. class TEncoder {
  113. public:
  114. TEncoder(IOutputStream& out, const TEncodeMapper& fldsrc, const TEncodeToMapper& flddst = TEncodeToMapper());
  115. ui64 ReEncode(const TStringBuf& url);
  116. ui64 ReEncode(const char* str, size_t len) {
  117. return ReEncode(TStringBuf(str, len));
  118. }
  119. protected:
  120. static bool IsType(unsigned char c, ui64 flags) {
  121. return GetFlags(c).TypeFlags & flags;
  122. }
  123. public:
  124. static bool IsDigit(unsigned char c) {
  125. return IsType(c, ECFDigit);
  126. }
  127. static bool IsUpper(unsigned char c) {
  128. return IsType(c, ECFUpper);
  129. }
  130. static bool IsLower(unsigned char c) {
  131. return IsType(c, ECFLower);
  132. }
  133. static bool IsAlpha(unsigned char c) {
  134. return IsType(c, ECGAlpha);
  135. }
  136. static bool IsAlnum(unsigned char c) {
  137. return IsType(c, ECGAlnum);
  138. }
  139. static bool IsUnres(unsigned char c) {
  140. return IsType(c, ECGUnres);
  141. }
  142. static const TCharFlags& GetFlags(unsigned char c) {
  143. return Grammar().Get(c);
  144. }
  145. public:
  146. // process an encoded string, decoding safe chars and encoding unsafe
  147. static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld) {
  148. TEncoder(out, srcfld).ReEncode(val);
  149. return out;
  150. }
  151. static IOutputStream& ReEncodeTo(IOutputStream& out, const TStringBuf& val, const TEncodeMapper& srcfld, const TEncodeToMapper& dstfld) {
  152. TEncoder(out, srcfld, dstfld).ReEncode(val);
  153. return out;
  154. }
  155. // see also UrlUnescape() from string/quote.h
  156. static IOutputStream& Decode(
  157. IOutputStream& out, const TStringBuf& val, ui64 flags) {
  158. return ReEncode(out, val, flags | TFeature::FeatureDecodeANY);
  159. }
  160. public:
  161. // process a raw string or char, encode as needed
  162. static IOutputStream& Hex(IOutputStream& out, unsigned char val);
  163. static IOutputStream& Encode(IOutputStream& out, unsigned char val) {
  164. out << '%';
  165. return Hex(out, val);
  166. }
  167. static IOutputStream& EncodeAll(IOutputStream& out, const TStringBuf& val);
  168. static IOutputStream& EncodeNotAlnum(IOutputStream& out, const TStringBuf& val);
  169. static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld);
  170. static IOutputStream& EncodeField(IOutputStream& out, const TStringBuf& val, TField::EField fld, ui64 flags);
  171. static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val) {
  172. return EncodeField(out, val, TField::FieldAllMAX);
  173. }
  174. static IOutputStream& Encode(IOutputStream& out, const TStringBuf& val, ui64 flags) {
  175. return EncodeField(out, val, TField::FieldAllMAX, flags);
  176. }
  177. public:
  178. class TGrammar {
  179. TCharFlags Map_[256];
  180. public:
  181. TGrammar();
  182. const TCharFlags& Get(unsigned char ch) const {
  183. return Map_[ch];
  184. }
  185. TCharFlags& GetMutable(unsigned char ch) {
  186. return Map_[ch];
  187. }
  188. TCharFlags& Add(unsigned char ch, const TCharFlags& val) {
  189. return GetMutable(ch).Add(val);
  190. }
  191. void AddRng(unsigned char lo, unsigned char hi, const TCharFlags& val) {
  192. for (unsigned i = lo; i <= hi; ++i)
  193. Add(i, val);
  194. }
  195. void AddRng(unsigned char lo, unsigned char hi, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
  196. AddRng(lo, hi, TCharFlags(type, feat, decmask, encmask));
  197. }
  198. void Add(const TStringBuf& set, const TCharFlags& val) {
  199. for (size_t i = 0; i != set.length(); ++i)
  200. Add(set[i], val);
  201. }
  202. void Add(const TStringBuf& set, ui32 type, ui64 feat, ui32 decmask = 0, ui32 encmask = 0) {
  203. Add(set, TCharFlags(type, feat, decmask, encmask));
  204. }
  205. };
  206. static const TGrammar& Grammar();
  207. protected:
  208. IOutputStream& Out;
  209. const TEncodeMapper FldSrc;
  210. const TEncodeToMapper FldDst;
  211. ui64 OutFlags;
  212. int HexValue;
  213. protected:
  214. void HexReset() {
  215. HexValue = 0;
  216. }
  217. void HexDigit(char c) {
  218. HexAdd(c - '0');
  219. }
  220. void HexUpper(char c) {
  221. HexAdd(c - 'A' + 10);
  222. }
  223. void HexLower(char c) {
  224. HexAdd(c - 'a' + 10);
  225. }
  226. void HexAdd(int val) {
  227. HexValue <<= 4;
  228. HexValue += val;
  229. }
  230. protected:
  231. void DoSym(unsigned char ch) {
  232. const int res = FldSrc.EncodeSym(ch);
  233. Do(ch, res);
  234. }
  235. void DoHex(unsigned char ch) {
  236. const int res = FldSrc.EncodeHex(ch);
  237. Do(ch, res);
  238. }
  239. void DoHex() {
  240. DoHex(HexValue);
  241. HexValue = 0;
  242. }
  243. void Do(unsigned char, int);
  244. };
  245. }
  246. using TEncoder = NEncode::TEncoder;
  247. }