parse.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. #pragma once
  2. // #define DO_PRN
  3. #include <cstddef>
  4. #include "common.h"
  5. #include <library/cpp/charset/doccodes.h>
  6. #include <util/generic/strbuf.h>
  7. #include <util/stream/output.h>
  8. #include <util/string/cast.h>
  9. #include <util/system/yassert.h>
  10. namespace NUri {
  11. class TParser;
  12. namespace NParse {
  13. class TRange {
  14. public:
  15. const char* Beg;
  16. ui64 FlagsEncodeMasked;
  17. ui64 FlagsAllPlaintext;
  18. ui32 Encode;
  19. ui32 Decode;
  20. public:
  21. TRange(const char* beg = nullptr)
  22. : Beg(beg)
  23. , FlagsEncodeMasked(0)
  24. , FlagsAllPlaintext(0)
  25. , Encode(0)
  26. , Decode(0)
  27. {
  28. }
  29. void Reset(const char* beg = nullptr) {
  30. *this = TRange(beg);
  31. }
  32. void AddRange(const TRange& range, ui64 mask);
  33. void AddFlag(const char* ptr, ui64 mask, ui64 flag) {
  34. if (0 != flag)
  35. AddFlagImpl(ptr, mask, flag, flag);
  36. }
  37. void AddFlagExcept(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag) {
  38. if (0 != flag)
  39. AddFlagImpl(ptr, mask, flag & ~exclflag, flag);
  40. }
  41. void AddFlagUnless(const char* ptr, ui64 mask, ui64 flag, ui64 exclmask) {
  42. if (0 != flag)
  43. AddFlagImpl(ptr, mask, flag, flag, exclmask);
  44. }
  45. void AddFlag(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag, ui64 exclmask) {
  46. if (0 != flag)
  47. AddFlagImpl(ptr, mask, flag & ~exclflag, flag, exclmask);
  48. }
  49. private:
  50. void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag) {
  51. AddFlagAllPlaintextImpl(ptr, plainflag);
  52. AddFlagEncodeMaskedImpl(encflag & mask);
  53. }
  54. void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag, ui64 exclmask) {
  55. AddFlagAllPlaintextImpl(ptr, plainflag);
  56. if (0 == (mask & exclmask))
  57. AddFlagEncodeMaskedImpl(encflag & mask);
  58. }
  59. void AddFlagAllPlaintextImpl(const char* ptr, ui64 flag) {
  60. if (nullptr == Beg)
  61. Beg = ptr;
  62. FlagsAllPlaintext |= flag;
  63. }
  64. void AddFlagEncodeMaskedImpl(ui64 flag) {
  65. if (0 == flag)
  66. return;
  67. FlagsEncodeMasked |= flag;
  68. if (flag & TFeature::FeaturesMaybeEncode)
  69. ++Encode;
  70. else if (flag & TFeature::FeaturesDecode)
  71. ++Decode;
  72. }
  73. };
  74. }
  75. class TSection
  76. : protected NParse::TRange {
  77. private:
  78. friend class TParser;
  79. private:
  80. const char* End;
  81. TSection(const char* beg = nullptr)
  82. : NParse::TRange(beg)
  83. , End(nullptr)
  84. {
  85. }
  86. void Reset() {
  87. Enter(nullptr);
  88. }
  89. void Reset(const char* pc) {
  90. Y_ASSERT(!Beg || !pc || Beg < pc);
  91. Reset();
  92. }
  93. void Enter(const char* pc) {
  94. *this = TSection(pc);
  95. }
  96. bool Leave(const char* pc) {
  97. Y_ASSERT(Beg);
  98. End = pc;
  99. return true;
  100. }
  101. void Set(const TStringBuf& buf) {
  102. Enter(buf.data());
  103. Leave(buf.data() + buf.length());
  104. }
  105. public:
  106. bool IsSet() const {
  107. return End;
  108. }
  109. TStringBuf Get() const {
  110. return TStringBuf(Beg, End);
  111. }
  112. size_t Len() const {
  113. return End - Beg;
  114. }
  115. size_t DecodedLen() const {
  116. return Len() - 2 * Decode;
  117. }
  118. size_t EncodedLen() const {
  119. return 2 * Encode + DecodedLen();
  120. }
  121. ui32 GetEncode() const {
  122. return Encode;
  123. }
  124. ui32 GetDecode() const {
  125. return Decode;
  126. }
  127. ui64 GetFlagsEncode() const {
  128. return FlagsEncodeMasked;
  129. }
  130. ui64 GetFlagsAllPlaintext() const {
  131. return FlagsAllPlaintext;
  132. }
  133. };
  134. class TParser {
  135. public:
  136. TSection Sections[TField::FieldUrlMAX];
  137. TScheme::EKind Scheme;
  138. const TParseFlags Flags;
  139. const TStringBuf UriStr;
  140. TState::EParsed State;
  141. ECharset Enc;
  142. public:
  143. TParser(const TParseFlags& flags, const TStringBuf& uri, ECharset enc = CODES_UTF8)
  144. : Scheme(TScheme::SchemeEmpty)
  145. , Flags(flags | TFeature::FeatureDecodeANY)
  146. , UriStr(uri)
  147. , State(TState::ParsedEmpty)
  148. , Enc(enc)
  149. , HexValue(0)
  150. , PctBegin(nullptr)
  151. {
  152. Y_ASSERT(0 == (Flags & TFeature::FeaturePathOperation)
  153. // can't define all of them
  154. || TFeature::FeaturesPath != (Flags & TFeature::FeaturesPath));
  155. State = ParseImpl();
  156. }
  157. public:
  158. const TSection& Get(TField::EField fld) const {
  159. return Sections[fld];
  160. }
  161. TSection& GetMutable(TField::EField fld) {
  162. return Sections[fld];
  163. }
  164. bool Has(TField::EField fld) const {
  165. return Get(fld).IsSet();
  166. }
  167. bool IsNetPath() const {
  168. return Has(TField::FieldHost) && 2 < UriStr.length() && '/' == UriStr[0] && '/' == UriStr[1];
  169. }
  170. bool IsRootless() const {
  171. return Has(TField::FieldScheme) && !Has(TField::FieldHost) && (!Has(TField::FieldPath) || '/' != Get(TField::FieldPath).Get()[0]);
  172. }
  173. // for RFC 2396 compatibility
  174. bool IsOpaque() const {
  175. return IsRootless();
  176. }
  177. static ui64 GetFieldFlags(TField::EField fld, const TParseFlags& flags) {
  178. return FieldFlags[fld] & flags;
  179. }
  180. ui64 GetFieldFlags(TField::EField fld) const {
  181. return GetFieldFlags(fld, Flags);
  182. }
  183. protected:
  184. static const TParseFlags FieldFlags[TField::FieldUrlMAX];
  185. TSection::TRange CurRange;
  186. unsigned HexValue;
  187. const char* PctBegin;
  188. #ifdef DO_PRN
  189. IOutputStream& PrintAddr(const char* ptr) const {
  190. return Cdbg << "[" << IntToString<16>(ui64(ptr)) << "] ";
  191. }
  192. IOutputStream& PrintHead(const char* ptr, const char* func) const {
  193. return PrintAddr(ptr) << func << " ";
  194. }
  195. IOutputStream& PrintHead(const char* ptr, const char* func, const TField::EField& fld) const {
  196. return PrintHead(ptr, func) << fld;
  197. }
  198. IOutputStream& PrintTail(const TStringBuf& val) const {
  199. return Cdbg << " [" << val << "]" << Endl;
  200. }
  201. IOutputStream& PrintTail(const char* beg, const char* end) const {
  202. return PrintTail(TStringBuf(beg, end));
  203. }
  204. #endif
  205. void ResetSection(TField::EField fld, const char* pc = nullptr) {
  206. #ifdef DO_PRN
  207. PrintHead(pc, __FUNCTION__, fld);
  208. PrintTail(pc);
  209. #endif
  210. Sections[fld].Reset(pc);
  211. }
  212. void storeSection(const TStringBuf& val, TField::EField fld) {
  213. #ifdef DO_PRN
  214. PrintHead(val.data(), __FUNCTION__, fld);
  215. PrintTail(val);
  216. #endif
  217. Sections[fld].Set(val);
  218. }
  219. void startSection(const char* pc, TField::EField fld) {
  220. #ifdef DO_PRN
  221. PrintHead(pc, __FUNCTION__, fld);
  222. PrintTail(pc);
  223. #endif
  224. copyRequirements(pc);
  225. Sections[fld].Enter(pc);
  226. }
  227. void finishSection(const char* pc, TField::EField fld) {
  228. #ifdef DO_PRN
  229. PrintHead(pc, __FUNCTION__, fld);
  230. PrintTail(pc);
  231. #endif
  232. if (Sections[fld].Leave(pc))
  233. copyRequirements(pc);
  234. }
  235. void setRequirement(const char* ptr, ui64 flags) {
  236. #ifdef DO_PRN
  237. PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags)
  238. << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra);
  239. PrintTail(ptr);
  240. #endif
  241. CurRange.AddFlag(ptr, Flags.Allow | Flags.Extra, flags);
  242. }
  243. void setRequirementExcept(const char* ptr, ui64 flags, ui64 exclflag) {
  244. #ifdef DO_PRN
  245. PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags)
  246. << " & exclflag=" << IntToString<16>(exclflag)
  247. << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra);
  248. PrintTail(ptr);
  249. #endif
  250. CurRange.AddFlagExcept(ptr, Flags.Allow | Flags.Extra, flags, exclflag);
  251. }
  252. void setRequirementUnless(const char* ptr, ui64 flags, ui64 exclmask) {
  253. #ifdef DO_PRN
  254. PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags)
  255. << " & exclmask=" << IntToString<16>(exclmask)
  256. << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra);
  257. PrintTail(ptr);
  258. #endif
  259. CurRange.AddFlagUnless(ptr, Flags.Allow | Flags.Extra, flags, exclmask);
  260. }
  261. void copyRequirementsImpl(const char* ptr);
  262. void copyRequirements(const char* ptr) {
  263. PctEnd(ptr);
  264. if (nullptr != CurRange.Beg && CurRange.Beg != ptr)
  265. copyRequirementsImpl(ptr);
  266. }
  267. void HexDigit(const char* ptr, char c) {
  268. Y_UNUSED(ptr);
  269. HexAdd(c - '0');
  270. }
  271. void HexUpper(const char* ptr, char c) {
  272. setRequirementUnless(ptr, TFeature::FeatureToLower, TFeature::FeatureUpperEncoded);
  273. HexAdd(c - 'A' + 10);
  274. }
  275. void HexLower(const char* ptr, char c) {
  276. setRequirement(ptr, TFeature::FeatureUpperEncoded);
  277. HexAdd(c - 'a' + 10);
  278. }
  279. void HexAdd(unsigned val) {
  280. HexValue <<= 4;
  281. HexValue += val;
  282. }
  283. void HexReset() {
  284. HexValue = 0;
  285. }
  286. void HexSet(const char* ptr);
  287. void PctEndImpl(const char* ptr);
  288. void PctEnd(const char* ptr) {
  289. if (nullptr != PctBegin && ptr != PctBegin)
  290. PctEndImpl(ptr);
  291. }
  292. void PctBeg(const char* ptr) {
  293. PctEnd(ptr);
  294. HexReset();
  295. PctBegin = ptr;
  296. }
  297. void checkSectionCollision(TField::EField fld1, TField::EField fld2) {
  298. if (Sections[fld1].IsSet() && Sections[fld2].IsSet() && Sections[fld1].Beg == Sections[fld2].Beg) {
  299. Sections[fld1].Reset();
  300. }
  301. }
  302. bool doParse(const char* str_beg, size_t length);
  303. TState::EParsed ParseImpl();
  304. };
  305. }