#pragma once // #define DO_PRN #include #include "common.h" #include #include #include #include #include namespace NUri { class TParser; namespace NParse { class TRange { public: const char* Beg; ui64 FlagsEncodeMasked; ui64 FlagsAllPlaintext; ui32 Encode; ui32 Decode; public: TRange(const char* beg = nullptr) : Beg(beg) , FlagsEncodeMasked(0) , FlagsAllPlaintext(0) , Encode(0) , Decode(0) { } void Reset(const char* beg = nullptr) { *this = TRange(beg); } void AddRange(const TRange& range, ui64 mask); void AddFlag(const char* ptr, ui64 mask, ui64 flag) { if (0 != flag) AddFlagImpl(ptr, mask, flag, flag); } void AddFlagExcept(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag) { if (0 != flag) AddFlagImpl(ptr, mask, flag & ~exclflag, flag); } void AddFlagUnless(const char* ptr, ui64 mask, ui64 flag, ui64 exclmask) { if (0 != flag) AddFlagImpl(ptr, mask, flag, flag, exclmask); } void AddFlag(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag, ui64 exclmask) { if (0 != flag) AddFlagImpl(ptr, mask, flag & ~exclflag, flag, exclmask); } private: void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag) { AddFlagAllPlaintextImpl(ptr, plainflag); AddFlagEncodeMaskedImpl(encflag & mask); } void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag, ui64 exclmask) { AddFlagAllPlaintextImpl(ptr, plainflag); if (0 == (mask & exclmask)) AddFlagEncodeMaskedImpl(encflag & mask); } void AddFlagAllPlaintextImpl(const char* ptr, ui64 flag) { if (nullptr == Beg) Beg = ptr; FlagsAllPlaintext |= flag; } void AddFlagEncodeMaskedImpl(ui64 flag) { if (0 == flag) return; FlagsEncodeMasked |= flag; if (flag & TFeature::FeaturesMaybeEncode) ++Encode; else if (flag & TFeature::FeaturesDecode) ++Decode; } }; } class TSection : protected NParse::TRange { private: friend class TParser; private: const char* End; TSection(const char* beg = nullptr) : NParse::TRange(beg) , End(nullptr) { } void Reset() { Enter(nullptr); } void Reset(const char* pc) { Y_ASSERT(!Beg || !pc || Beg < pc); Reset(); } void Enter(const char* pc) { *this = TSection(pc); } bool Leave(const char* pc) { Y_ASSERT(Beg); End = pc; return true; } void Set(const TStringBuf& buf) { Enter(buf.data()); Leave(buf.data() + buf.length()); } public: bool IsSet() const { return End; } TStringBuf Get() const { return TStringBuf(Beg, End); } size_t Len() const { return End - Beg; } size_t DecodedLen() const { return Len() - 2 * Decode; } size_t EncodedLen() const { return 2 * Encode + DecodedLen(); } ui32 GetEncode() const { return Encode; } ui32 GetDecode() const { return Decode; } ui64 GetFlagsEncode() const { return FlagsEncodeMasked; } ui64 GetFlagsAllPlaintext() const { return FlagsAllPlaintext; } }; class TParser { public: TSection Sections[TField::FieldUrlMAX]; TScheme::EKind Scheme; const TParseFlags Flags; const TStringBuf UriStr; TState::EParsed State; ECharset Enc; public: TParser(const TParseFlags& flags, const TStringBuf& uri, ECharset enc = CODES_UTF8) : Scheme(TScheme::SchemeEmpty) , Flags(flags | TFeature::FeatureDecodeANY) , UriStr(uri) , State(TState::ParsedEmpty) , Enc(enc) , HexValue(0) , PctBegin(nullptr) { Y_ASSERT(0 == (Flags & TFeature::FeaturePathOperation) // can't define all of them || TFeature::FeaturesPath != (Flags & TFeature::FeaturesPath)); State = ParseImpl(); } public: const TSection& Get(TField::EField fld) const { return Sections[fld]; } TSection& GetMutable(TField::EField fld) { return Sections[fld]; } bool Has(TField::EField fld) const { return Get(fld).IsSet(); } bool IsNetPath() const { return Has(TField::FieldHost) && 2 < UriStr.length() && '/' == UriStr[0] && '/' == UriStr[1]; } bool IsRootless() const { return Has(TField::FieldScheme) && !Has(TField::FieldHost) && (!Has(TField::FieldPath) || '/' != Get(TField::FieldPath).Get()[0]); } // for RFC 2396 compatibility bool IsOpaque() const { return IsRootless(); } static ui64 GetFieldFlags(TField::EField fld, const TParseFlags& flags) { return FieldFlags[fld] & flags; } ui64 GetFieldFlags(TField::EField fld) const { return GetFieldFlags(fld, Flags); } protected: static const TParseFlags FieldFlags[TField::FieldUrlMAX]; TSection::TRange CurRange; unsigned HexValue; const char* PctBegin; #ifdef DO_PRN IOutputStream& PrintAddr(const char* ptr) const { return Cdbg << "[" << IntToString<16>(ui64(ptr)) << "] "; } IOutputStream& PrintHead(const char* ptr, const char* func) const { return PrintAddr(ptr) << func << " "; } IOutputStream& PrintHead(const char* ptr, const char* func, const TField::EField& fld) const { return PrintHead(ptr, func) << fld; } IOutputStream& PrintTail(const TStringBuf& val) const { return Cdbg << " [" << val << "]" << Endl; } IOutputStream& PrintTail(const char* beg, const char* end) const { return PrintTail(TStringBuf(beg, end)); } #endif void ResetSection(TField::EField fld, const char* pc = nullptr) { #ifdef DO_PRN PrintHead(pc, __FUNCTION__, fld); PrintTail(pc); #endif Sections[fld].Reset(pc); } void storeSection(const TStringBuf& val, TField::EField fld) { #ifdef DO_PRN PrintHead(val.data(), __FUNCTION__, fld); PrintTail(val); #endif Sections[fld].Set(val); } void startSection(const char* pc, TField::EField fld) { #ifdef DO_PRN PrintHead(pc, __FUNCTION__, fld); PrintTail(pc); #endif copyRequirements(pc); Sections[fld].Enter(pc); } void finishSection(const char* pc, TField::EField fld) { #ifdef DO_PRN PrintHead(pc, __FUNCTION__, fld); PrintTail(pc); #endif if (Sections[fld].Leave(pc)) copyRequirements(pc); } void setRequirement(const char* ptr, ui64 flags) { #ifdef DO_PRN PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); PrintTail(ptr); #endif CurRange.AddFlag(ptr, Flags.Allow | Flags.Extra, flags); } void setRequirementExcept(const char* ptr, ui64 flags, ui64 exclflag) { #ifdef DO_PRN PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) << " & exclflag=" << IntToString<16>(exclflag) << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); PrintTail(ptr); #endif CurRange.AddFlagExcept(ptr, Flags.Allow | Flags.Extra, flags, exclflag); } void setRequirementUnless(const char* ptr, ui64 flags, ui64 exclmask) { #ifdef DO_PRN PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags) << " & exclmask=" << IntToString<16>(exclmask) << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra); PrintTail(ptr); #endif CurRange.AddFlagUnless(ptr, Flags.Allow | Flags.Extra, flags, exclmask); } void copyRequirementsImpl(const char* ptr); void copyRequirements(const char* ptr) { PctEnd(ptr); if (nullptr != CurRange.Beg && CurRange.Beg != ptr) copyRequirementsImpl(ptr); } void HexDigit(const char* ptr, char c) { Y_UNUSED(ptr); HexAdd(c - '0'); } void HexUpper(const char* ptr, char c) { setRequirementUnless(ptr, TFeature::FeatureToLower, TFeature::FeatureUpperEncoded); HexAdd(c - 'A' + 10); } void HexLower(const char* ptr, char c) { setRequirement(ptr, TFeature::FeatureUpperEncoded); HexAdd(c - 'a' + 10); } void HexAdd(unsigned val) { HexValue <<= 4; HexValue += val; } void HexReset() { HexValue = 0; } void HexSet(const char* ptr); void PctEndImpl(const char* ptr); void PctEnd(const char* ptr) { if (nullptr != PctBegin && ptr != PctBegin) PctEndImpl(ptr); } void PctBeg(const char* ptr) { PctEnd(ptr); HexReset(); PctBegin = ptr; } void checkSectionCollision(TField::EField fld1, TField::EField fld2) { if (Sections[fld1].IsSet() && Sections[fld2].IsSet() && Sections[fld1].Beg == Sections[fld2].Beg) { Sections[fld1].Reset(); } } bool doParse(const char* str_beg, size_t length); TState::EParsed ParseImpl(); }; }