123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361 |
- #pragma once
- // #define DO_PRN
- #include <cstddef>
- #include "common.h"
- #include <library/cpp/charset/doccodes.h>
- #include <util/generic/strbuf.h>
- #include <util/stream/output.h>
- #include <util/string/cast.h>
- #include <util/system/yassert.h>
- namespace NUri {
- class TParser;
- namespace NParse {
- class TRange {
- public:
- const char* Beg;
- ui64 FlagsEncodeMasked;
- ui64 FlagsAllPlaintext;
- ui32 Encode;
- ui32 Decode;
- public:
- TRange(const char* beg = nullptr)
- : Beg(beg)
- , FlagsEncodeMasked(0)
- , FlagsAllPlaintext(0)
- , Encode(0)
- , Decode(0)
- {
- }
- void Reset(const char* beg = nullptr) {
- *this = TRange(beg);
- }
- void AddRange(const TRange& range, ui64 mask);
- void AddFlag(const char* ptr, ui64 mask, ui64 flag) {
- if (0 != flag)
- AddFlagImpl(ptr, mask, flag, flag);
- }
- void AddFlagExcept(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag) {
- if (0 != flag)
- AddFlagImpl(ptr, mask, flag & ~exclflag, flag);
- }
- void AddFlagUnless(const char* ptr, ui64 mask, ui64 flag, ui64 exclmask) {
- if (0 != flag)
- AddFlagImpl(ptr, mask, flag, flag, exclmask);
- }
- void AddFlag(const char* ptr, ui64 mask, ui64 flag, ui64 exclflag, ui64 exclmask) {
- if (0 != flag)
- AddFlagImpl(ptr, mask, flag & ~exclflag, flag, exclmask);
- }
- private:
- void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag) {
- AddFlagAllPlaintextImpl(ptr, plainflag);
- AddFlagEncodeMaskedImpl(encflag & mask);
- }
- void AddFlagImpl(const char* ptr, ui64 mask, ui64 plainflag, ui64 encflag, ui64 exclmask) {
- AddFlagAllPlaintextImpl(ptr, plainflag);
- if (0 == (mask & exclmask))
- AddFlagEncodeMaskedImpl(encflag & mask);
- }
- void AddFlagAllPlaintextImpl(const char* ptr, ui64 flag) {
- if (nullptr == Beg)
- Beg = ptr;
- FlagsAllPlaintext |= flag;
- }
- void AddFlagEncodeMaskedImpl(ui64 flag) {
- if (0 == flag)
- return;
- FlagsEncodeMasked |= flag;
- if (flag & TFeature::FeaturesMaybeEncode)
- ++Encode;
- else if (flag & TFeature::FeaturesDecode)
- ++Decode;
- }
- };
- }
- class TSection
- : protected NParse::TRange {
- private:
- friend class TParser;
- private:
- const char* End;
- TSection(const char* beg = nullptr)
- : NParse::TRange(beg)
- , End(nullptr)
- {
- }
- void Reset() {
- Enter(nullptr);
- }
- void Reset(const char* pc) {
- Y_ASSERT(!Beg || !pc || Beg < pc);
- Reset();
- }
- void Enter(const char* pc) {
- *this = TSection(pc);
- }
- bool Leave(const char* pc) {
- Y_ASSERT(Beg);
- End = pc;
- return true;
- }
- void Set(const TStringBuf& buf) {
- Enter(buf.data());
- Leave(buf.data() + buf.length());
- }
- public:
- bool IsSet() const {
- return End;
- }
- TStringBuf Get() const {
- return TStringBuf(Beg, End);
- }
- size_t Len() const {
- return End - Beg;
- }
- size_t DecodedLen() const {
- return Len() - 2 * Decode;
- }
- size_t EncodedLen() const {
- return 2 * Encode + DecodedLen();
- }
- ui32 GetEncode() const {
- return Encode;
- }
- ui32 GetDecode() const {
- return Decode;
- }
- ui64 GetFlagsEncode() const {
- return FlagsEncodeMasked;
- }
- ui64 GetFlagsAllPlaintext() const {
- return FlagsAllPlaintext;
- }
- };
- class TParser {
- public:
- TSection Sections[TField::FieldUrlMAX];
- TScheme::EKind Scheme;
- const TParseFlags Flags;
- const TStringBuf UriStr;
- TState::EParsed State;
- ECharset Enc;
- public:
- TParser(const TParseFlags& flags, const TStringBuf& uri, ECharset enc = CODES_UTF8)
- : Scheme(TScheme::SchemeEmpty)
- , Flags(flags | TFeature::FeatureDecodeANY)
- , UriStr(uri)
- , State(TState::ParsedEmpty)
- , Enc(enc)
- , HexValue(0)
- , PctBegin(nullptr)
- {
- Y_ASSERT(0 == (Flags & TFeature::FeaturePathOperation)
- // can't define all of them
- || TFeature::FeaturesPath != (Flags & TFeature::FeaturesPath));
- State = ParseImpl();
- }
- public:
- const TSection& Get(TField::EField fld) const {
- return Sections[fld];
- }
- TSection& GetMutable(TField::EField fld) {
- return Sections[fld];
- }
- bool Has(TField::EField fld) const {
- return Get(fld).IsSet();
- }
- bool IsNetPath() const {
- return Has(TField::FieldHost) && 2 < UriStr.length() && '/' == UriStr[0] && '/' == UriStr[1];
- }
- bool IsRootless() const {
- return Has(TField::FieldScheme) && !Has(TField::FieldHost) && (!Has(TField::FieldPath) || '/' != Get(TField::FieldPath).Get()[0]);
- }
- // for RFC 2396 compatibility
- bool IsOpaque() const {
- return IsRootless();
- }
- static ui64 GetFieldFlags(TField::EField fld, const TParseFlags& flags) {
- return FieldFlags[fld] & flags;
- }
- ui64 GetFieldFlags(TField::EField fld) const {
- return GetFieldFlags(fld, Flags);
- }
- protected:
- static const TParseFlags FieldFlags[TField::FieldUrlMAX];
- TSection::TRange CurRange;
- unsigned HexValue;
- const char* PctBegin;
- #ifdef DO_PRN
- IOutputStream& PrintAddr(const char* ptr) const {
- return Cdbg << "[" << IntToString<16>(ui64(ptr)) << "] ";
- }
- IOutputStream& PrintHead(const char* ptr, const char* func) const {
- return PrintAddr(ptr) << func << " ";
- }
- IOutputStream& PrintHead(const char* ptr, const char* func, const TField::EField& fld) const {
- return PrintHead(ptr, func) << fld;
- }
- IOutputStream& PrintTail(const TStringBuf& val) const {
- return Cdbg << " [" << val << "]" << Endl;
- }
- IOutputStream& PrintTail(const char* beg, const char* end) const {
- return PrintTail(TStringBuf(beg, end));
- }
- #endif
- void ResetSection(TField::EField fld, const char* pc = nullptr) {
- #ifdef DO_PRN
- PrintHead(pc, __FUNCTION__, fld);
- PrintTail(pc);
- #endif
- Sections[fld].Reset(pc);
- }
- void storeSection(const TStringBuf& val, TField::EField fld) {
- #ifdef DO_PRN
- PrintHead(val.data(), __FUNCTION__, fld);
- PrintTail(val);
- #endif
- Sections[fld].Set(val);
- }
- void startSection(const char* pc, TField::EField fld) {
- #ifdef DO_PRN
- PrintHead(pc, __FUNCTION__, fld);
- PrintTail(pc);
- #endif
- copyRequirements(pc);
- Sections[fld].Enter(pc);
- }
- void finishSection(const char* pc, TField::EField fld) {
- #ifdef DO_PRN
- PrintHead(pc, __FUNCTION__, fld);
- PrintTail(pc);
- #endif
- if (Sections[fld].Leave(pc))
- copyRequirements(pc);
- }
- void setRequirement(const char* ptr, ui64 flags) {
- #ifdef DO_PRN
- PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags)
- << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra);
- PrintTail(ptr);
- #endif
- CurRange.AddFlag(ptr, Flags.Allow | Flags.Extra, flags);
- }
- void setRequirementExcept(const char* ptr, ui64 flags, ui64 exclflag) {
- #ifdef DO_PRN
- PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags)
- << " & exclflag=" << IntToString<16>(exclflag)
- << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra);
- PrintTail(ptr);
- #endif
- CurRange.AddFlagExcept(ptr, Flags.Allow | Flags.Extra, flags, exclflag);
- }
- void setRequirementUnless(const char* ptr, ui64 flags, ui64 exclmask) {
- #ifdef DO_PRN
- PrintHead(ptr, __FUNCTION__) << IntToString<16>(flags)
- << " & exclmask=" << IntToString<16>(exclmask)
- << " & mask=" << IntToString<16>(Flags.Allow | Flags.Extra);
- PrintTail(ptr);
- #endif
- CurRange.AddFlagUnless(ptr, Flags.Allow | Flags.Extra, flags, exclmask);
- }
- void copyRequirementsImpl(const char* ptr);
- void copyRequirements(const char* ptr) {
- PctEnd(ptr);
- if (nullptr != CurRange.Beg && CurRange.Beg != ptr)
- copyRequirementsImpl(ptr);
- }
- void HexDigit(const char* ptr, char c) {
- Y_UNUSED(ptr);
- HexAdd(c - '0');
- }
- void HexUpper(const char* ptr, char c) {
- setRequirementUnless(ptr, TFeature::FeatureToLower, TFeature::FeatureUpperEncoded);
- HexAdd(c - 'A' + 10);
- }
- void HexLower(const char* ptr, char c) {
- setRequirement(ptr, TFeature::FeatureUpperEncoded);
- HexAdd(c - 'a' + 10);
- }
- void HexAdd(unsigned val) {
- HexValue <<= 4;
- HexValue += val;
- }
- void HexReset() {
- HexValue = 0;
- }
- void HexSet(const char* ptr);
- void PctEndImpl(const char* ptr);
- void PctEnd(const char* ptr) {
- if (nullptr != PctBegin && ptr != PctBegin)
- PctEndImpl(ptr);
- }
- void PctBeg(const char* ptr) {
- PctEnd(ptr);
- HexReset();
- PctBegin = ptr;
- }
- void checkSectionCollision(TField::EField fld1, TField::EField fld2) {
- if (Sections[fld1].IsSet() && Sections[fld2].IsSet() && Sections[fld1].Beg == Sections[fld2].Beg) {
- Sections[fld1].Reset();
- }
- }
- bool doParse(const char* str_beg, size_t length);
- TState::EParsed ParseImpl();
- };
- }
|