123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626 |
- #pragma once
- #include "common.h"
- #include "encode.h"
- #include <library/cpp/charset/doccodes.h>
- #include <util/generic/buffer.h>
- #include <util/generic/ptr.h>
- #include <util/generic/singleton.h>
- #include <util/generic/string.h>
- #include <util/memory/alloc.h>
- #include <util/stream/mem.h>
- #include <util/stream/output.h>
- #include <util/stream/str.h>
- #include <util/system/yassert.h>
- #include <cstdlib>
- namespace NUri {
- /********************************************************/
- class TUri
- : public TFeature,
- public TField,
- public TScheme,
- public TState {
- public:
- enum TLinkType {
- LinkIsBad,
- LinkBadAbs,
- LinkIsFragment,
- LinkIsLocal,
- LinkIsGlobal
- };
- private:
- TBuffer Buffer;
- TStringBuf Fields[FieldAllMAX];
- ui32 FieldsSet;
- ui16 Port;
- ui16 DefaultPort;
- TScheme::EKind Scheme;
- /// contains fields out of buffer (and possibly not null-terminated)
- ui32 FieldsDirty;
- private:
- void Alloc(size_t len) {
- Dealloc(); // to prevent copy below
- Buffer.Resize(len);
- }
- void Dealloc() {
- Buffer.Clear();
- }
- void ClearImpl() {
- Port = 0;
- FieldsSet = 0;
- Scheme = SchemeEmpty;
- FieldsDirty = 0;
- }
- void CopyData(const TUri& url) {
- FieldsSet = url.FieldsSet;
- Port = url.Port;
- DefaultPort = url.DefaultPort;
- Scheme = url.Scheme;
- FieldsDirty = url.FieldsDirty;
- }
- void CopyImpl(const TUri& url) {
- for (int i = 0; i < FieldAllMAX; ++i)
- Fields[i] = url.Fields[i];
- RewriteImpl();
- }
- private:
- static ui32 FldFlag(EField fld) {
- return 1 << fld;
- }
- public:
- static bool FldIsValid(EField fld) {
- return 0 <= fld && FieldAllMAX > fld;
- }
- bool FldSetCmp(ui32 chk, ui32 exp) const {
- return (FieldsSet & chk) == exp;
- }
- bool FldSetCmp(ui32 chk) const {
- return FldSetCmp(chk, chk);
- }
- bool FldIsSet(EField fld) const {
- return !FldSetCmp(FldFlag(fld), 0);
- }
- private:
- void FldMarkSet(EField fld) {
- FieldsSet |= FldFlag(fld);
- }
- void FldMarkUnset(EField fld) {
- FieldsSet &= ~FldFlag(fld);
- }
- // use when we know the field is dirty or RewriteImpl will be called
- void FldSetNoDirty(EField fld, const TStringBuf& value) {
- Fields[fld] = value;
- FldMarkSet(fld);
- }
- void FldSet(EField fld, const TStringBuf& value) {
- FldSetNoDirty(fld, value);
- FldMarkDirty(fld);
- }
- const TStringBuf& FldGet(EField fld) const {
- return Fields[fld];
- }
- private:
- /// depending on value, clears or sets it
- void FldChkSet(EField fld, const TStringBuf& value) {
- if (value.IsInited())
- FldSet(fld, value);
- else
- FldClr(fld);
- }
- void FldChkSet(EField fld, const TUri& other) {
- FldChkSet(fld, other.GetField(fld));
- }
- /// set only if initialized
- bool FldTrySet(EField fld, const TStringBuf& value) {
- const bool ok = value.IsInited();
- if (ok)
- FldSet(fld, value);
- return ok;
- }
- bool FldTrySet(EField fld, const TUri& other) {
- return FldTrySet(fld, other.GetField(fld));
- }
- private:
- /// copies the value if it fits
- bool FldTryCpy(EField fld, const TStringBuf& value);
- // main method: sets the field value, possibly copies, etc.
- bool FldSetImpl(EField fld, TStringBuf value, bool strconst = false, bool nocopy = false);
- public: // clear a field
- void FldClr(EField fld) {
- Fields[fld].Clear();
- FldMarkUnset(fld);
- FldMarkClean(fld);
- }
- bool FldTryClr(EField field) {
- const bool ok = FldIsSet(field);
- if (ok)
- FldClr(field);
- return ok;
- }
- public: // set a field value: might leave state dirty and require a Rewrite()
- // copies if fits and not dirty, sets and marks dirty otherwise
- bool FldMemCpy(EField field, const TStringBuf& value) {
- return FldSetImpl(field, value, false);
- }
- // uses directly, marks dirty
- /// @note client MUST guarantee value will be alive until Rewrite is called
- bool FldMemSet(EField field, const TStringBuf& value) {
- return FldSetImpl(field, value, false, true);
- }
- // uses directly, doesn't mark dirty (value scope exceeds "this")
- bool FldMemUse(EField field, const TStringBuf& value) {
- return FldSetImpl(field, value, true);
- }
- // uses directly, doesn't mark dirty
- template <size_t size>
- bool FldMemSet(EField field, const char (&value)[size]) {
- static_assert(size > 0);
- return FldSetImpl(field, TStringBuf(value, size - 1), true);
- }
- // duplicate one field to another
- bool FldDup(EField src, EField dst) {
- if (!FldIsSet(src) || !FldIsValid(dst))
- return false;
- FldSetNoDirty(dst, FldGet(src));
- if (FldIsDirty(src))
- FldMarkDirty(dst);
- else
- FldMarkClean(dst);
- return true;
- }
- // move one field to another
- bool FldMov(EField src, EField dst) {
- if (!FldDup(src, dst))
- return false;
- FldClr(src);
- return true;
- }
- private:
- bool IsInBuffer(const char* buf) const {
- return buf >= Buffer.data() && buf < Buffer.data() + Buffer.size();
- }
- public:
- bool FldIsDirty() const {
- return 0 != FieldsDirty;
- }
- bool FldIsDirty(EField fld) const {
- return 0 != (FieldsDirty & FldFlag(fld));
- }
- private:
- void FldMarkDirty(EField fld) {
- FieldsDirty |= FldFlag(fld);
- }
- void FldMarkClean(EField fld) {
- FieldsDirty &= ~FldFlag(fld);
- }
- void RewriteImpl();
- public:
- static TState::EParsed CheckHost(const TStringBuf& host);
- // convert a [potential] IDN to ascii
- static TMallocPtr<char> IDNToAscii(const wchar32* idna);
- static TMallocPtr<char> IDNToAscii(const TStringBuf& host, ECharset enc = CODES_UTF8);
- // convert hosts with percent-encoded or extended chars
- // returns non-empty string if host can be converted to ASCII with given parameters
- static TStringBuf HostToAscii(TStringBuf host, TMallocPtr<char>& buf, bool hasExtended, bool allowIDN, ECharset enc = CODES_UTF8);
- // returns host if already ascii, or non-empty if it can be converted
- static TStringBuf HostToAscii(const TStringBuf& host, TMallocPtr<char>& buf, bool allowIDN, ECharset enc = CODES_UTF8);
- public:
- explicit TUri(unsigned defaultPort = 0)
- : FieldsSet(0)
- , Port(0)
- , DefaultPort(static_cast<ui16>(defaultPort))
- , Scheme(SchemeEmpty)
- , FieldsDirty(0)
- {
- }
- TUri(const TStringBuf& host, ui16 port, const TStringBuf& path, const TStringBuf& query = TStringBuf(), const TStringBuf& scheme = "http", unsigned defaultPort = 0, const TStringBuf& hashbang = TStringBuf());
- TUri(const TUri& url)
- : FieldsSet(url.FieldsSet)
- , Port(url.Port)
- , DefaultPort(url.DefaultPort)
- , Scheme(url.Scheme)
- , FieldsDirty(url.FieldsDirty)
- {
- CopyImpl(url);
- }
- ~TUri() {
- Clear();
- }
- void Copy(const TUri& url) {
- if (&url != this) {
- CopyData(url);
- CopyImpl(url);
- }
- }
- void Clear() {
- Dealloc();
- ClearImpl();
- }
- ui32 GetFieldMask() const {
- return FieldsSet;
- }
- ui32 GetUrlFieldMask() const {
- return GetFieldMask() & FlagUrlFields;
- }
- ui32 GetDirtyMask() const {
- return FieldsDirty;
- }
- void CheckMissingFields();
- // Process methods
- void Rewrite() {
- if (FldIsDirty())
- RewriteImpl();
- }
- private:
- TState::EParsed AssignImpl(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty);
- TState::EParsed ParseImpl(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeEmpty, ECharset enc = CODES_UTF8);
- public:
- TState::EParsed Assign(const TParser& parser, TScheme::EKind defscheme = SchemeEmpty) {
- const TState::EParsed ret = AssignImpl(parser, defscheme);
- if (ParsedOK == ret)
- Rewrite();
- return ret;
- }
- TState::EParsed ParseUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) {
- const TState::EParsed ret = ParseImpl(url, flags, maxlen, SchemeEmpty, enc);
- if (ParsedOK == ret)
- Rewrite();
- return ret;
- }
- // parses absolute URIs
- // prepends default scheme (unless unknown) if URI has none
- TState::EParsed ParseAbsUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, TScheme::EKind defscheme = SchemeUnknown, ECharset enc = CODES_UTF8);
- TState::EParsed ParseAbsOrHttpUri(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8) {
- return ParseAbsUri(url, flags, maxlen, SchemeHTTP, enc);
- }
- TState::EParsed Parse(const TStringBuf& url, const TUri& base, const TParseFlags& flags = FeaturesDefault, ui32 maxlen = 0, ECharset enc = CODES_UTF8);
- TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault) {
- return ParseUri(url, flags);
- }
- TState::EParsed Parse(const TStringBuf& url, const TParseFlags& flags, const TStringBuf& base_url, ui32 maxlen = 0, ECharset enc = CODES_UTF8);
- TState::EParsed ParseAbs(const TStringBuf& url, const TParseFlags& flags = FeaturesDefault, const TStringBuf& base_url = TStringBuf(), ui32 maxlen = 0, ECharset enc = CODES_UTF8) {
- const TState::EParsed result = Parse(url, flags, base_url, maxlen, enc);
- return ParsedOK != result || IsValidGlobal() ? result : ParsedBadFormat;
- }
- // correctAbs works with head "/.." portions:
- // 1 - reject URL
- // 0 - keep portions
- // -1 - ignore portions
- void Merge(const TUri& base, int correctAbs = -1);
- TLinkType Normalize(const TUri& base, const TStringBuf& link, const TStringBuf& codebase = TStringBuf(), ui64 careFlags = FeaturesDefault, ECharset enc = CODES_UTF8);
- private:
- int PrintFlags(int flags) const {
- if (0 == (FlagUrlFields & flags))
- flags |= FlagUrlFields;
- return flags;
- }
- protected:
- size_t PrintSize(ui32 flags) const;
- // Output method, prints to stream
- IOutputStream& PrintImpl(IOutputStream& out, int flags) const;
- char* PrintImpl(char* str, size_t size, int flags) const {
- TMemoryOutput out(str, size);
- PrintImpl(out, flags) << '\0';
- return str;
- }
- static bool IsAbsPath(const TStringBuf& path) {
- return 1 <= path.length() && path[0] == '/';
- }
- bool IsAbsPathImpl() const {
- return IsAbsPath(GetField(FieldPath));
- }
- public:
- // Output method, prints to stream
- IOutputStream& Print(IOutputStream& out, int flags = FlagUrlFields) const {
- return PrintImpl(out, PrintFlags(flags));
- }
- // Output method, print to str, allocate memory if str is NULL
- // Should be deprecated
- char* Print(char* str, size_t size, int flags = FlagUrlFields) const {
- return nullptr == str ? Serialize(flags) : Serialize(str, size, flags);
- }
- char* Serialize(char* str, size_t size, int flags = FlagUrlFields) const {
- Y_ASSERT(str);
- flags = PrintFlags(flags);
- const size_t printSize = PrintSize(flags) + 1;
- return printSize > size ? nullptr : PrintImpl(str, size, flags);
- }
- char* Serialize(int flags = FlagUrlFields) const {
- flags = PrintFlags(flags);
- const size_t size = PrintSize(flags) + 1;
- return PrintImpl(static_cast<char*>(malloc(size)), size, flags);
- }
- // Output method to str
- void Print(TString& str, int flags = FlagUrlFields) const {
- flags = PrintFlags(flags);
- str.reserve(str.length() + PrintSize(flags));
- TStringOutput out(str);
- PrintImpl(out, flags);
- }
- TString PrintS(int flags = FlagUrlFields) const {
- TString str;
- Print(str, flags);
- return str;
- }
- // Only non-default scheme and port are printed
- char* PrintHost(char* str, size_t size) const {
- return Print(str, size, (Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort);
- }
- TString PrintHostS() const {
- return PrintS((Scheme != SchemeHTTP ? FlagScheme : 0) | FlagHostPort);
- }
- // Info methods
- int Compare(const TUri& A, int flags = FlagUrlFields) const;
- int CompareField(EField fld, const TUri& url) const;
- const TStringBuf& GetField(EField fld) const {
- return FldIsValid(fld) && FldIsSet(fld) ? FldGet(fld) : Default<TStringBuf>();
- }
- ui16 GetPort() const {
- return 0 == Port ? DefaultPort : Port;
- }
- const TStringBuf& GetHost() const {
- if (GetFieldMask() & FlagHostAscii)
- return FldGet(FieldHostAscii);
- if (GetFieldMask() & FlagHost)
- return FldGet(FieldHost);
- return Default<TStringBuf>();
- }
- bool UseHostAscii() {
- return FldMov(FieldHostAscii, FieldHost);
- }
- TScheme::EKind GetScheme() const {
- return Scheme;
- }
- const TSchemeInfo& GetSchemeInfo() const {
- return TSchemeInfo::Get(Scheme);
- }
- bool IsNull(ui32 flags = FlagScheme | FlagHost | FlagPath) const {
- return !FldSetCmp(flags);
- }
- bool IsNull(EField fld) const {
- return !FldIsSet(fld);
- }
- bool IsValidAbs() const {
- if (IsNull(FlagScheme | FlagHost | FlagPath))
- return false;
- return IsAbsPathImpl();
- }
- bool IsValidGlobal() const {
- if (IsNull(FlagScheme | FlagHost))
- return false;
- if (IsNull(FlagPath))
- return true;
- return IsAbsPathImpl();
- }
- bool IsRootless() const {
- return FldSetCmp(FlagScheme | FlagHost | FlagPath, FlagScheme | FlagPath) && !IsAbsPathImpl();
- }
- // for RFC 2396 compatibility
- bool IsOpaque() const {
- return IsRootless();
- }
- // Inline helpers
- TUri& operator=(const TUri& u) {
- Copy(u);
- return *this;
- }
- bool operator!() const {
- return IsNull();
- }
- bool Equal(const TUri& A, int flags = FlagUrlFields) const {
- return (Compare(A, flags) == 0);
- }
- bool Less(const TUri& A, int flags = FlagUrlFields) const {
- return (Compare(A, flags) < 0);
- }
- bool operator==(const TUri& A) const {
- return Equal(A, FlagNoFrag);
- }
- bool operator!=(const TUri& A) const {
- return !Equal(A, FlagNoFrag);
- }
- bool operator<(const TUri& A) const {
- return Less(A, FlagNoFrag);
- }
- bool IsSameDocument(const TUri& other) const {
- // pre: both *this and 'other' should be normalized to valid abs
- Y_ASSERT(IsValidAbs());
- return Equal(other, FlagNoFrag);
- }
- bool IsLocal(const TUri& other) const {
- // pre: both *this and 'other' should be normalized to valid abs
- Y_ASSERT(IsValidAbs() && other.IsValidAbs());
- return Equal(other, FlagScheme | FlagHostPort);
- }
- TLinkType Locality(const TUri& other) const {
- if (IsSameDocument(other))
- return LinkIsFragment;
- else if (IsLocal(other))
- return LinkIsLocal;
- return LinkIsGlobal;
- }
- static IOutputStream& ReEncodeField(IOutputStream& out, const TStringBuf& val, EField fld, ui64 flags = FeaturesEncodeDecode) {
- return NEncode::TEncoder::ReEncode(out, val, NEncode::TEncodeMapper(flags, fld));
- }
- static IOutputStream& ReEncodeToField(IOutputStream& out, const TStringBuf& val, EField srcfld, ui64 srcflags, EField dstfld, ui64 dstflags) {
- return NEncode::TEncoder::ReEncodeTo(out, val, NEncode::TEncodeMapper(srcflags, srcfld), NEncode::TEncodeToMapper(dstflags, dstfld));
- }
- static IOutputStream& ReEncode(IOutputStream& out, const TStringBuf& val, ui64 flags = FeaturesEncodeDecode) {
- return ReEncodeField(out, val, FieldAllMAX, flags);
- }
- static int PathOperationFlag(const TParseFlags& flags) {
- return flags & FeaturePathDenyRootParent ? 1
- : flags & FeaturePathStripRootParent ? -1 : 0;
- }
- static bool PathOperation(char*& pathBeg, char*& pathEnd, int correctAbs);
- private:
- const TSchemeInfo& SetSchemeImpl(const TSchemeInfo& info) {
- Scheme = info.Kind;
- DefaultPort = info.Port;
- if (!info.Str.empty())
- FldSetNoDirty(FieldScheme, info.Str);
- return info;
- }
- const TSchemeInfo& SetSchemeImpl(TScheme::EKind scheme) {
- return SetSchemeImpl(TSchemeInfo::Get(scheme));
- }
- public:
- const TSchemeInfo& SetScheme(const TSchemeInfo& info) {
- SetSchemeImpl(info);
- if (!info.Str.empty())
- FldMarkClean(FieldScheme);
- return info;
- }
- const TSchemeInfo& SetScheme(TScheme::EKind scheme) {
- return SetScheme(TSchemeInfo::Get(scheme));
- }
- };
- class TUriUpdate {
- TUri& Uri_;
- public:
- TUriUpdate(TUri& uri)
- : Uri_(uri)
- {
- }
- ~TUriUpdate() {
- Uri_.Rewrite();
- }
- public:
- bool Set(TField::EField field, const TStringBuf& value) {
- return Uri_.FldMemSet(field, value);
- }
- template <size_t size>
- bool Set(TField::EField field, const char (&value)[size]) {
- return Uri_.FldMemSet(field, value);
- }
- void Clr(TField::EField field) {
- Uri_.FldClr(field);
- }
- };
- const char* LinkTypeToString(const TUri::TLinkType& t);
- }
- Y_DECLARE_OUT_SPEC(inline, NUri::TUri, out, url) {
- url.Print(out);
- }
- Y_DECLARE_OUT_SPEC(inline, NUri::TUri::TLinkType, out, t) {
- out << NUri::LinkTypeToString(t);
- }
|