#pragma once #include #include namespace NUrl { /** * Splits URL to host and path * Example: * auto [host, path] = SplitUrlToHostAndPath(url); * * @param[in] url any URL * @param[out] parsed host and path */ struct TSplitUrlToHostAndPathResult { TStringBuf host; TStringBuf path; }; Y_PURE_FUNCTION TSplitUrlToHostAndPathResult SplitUrlToHostAndPath(const TStringBuf url Y_LIFETIME_BOUND); bool HasLowerHost(const TStringBuf url); TStringBuf CutHttpWwwPrefixes(const TStringBuf url Y_LIFETIME_BOUND); TString MakeLowerHost(const TStringBuf url, size_t shift = 0); TString MakeNormalized(const TStringBuf url); } // namespace NUrl Y_PURE_FUNCTION size_t GetHttpPrefixSize(const char* url, bool ignorehttps = false) noexcept; Y_PURE_FUNCTION size_t GetHttpPrefixSize(const wchar16* url, bool ignorehttps = false) noexcept; Y_PURE_FUNCTION size_t GetHttpPrefixSize(const TStringBuf url, bool ignorehttps = false) noexcept; Y_PURE_FUNCTION size_t GetHttpPrefixSize(const TWtringBuf url, bool ignorehttps = false) noexcept; /** BEWARE of TStringBuf! You can not use operator ~ or c_str() like in TString !!!!!!!!!!!! */ Y_PURE_FUNCTION size_t GetSchemePrefixSize(const TStringBuf url) noexcept; Y_PURE_FUNCTION TStringBuf GetSchemePrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept; //! removes protocol prefixes 'http://' and 'https://' from given URL //! @note if URL has no prefix or some other prefix the function does nothing //! @param url URL from which the prefix should be removed //! @param ignorehttps if true, leaves https:// //! @return a new URL without protocol prefix Y_PURE_FUNCTION TStringBuf CutHttpPrefix(const TStringBuf url Y_LIFETIME_BOUND, bool ignorehttps = false) noexcept; Y_PURE_FUNCTION TWtringBuf CutHttpPrefix(const TWtringBuf url Y_LIFETIME_BOUND, bool ignorehttps = false) noexcept; Y_PURE_FUNCTION TStringBuf CutSchemePrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept; //! adds specified scheme prefix if URL has no scheme //! @note if URL has scheme prefix already the function returns unchanged URL TString AddSchemePrefix(const TString& url, const TStringBuf scheme); //! Same as `AddSchemePrefix(url, "http")`. TString AddSchemePrefix(const TString& url); Y_PURE_FUNCTION TStringBuf GetHost(const TStringBuf url Y_LIFETIME_BOUND) noexcept; Y_PURE_FUNCTION TStringBuf GetHostAndPort(const TStringBuf url Y_LIFETIME_BOUND) noexcept; Y_PURE_FUNCTION TStringBuf GetSchemeHost(const TStringBuf url Y_LIFETIME_BOUND, bool trimHttp = true) noexcept; Y_PURE_FUNCTION TStringBuf GetSchemeHostAndPort(const TStringBuf url Y_LIFETIME_BOUND, bool trimHttp = true, bool trimDefaultPort = true) noexcept; /** * Splits URL to host and path * * @param[in] url any URL * @param[out] host parsed host * @param[out] path parsed path */ void SplitUrlToHostAndPath(const TStringBuf url, TStringBuf& host, TStringBuf& path); void SplitUrlToHostAndPath(const TStringBuf url, TString& host, TString& path); /** * Separates URL into url prefix, query (aka cgi params list), and fragment (aka part after #) * * @param[in] url any URL * @param[out] sanitizedUrl parsed URL without query and fragment parts * @param[out] query parsed query * @param[out] fragment parsed fragment */ void SeparateUrlFromQueryAndFragment(const TStringBuf url, TStringBuf& sanitizedUrl, TStringBuf& query, TStringBuf& fragment); /** * Extracts scheme, host and port from URL. * * Port will be parsed from URL with checks against ui16 overflow. If URL doesn't * contain port it will be determined by one of the known schemes (currently * https:// and http:// only). * Given parameters will not be modified if URL has no appropriate components. * * @param[in] url any URL * @param[out] scheme URL scheme * @param[out] host host name * @param[out] port parsed port number * @return false if present port number cannot be parsed into ui16 * true otherwise. */ bool TryGetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port); /** * Extracts scheme, host and port from URL. * * This function perform the same actions as TryGetSchemeHostAndPort(), but in * case of impossibility to parse port number throws yexception. * * @param[in] url any URL * @param[out] scheme URL scheme * @param[out] host host name * @param[out] port parsed port number * @throws yexception if present port number cannot be parsed into ui16. */ void GetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port); Y_PURE_FUNCTION TStringBuf GetPathAndQuery(const TStringBuf url Y_LIFETIME_BOUND, bool trimFragment = true) noexcept; /** * Extracts host from url and cuts http(https) protocol prefix and port if any. * @param[in] url any URL * @return host without port and http(https) prefix. */ Y_PURE_FUNCTION TStringBuf GetOnlyHost(const TStringBuf url Y_LIFETIME_BOUND) noexcept; Y_PURE_FUNCTION TStringBuf GetParentDomain(const TStringBuf host Y_LIFETIME_BOUND, size_t level) noexcept; // ("www.ya.ru", 2) -> "ya.ru" Y_PURE_FUNCTION TStringBuf GetZone(const TStringBuf host Y_LIFETIME_BOUND) noexcept; Y_PURE_FUNCTION TStringBuf CutWWWPrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept; Y_PURE_FUNCTION TStringBuf CutWWWNumberedPrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept; /** * Cuts 'm.' prefix from url if and only if the url starts with it * Example: 'm.some-domain.com' -> 'some-domain.com'. * 'http://m.some-domain.com' is not changed * * @param[in] url any URL * @return url without 'm.' or 'M.' prefix. */ Y_PURE_FUNCTION TStringBuf CutMPrefix(const TStringBuf url Y_LIFETIME_BOUND) noexcept; Y_PURE_FUNCTION TStringBuf GetDomain(const TStringBuf host Y_LIFETIME_BOUND) noexcept; // should not be used size_t NormalizeUrlName(char* dest, const TStringBuf source, size_t dest_size); size_t NormalizeHostName(char* dest, const TStringBuf source, size_t dest_size, ui16 defport = 80); Y_PURE_FUNCTION TStringBuf RemoveFinalSlash(TStringBuf str Y_LIFETIME_BOUND) noexcept; TStringBuf CutUrlPrefixes(TStringBuf url Y_LIFETIME_BOUND) noexcept; bool DoesUrlPathStartWithToken(TStringBuf url, const TStringBuf token) noexcept;