url.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. #pragma once
  2. #include <util/generic/fwd.h>
  3. #include <util/generic/strbuf.h>
  4. namespace NUrl {
  5. /**
  6. * Result of SplitUrlToHostAndPath(url): the parsed host and path of a URL.
  7. * Example:
  8. * auto [host, path] = SplitUrlToHostAndPath(url);
  9. *
  10. * host - parsed host
  11. * path - parsed path
  12. */
  13. struct TSplitUrlToHostAndPathResult {
  14. TStringBuf host;
  15. TStringBuf path;
  16. };
  //! Splits URL to host and path.
  //! @param[in] url any URL
  //! @return <host, path> parsed host and path; can be unpacked with structured bindings
  17. Y_PURE_FUNCTION
  18. TSplitUrlToHostAndPathResult SplitUrlToHostAndPath(const TStringBuf url);
  //! NOTE(review): presumably reports whether the host part of url is already lower-case - confirm against the definition.
  19. bool HasLowerHost(const TStringBuf &url);
  //! NOTE(review): presumably strips leading http(s):// and www. prefixes from url - confirm against the definition.
  20. TStringBuf CutHttpWwwPrefixes(const TStringBuf &url);
  //! Returns a TString by value; shift defaults to 0.
  //! NOTE(review): presumably lower-cases the host part of url; the meaning of shift is not visible here - TODO confirm.
  21. TString MakeLowerHost(const TStringBuf &url, size_t shift = 0);
  //! Returns a TString by value.
  //! NOTE(review): the exact normalization rules live in the implementation - TODO document them here.
  22. TString MakeNormalized(const TStringBuf &url);
  23. } // namespace NUrl
  //! Overload set for char / wchar16 pointers and for TStringBuf / TWtringBuf arguments;
  //! ignorehttps defaults to false in every overload.
  //! NOTE(review): presumably returns the length of a leading 'http://' prefix (and of
  //! 'https://' unless ignorehttps is true), 0 if there is none - confirm against the definition.
  24. Y_PURE_FUNCTION
  25. size_t GetHttpPrefixSize(const char* url, bool ignorehttps = false) noexcept;
  26. Y_PURE_FUNCTION
  27. size_t GetHttpPrefixSize(const wchar16* url, bool ignorehttps = false) noexcept;
  28. Y_PURE_FUNCTION
  29. size_t GetHttpPrefixSize(const TStringBuf url, bool ignorehttps = false) noexcept;
  30. Y_PURE_FUNCTION
  31. size_t GetHttpPrefixSize(const TWtringBuf url, bool ignorehttps = false) noexcept;
  32. /** BEWARE of TStringBuf! You cannot use operator ~ or c_str() on it the way you would on a TString.
  33. */
  //! NOTE(review): presumably returns the length of the leading scheme prefix ('scheme://')
  //! of url, 0 if there is none - confirm against the definition.
  34. Y_PURE_FUNCTION
  35. size_t GetSchemePrefixSize(const TStringBuf url) noexcept;
  //! NOTE(review): presumably returns the leading scheme prefix of url (the part measured by
  //! GetSchemePrefixSize), empty if there is none - confirm against the definition.
  36. Y_PURE_FUNCTION
  37. TStringBuf GetSchemePrefix(const TStringBuf url) noexcept;
  38. //! Removes the protocol prefix 'http://' or 'https://' from the given URL.
  39. //! @note if the URL has no prefix, or has some other prefix, the function does nothing
  40. //! @param url URL from which the prefix should be removed
  41. //! @param ignorehttps if true, leaves https:// in place (defaults to false)
  42. //! @return the URL without the protocol prefix
  43. Y_PURE_FUNCTION
  44. TStringBuf CutHttpPrefix(const TStringBuf url, bool ignorehttps = false) noexcept;
  //! Overload taking and returning TWtringBuf.
  //! NOTE(review): presumably follows the same contract as the TStringBuf overload - confirm.
  45. Y_PURE_FUNCTION
  46. TWtringBuf CutHttpPrefix(const TWtringBuf url, bool ignorehttps = false) noexcept;
  //! NOTE(review): presumably removes any leading scheme prefix ('scheme://') from url, not only
  //! http/https as CutHttpPrefix does - confirm against the definition.
  47. Y_PURE_FUNCTION
  48. TStringBuf CutSchemePrefix(const TStringBuf url) noexcept;
  49. //! Adds the specified scheme prefix if the URL has no scheme.
  50. //! @note if the URL already has a scheme prefix, the function returns the URL unchanged
  //! @param url URL to which the scheme prefix may be added
  //! @param scheme scheme to add
  51. TString AddSchemePrefix(const TString& url, const TStringBuf scheme);
  52. //! Same as `AddSchemePrefix(url, "http")`.
  53. TString AddSchemePrefix(const TString& url);
  //! NOTE(review): presumably returns the host part of url; whether a scheme prefix or port
  //! is stripped is not visible here - confirm against the definition.
  54. Y_PURE_FUNCTION
  55. TStringBuf GetHost(const TStringBuf url) noexcept;
  //! NOTE(review): presumably returns the 'host[:port]' part of url - confirm against the definition.
  56. Y_PURE_FUNCTION
  57. TStringBuf GetHostAndPort(const TStringBuf url) noexcept;
  //! trimHttp defaults to true.
  //! NOTE(review): presumably returns the scheme and host part of url, dropping an 'http://'
  //! scheme when trimHttp is true - confirm against the definition.
  58. Y_PURE_FUNCTION
  59. TStringBuf GetSchemeHost(const TStringBuf url, bool trimHttp = true) noexcept;
  //! Non-throwing overload returning a single TStringBuf; trimHttp and trimDefaultPort both default to true.
  //! NOTE(review): presumably returns the scheme, host and port part of url, dropping an 'http://'
  //! scheme when trimHttp is true and the scheme's default port when trimDefaultPort is true -
  //! confirm against the definition.
  60. Y_PURE_FUNCTION
  61. TStringBuf GetSchemeHostAndPort(const TStringBuf url, bool trimHttp = true, bool trimDefaultPort = true) noexcept;
  62. /**
  63. * Splits URL to host and path. Two overloads: outputs as TStringBuf or as TString.
  64. *
  65. * @param[in] url any URL
  66. * @param[out] host parsed host
  67. * @param[out] path parsed path
  68. */
  69. void SplitUrlToHostAndPath(const TStringBuf url, TStringBuf& host, TStringBuf& path);
  70. void SplitUrlToHostAndPath(const TStringBuf url, TString& host, TString& path);
  71. /**
  72. * Separates URL into URL prefix, query (aka CGI params list), and fragment (aka the part after #).
  73. *
  74. * @param[in] url any URL
  75. * @param[out] sanitizedUrl parsed URL without the query and fragment parts
  76. * @param[out] query parsed query
  77. * @param[out] fragment parsed fragment
  78. */
  79. void SeparateUrlFromQueryAndFragment(const TStringBuf url, TStringBuf& sanitizedUrl, TStringBuf& query, TStringBuf& fragment);
  80. /**
  81. * Extracts scheme, host and port from URL.
  82. *
  83. * The port is parsed from the URL with a check against ui16 overflow. If the URL
  84. * doesn't contain a port, it is determined from one of the known schemes (currently
  85. * https:// and http:// only).
  86. * Output parameters are not modified if the URL has no corresponding components.
  87. *
  88. * @param[in] url any URL
  89. * @param[out] scheme URL scheme
  90. * @param[out] host host name
  91. * @param[out] port parsed port number
  92. * @return false if the port number present in the URL cannot be parsed into ui16,
  93. * true otherwise.
  94. */
  95. bool TryGetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
  96. /**
  97. * Extracts scheme, host and port from URL.
  98. *
  99. * This function performs the same actions as TryGetSchemeHostAndPort(), but
  100. * throws yexception if the port number cannot be parsed.
  101. *
  102. * @param[in] url any URL
  103. * @param[out] scheme URL scheme
  104. * @param[out] host host name
  105. * @param[out] port parsed port number
  106. * @throws yexception if the port number present in the URL cannot be parsed into ui16.
  107. */
  108. void GetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
  //! trimFragment defaults to true.
  //! NOTE(review): presumably returns the path and query part of url (everything after the host),
  //! without the '#fragment' part when trimFragment is true - confirm against the definition.
  109. Y_PURE_FUNCTION
  110. TStringBuf GetPathAndQuery(const TStringBuf url, bool trimFragment = true) noexcept;
  111. /**
  112. * Extracts the host from url, cutting the http(https) protocol prefix and the port, if any.
  113. * @param[in] url any URL
  114. * @return host without port and without http(https) prefix.
  115. */
  116. Y_PURE_FUNCTION
  117. TStringBuf GetOnlyHost(const TStringBuf url) noexcept;
  //! NOTE(review): judging by the example on the declaration, level is presumably the number of
  //! trailing dot-separated labels of host to keep - confirm; behaviour for a level larger than
  //! the number of labels is not visible here.
  118. Y_PURE_FUNCTION
  119. TStringBuf GetParentDomain(const TStringBuf host, size_t level) noexcept; // ("www.ya.ru", 2) -> "ya.ru"
  //! NOTE(review): presumably returns the zone of host, i.e. its last dot-separated label
  //! (e.g. "ru" for "www.ya.ru") - confirm against the definition.
  120. Y_PURE_FUNCTION
  121. TStringBuf GetZone(const TStringBuf host) noexcept;
  //! NOTE(review): presumably removes a leading 'www.' from url; case handling and behaviour
  //! when a scheme prefix is present are not visible here - confirm against the definition.
  122. Y_PURE_FUNCTION
  123. TStringBuf CutWWWPrefix(const TStringBuf url) noexcept;
  //! NOTE(review): presumably like CutWWWPrefix but also accepts numbered variants of the prefix
  //! (such as 'www2.') - confirm the exact accepted forms against the definition.
  124. Y_PURE_FUNCTION
  125. TStringBuf CutWWWNumberedPrefix(const TStringBuf url) noexcept;
  126. /**
  127. * Cuts the 'm.' prefix from url if and only if the url starts with it.
  128. * Example: 'm.some-domain.com' -> 'some-domain.com'.
  129. * 'http://m.some-domain.com' is not changed.
  130. *
  131. * @param[in] url any URL
  132. * @return url without the leading 'm.' or 'M.' prefix.
  133. */
  134. Y_PURE_FUNCTION
  135. TStringBuf CutMPrefix(const TStringBuf url) noexcept;
  //! Marked by its author as not to be used (see the trailing comment); the reason is not stated here.
  //! NOTE(review): presumably returns the domain part of host - confirm against the definition
  //! before relying on it.
  136. Y_PURE_FUNCTION
  137. TStringBuf GetDomain(const TStringBuf host) noexcept; // should not be used
  //! NOTE(review): presumably writes a normalized form of source into the caller-provided buffer
  //! dest (capacity dest_size) and returns the resulting length - confirm against the definition,
  //! including truncation and NUL-termination behaviour.
  138. size_t NormalizeUrlName(char* dest, const TStringBuf source, size_t dest_size);
  //! defport defaults to 80.
  //! NOTE(review): presumably writes a normalized host name taken from source into the
  //! caller-provided buffer dest (capacity dest_size) and returns the resulting length; defport is
  //! presumably the port treated as default - confirm against the definition.
  139. size_t NormalizeHostName(char* dest, const TStringBuf source, size_t dest_size, ui16 defport = 80);
  //! NOTE(review): presumably returns str without a trailing '/' if it has one, str unchanged
  //! otherwise - confirm against the definition.
  140. Y_PURE_FUNCTION
  141. TStringBuf RemoveFinalSlash(TStringBuf str) noexcept;
  //! NOTE(review): presumably strips the leading scheme and 'www.' prefixes from url; the exact
  //! set of prefixes removed is not visible here - confirm against the definition.
  142. TStringBuf CutUrlPrefixes(TStringBuf url) noexcept;
  //! NOTE(review): presumably reports whether the path part of url begins with token; how token
  //! boundaries and a leading '/' are treated is not visible here - confirm against the definition.
  143. bool DoesUrlPathStartWithToken(TStringBuf url, const TStringBuf& token) noexcept;