url.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. #pragma once
  2. #include <util/generic/fwd.h>
  3. #include <util/generic/strbuf.h>
  4. namespace NUrl {
  5. /**
  6. * Result of SplitUrlToHostAndPath(): the host and path parts of a URL.
  7. * Example:
  8. * auto [host, path] = SplitUrlToHostAndPath(url);
  9. *
  10. * Both members are TStringBuf. NOTE(review): presumably they refer to the buffer
  11. * of the URL that was split rather than owning a copy - confirm against the implementation.
  12. */
  13. struct TSplitUrlToHostAndPathResult {
  //! parsed host
  14. TStringBuf host;
  //! parsed path
  15. TStringBuf path;
  16. };
  //! Splits URL to host and path.
  //! @param[in] url any URL
  //! @return parsed host and path; can be unpacked with structured bindings:
  //!         auto [host, path] = SplitUrlToHostAndPath(url);
  17. Y_PURE_FUNCTION
  18. TSplitUrlToHostAndPathResult SplitUrlToHostAndPath(const TStringBuf url);
  19. } // namespace NUrl
  //! Overload set measuring the http(s) prefix of a URL given as const char*,
  //! const wchar16*, TStringBuf or TWtringBuf.
  //! NOTE(review): presumably returns the length of a leading 'http://' or 'https://'
  //! and 0 when there is no such prefix - implementation not visible here, confirm.
  //! @param url URL to inspect
  //! @param ignorehttps defaults to false; NOTE(review): presumably an 'https://' prefix
  //!        is not counted when true - confirm
  //! @return prefix size as size_t
  20. Y_PURE_FUNCTION
  21. size_t GetHttpPrefixSize(const char* url, bool ignorehttps = false) noexcept;
  22. Y_PURE_FUNCTION
  23. size_t GetHttpPrefixSize(const wchar16* url, bool ignorehttps = false) noexcept;
  24. Y_PURE_FUNCTION
  25. size_t GetHttpPrefixSize(const TStringBuf url, bool ignorehttps = false) noexcept;
  26. Y_PURE_FUNCTION
  27. size_t GetHttpPrefixSize(const TWtringBuf url, bool ignorehttps = false) noexcept;
  28. /** BEWARE of TStringBuf! Unlike TString, it has no operator ~ and no c_str(),
  29. so neither can be used on the TStringBuf values these functions take or return. */
  //! @param url URL to inspect
  //! @return size of the scheme prefix of url
  //!         (NOTE(review): presumably 0 when url has no scheme - confirm)
  30. Y_PURE_FUNCTION
  31. size_t GetSchemePrefixSize(const TStringBuf url) noexcept;
  //! @param url URL to inspect
  //! @return scheme prefix of url as a TStringBuf
  //!         (NOTE(review): whether the '://' separator is included is not visible here - confirm)
  32. Y_PURE_FUNCTION
  33. TStringBuf GetSchemePrefix(const TStringBuf url) noexcept;
  34. //! Removes protocol prefixes 'http://' and 'https://' from given URL.
  35. //! @note if URL has no prefix or some other prefix the function does nothing
  36. //! @param url URL from which the prefix should be removed
  37. //! @param ignorehttps if true, leaves https://
  38. //! @return a new URL without protocol prefix
  39. Y_PURE_FUNCTION
  40. TStringBuf CutHttpPrefix(const TStringBuf url, bool ignorehttps = false) noexcept;
  //! Wide-string (TWtringBuf) overload.
  //! NOTE(review): presumably the same contract as the TStringBuf overload - confirm.
  41. Y_PURE_FUNCTION
  42. TWtringBuf CutHttpPrefix(const TWtringBuf url, bool ignorehttps = false) noexcept;
  //! NOTE(review): judging by the name, removes the scheme prefix from url; unlike
  //! CutHttpPrefix() it takes no ignorehttps flag. Which schemes are recognized is
  //! not visible here - confirm against the implementation.
  //! @param url URL to strip
  //! @return the resulting URL as a TStringBuf
  43. Y_PURE_FUNCTION
  44. TStringBuf CutSchemePrefix(const TStringBuf url) noexcept;
  45. //! Adds specified scheme prefix if URL has no scheme.
  46. //! @note if URL has scheme prefix already the function returns unchanged URL
  //! @param url URL to prefix
  //! @param scheme scheme to add, e.g. "http"
  //!        (NOTE(review): presumably passed without the "://" separator - confirm)
  //! @return the resulting URL as a TString
  47. TString AddSchemePrefix(const TString& url, const TStringBuf scheme);
  48. //! Same as `AddSchemePrefix(url, "http")`.
  49. TString AddSchemePrefix(const TString& url);
  //! @param url any URL
  //! @return host part of url (NOTE(review): inferred from the name; how scheme
  //!         and port are treated is not visible here - confirm)
  50. Y_PURE_FUNCTION
  51. TStringBuf GetHost(const TStringBuf url) noexcept;
  //! @param url any URL
  //! @return host together with port (NOTE(review): inferred from the name;
  //!         result for a URL without an explicit port is not visible here - confirm)
  52. Y_PURE_FUNCTION
  53. TStringBuf GetHostAndPort(const TStringBuf url) noexcept;
  //! @param url any URL
  //! @param trimHttp defaults to true; NOTE(review): presumably drops an 'http://'
  //!        scheme from the result - confirm
  //! @return scheme and host part of url (inferred from the name - confirm)
  54. Y_PURE_FUNCTION
  55. TStringBuf GetSchemeHost(const TStringBuf url, bool trimHttp = true) noexcept;
  //! @param url any URL
  //! @param trimHttp defaults to true; NOTE(review): presumably drops an 'http://'
  //!        scheme from the result - confirm
  //! @param trimDefaultPort defaults to true; NOTE(review): presumably drops the port
  //!        when it is the default one for the scheme - confirm
  //! @return scheme, host and port part of url (inferred from the name - confirm)
  56. Y_PURE_FUNCTION
  57. TStringBuf GetSchemeHostAndPort(const TStringBuf url, bool trimHttp = true, bool trimDefaultPort = true) noexcept;
  58. /**
  59. * Splits URL to host and path.
  60. *
  61. * @param[in] url any URL
  62. * @param[out] host parsed host
  63. * @param[out] path parsed path
  64. */
  65. void SplitUrlToHostAndPath(const TStringBuf url, TStringBuf& host, TStringBuf& path);
  //! Overload that reports host and path through TString out-parameters
  //! instead of TStringBuf ones.
  66. void SplitUrlToHostAndPath(const TStringBuf url, TString& host, TString& path);
  67. /**
  68. * Separates URL into url prefix, query (aka cgi params list), and fragment (aka part after #).
  69. *
   * NOTE(review): whether the '?' and '#' delimiters themselves are kept in the
   * outputs is not visible here - confirm against the implementation.
   *
  70. * @param[in] url any URL
  71. * @param[out] sanitizedUrl parsed URL without query and fragment parts
  72. * @param[out] query parsed query
  73. * @param[out] fragment parsed fragment
  74. */
  75. void SeparateUrlFromQueryAndFragment(const TStringBuf url, TStringBuf& sanitizedUrl, TStringBuf& query, TStringBuf& fragment);
  76. /**
  77. * Extracts scheme, host and port from URL.
  78. *
  79. * Port is parsed from URL with a check against ui16 overflow. If URL doesn't
  80. * contain a port, it is determined from the scheme when that is a known one
  81. * (currently https:// and http:// only).
  82. * Output parameters are not modified if URL has no appropriate components.
  83. *
  84. * @param[in] url any URL
  85. * @param[out] scheme URL scheme
  86. * @param[out] host host name
  87. * @param[out] port parsed port number
  88. * @return false if the port number present in URL cannot be parsed into ui16,
  89. * true otherwise.
  90. */
  91. bool TryGetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
  92. /**
  93. * Extracts scheme, host and port from URL.
  94. *
  95. * This function performs the same actions as TryGetSchemeHostAndPort(), but
  96. * throws yexception when the port number cannot be parsed instead of returning false.
  97. *
  98. * @param[in] url any URL
  99. * @param[out] scheme URL scheme
  100. * @param[out] host host name
  101. * @param[out] port parsed port number
  102. * @throws yexception if the port number present in URL cannot be parsed into ui16.
  103. */
  104. void GetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
  //! @param url any URL
  //! @param trimFragment defaults to true; NOTE(review): presumably drops the
  //!        fragment (part after #) from the result - confirm
  //! @return path and query part of url (NOTE(review): inferred from the name;
  //!         exact boundaries are not visible here - confirm)
  105. Y_PURE_FUNCTION
  106. TStringBuf GetPathAndQuery(const TStringBuf url, bool trimFragment = true) noexcept;
  107. /**
  108. * Extracts host from URL, cutting the http(https) protocol prefix and the port, if any.
  109. * @param[in] url any URL
  110. * @return host without port and without http(https) prefix.
  111. */
  112. Y_PURE_FUNCTION
  113. TStringBuf GetOnlyHost(const TStringBuf url) noexcept;
  //! Returns a parent domain of host; judging by the example on the declaration,
  //! `level` is the number of trailing dot-separated labels to keep.
  //! NOTE(review): the result when host has fewer than `level` labels is not
  //! visible here - confirm against the implementation.
  //! @param host host name
  //! @param level requested domain level
  114. Y_PURE_FUNCTION
  115. TStringBuf GetParentDomain(const TStringBuf host, size_t level) noexcept; // ("www.ya.ru", 2) -> "ya.ru"
  //! NOTE(review): judging by the name, returns the zone (presumably the last
  //! dot-separated label) of host - implementation not visible here, confirm.
  //! @param host host name
  //! @return zone of host as a TStringBuf
  116. Y_PURE_FUNCTION
  117. TStringBuf GetZone(const TStringBuf host) noexcept;
  //! NOTE(review): judging by the name, cuts a leading 'www.' prefix from url;
  //! case handling and behavior for URLs with a scheme are not visible here - confirm.
  //! @param url URL to strip
  //! @return the resulting URL as a TStringBuf
  118. Y_PURE_FUNCTION
  119. TStringBuf CutWWWPrefix(const TStringBuf url) noexcept;
  //! NOTE(review): judging by the name, like CutWWWPrefix() but also handles
  //! numbered 'www' prefixes (presumably of the 'www2.' kind) - confirm.
  //! @param url URL to strip
  //! @return the resulting URL as a TStringBuf
  120. Y_PURE_FUNCTION
  121. TStringBuf CutWWWNumberedPrefix(const TStringBuf url) noexcept;
  122. /**
  123. * Cuts 'm.' (or 'M.') prefix from url if and only if the url starts with it.
  124. * Example: 'm.some-domain.com' -> 'some-domain.com'.
  125. * 'http://m.some-domain.com' is not changed.
  126. *
  127. * @param[in] url any URL
  128. * @return url without 'm.' or 'M.' prefix.
  129. */
  130. Y_PURE_FUNCTION
  131. TStringBuf CutMPrefix(const TStringBuf url) noexcept;
  //! Marked by its author as "should not be used"; neither the reason nor the
  //! preferred replacement is stated in this header.
  //! NOTE(review): judging by the name, returns the domain of host - confirm.
  //! @param host host name
  132. Y_PURE_FUNCTION
  133. TStringBuf GetDomain(const TStringBuf host) noexcept; // should not be used
  //! NOTE(review): presumably writes a normalized form of source into dest, a
  //! buffer of dest_size chars, and returns the resulting length - the
  //! implementation is not visible here, confirm (including the return value on overflow).
  //! @param dest output buffer
  //! @param source URL to normalize
  //! @param dest_size size of dest
  134. size_t NormalizeUrlName(char* dest, const TStringBuf source, size_t dest_size);
  //! NOTE(review): presumably the host-name counterpart of NormalizeUrlName() - confirm.
  //! @param dest output buffer
  //! @param source host name to normalize
  //! @param dest_size size of dest
  //! @param defport defaults to 80; NOTE(review): presumably the port treated as
  //!        default during normalization - confirm
  135. size_t NormalizeHostName(char* dest, const TStringBuf source, size_t dest_size, ui16 defport = 80);
  //! NOTE(review): judging by the name, returns str without a trailing '/';
  //! whether more than one trailing slash is removed is not visible here - confirm.
  //! @param str string to strip
  //! @return the resulting string as a TStringBuf
  136. Y_PURE_FUNCTION
  137. TStringBuf RemoveFinalSlash(TStringBuf str) noexcept;
  //! NOTE(review): judging by the name, cuts several known prefixes from url
  //! (presumably scheme and 'www.'-like ones) - the exact set is not visible
  //! here, confirm against the implementation.
  //! @param url URL to strip
  //! @return the resulting URL as a TStringBuf
  138. TStringBuf CutUrlPrefixes(TStringBuf url) noexcept;
  //! NOTE(review): judging by the name, tells whether the path part of url starts
  //! with token; what delimits a token is not visible here - confirm.
  //! @param url any URL
  //! @param token token to look for at the start of the URL path
  //! @return true if the URL path starts with token, false otherwise
  139. bool DoesUrlPathStartWithToken(TStringBuf url, const TStringBuf& token) noexcept;