url.h 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. #pragma once
  2. #include <util/generic/fwd.h>
  3. #include <util/generic/strbuf.h>
  4. namespace NUrl {
  5. /**
  6. * Splits URL to host and path and returns both parts as one aggregate.
  7. * Example:
  8. * auto [host, path] = SplitUrlToHostAndPath(url);
  9. *
  10. * @param[in] url any URL
  11. * @return {host, path} parsed host and path
  12. */
  13. struct TSplitUrlToHostAndPathResult { // return type of NUrl::SplitUrlToHostAndPath(); usable with structured bindings as shown above
  14. TStringBuf host; // parsed host
  15. TStringBuf path; // parsed path
  16. };
  17. Y_PURE_FUNCTION // NOTE(review): the result fields are TStringBuf — presumably views into `url`, so `url` must outlive the result; confirm against the implementation
  18. TSplitUrlToHostAndPathResult SplitUrlToHostAndPath(const TStringBuf url);
  19. } // namespace NUrl
  20. Y_PURE_FUNCTION // NOTE(review): undocumented; presumably returns the length of a leading "http://" (and "https://" unless ignorehttps is true), 0 if there is none — confirm against the implementation
  21. size_t GetHttpPrefixSize(const char* url, bool ignorehttps = false) noexcept; // overload for a raw char pointer; presumably expects a NUL-terminated string — TODO confirm
  22. Y_PURE_FUNCTION
  23. size_t GetHttpPrefixSize(const wchar16* url, bool ignorehttps = false) noexcept; // overload for a raw wchar16 pointer
  24. Y_PURE_FUNCTION
  25. size_t GetHttpPrefixSize(const TStringBuf url, bool ignorehttps = false) noexcept; // overload for TStringBuf
  26. Y_PURE_FUNCTION
  27. size_t GetHttpPrefixSize(const TWtringBuf url, bool ignorehttps = false) noexcept; // overload for TWtringBuf
  28. /** BEWARE of TStringBuf! You cannot use operator ~ or c_str() on it
  29. the way you can on a TString. */
  30. Y_PURE_FUNCTION // NOTE(review): undocumented; presumably the length of the leading scheme prefix of url (0 if none) — confirm against the implementation
  31. size_t GetSchemePrefixSize(const TStringBuf url) noexcept;
  32. Y_PURE_FUNCTION // NOTE(review): undocumented; presumably the scheme prefix itself, i.e. the first GetSchemePrefixSize(url) characters — confirm
  33. TStringBuf GetSchemePrefix(const TStringBuf url) noexcept;
  34. //! removes protocol prefixes 'http://' and 'https://' from given URL
  35. //! @note if URL has no prefix or some other prefix the function does nothing
  36. //! @param url URL from which the prefix should be removed
  37. //! @param ignorehttps if true, leaves https://
  38. //! @return the URL without protocol prefix, as a TStringBuf
  39. Y_PURE_FUNCTION
  40. TStringBuf CutHttpPrefix(const TStringBuf url, bool ignorehttps = false) noexcept;
  41. Y_PURE_FUNCTION // wide-string (TWtringBuf) overload of CutHttpPrefix
  42. TWtringBuf CutHttpPrefix(const TWtringBuf url, bool ignorehttps = false) noexcept;
  43. Y_PURE_FUNCTION // NOTE(review): undocumented; presumably removes any scheme prefix, not only http/https — confirm against the implementation
  44. TStringBuf CutSchemePrefix(const TStringBuf url) noexcept;
  45. //! adds the specified scheme prefix if the URL has no scheme
  46. //! @note if the URL already has a scheme prefix, the function returns the URL unchanged
  47. TString AddSchemePrefix(const TString& url, const TStringBuf scheme); // NOTE(review): scheme is presumably passed without "://", judging by the "http" example below — confirm
  48. //! Same as `AddSchemePrefix(url, "http")`.
  49. TString AddSchemePrefix(const TString& url);
  50. Y_PURE_FUNCTION // NOTE(review): the three getters below are undocumented; the descriptions are inferred from names only — confirm against the implementation
  51. TStringBuf GetHost(const TStringBuf url) noexcept; // presumably the host part of url
  52. Y_PURE_FUNCTION
  53. TStringBuf GetHostAndPort(const TStringBuf url) noexcept; // presumably the host together with the port, if any
  54. Y_PURE_FUNCTION
  55. TStringBuf GetSchemeHostAndPort(const TStringBuf url, bool trimHttp = true, bool trimDefaultPort = true) noexcept; // presumably scheme + host + port; both trim flags default to true, their exact effect needs confirming
  56. /**
  57. * Splits URL to host and path, writing the parts to output parameters
  58. *
  59. * @param[in] url any URL
  60. * @param[out] host parsed host
  61. * @param[out] path parsed path
  62. */
  63. void SplitUrlToHostAndPath(const TStringBuf url, TStringBuf& host, TStringBuf& path); // TStringBuf outputs; presumably views into url — TODO confirm
  64. void SplitUrlToHostAndPath(const TStringBuf url, TString& host, TString& path); // overload with TString outputs
  65. /**
  66. * Separates URL into URL prefix, query (aka CGI params list), and fragment (aka the part after #)
  67. *
  68. * @param[in] url any URL
  69. * @param[out] sanitizedUrl parsed URL without query and fragment parts
  70. * @param[out] query parsed query
  71. * @param[out] fragment parsed fragment
  72. */
  73. void SeparateUrlFromQueryAndFragment(const TStringBuf url, TStringBuf& sanitizedUrl, TStringBuf& query, TStringBuf& fragment); // NOTE(review): whether the '?' and '#' delimiters are kept in query/fragment is not stated here — confirm
  74. /**
  75. * Extracts scheme, host and port from URL.
  76. *
  77. * The port is parsed from the URL with checks against ui16 overflow. If the URL
  78. * doesn't contain a port, it is determined from the known schemes (currently
  79. * https:// and http:// only).
  80. * Given parameters will not be modified if the URL has no appropriate components.
  81. *
  82. * @param[in] url any URL
  83. * @param[out] scheme URL scheme
  84. * @param[out] host host name
  85. * @param[out] port parsed port number
  86. * @return false if a port number is present but cannot be parsed into ui16,
  87. * true otherwise.
  88. */
  89. bool TryGetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
  90. /**
  91. * Extracts scheme, host and port from URL.
  92. *
  93. * This function performs the same actions as TryGetSchemeHostAndPort(), but
  94. * throws yexception if the port number cannot be parsed.
  95. *
  96. * @param[in] url any URL
  97. * @param[out] scheme URL scheme
  98. * @param[out] host host name
  99. * @param[out] port parsed port number
  100. * @throws yexception if a port number is present but cannot be parsed into ui16.
  101. */
  102. void GetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
  103. Y_PURE_FUNCTION // NOTE(review): undocumented; presumably returns the path and query of url, dropping the '#fragment' when trimFragment is true (the default) — confirm against the implementation
  104. TStringBuf GetPathAndQuery(const TStringBuf url, bool trimFragment = true) noexcept;
  105. /**
  106. * Extracts the host from url, cutting the http(https) protocol prefix and the port, if any.
  107. * @param[in] url any URL
  108. * @return host without port and http(https) prefix, as a TStringBuf.
  109. */
  110. Y_PURE_FUNCTION
  111. TStringBuf GetOnlyHost(const TStringBuf url) noexcept;
  112. Y_PURE_FUNCTION
  113. TStringBuf GetParentDomain(const TStringBuf host, size_t level) noexcept; // ("www.ya.ru", 2) -> "ya.ru"; level presumably counts dot-separated labels from the right — TODO confirm
  114. Y_PURE_FUNCTION
  115. TStringBuf GetZone(const TStringBuf host) noexcept; // NOTE(review): undocumented; presumably the last label (top-level zone) of host — confirm
  116. Y_PURE_FUNCTION
  117. TStringBuf CutWWWPrefix(const TStringBuf url) noexcept; // NOTE(review): undocumented; presumably strips a leading "www." — confirm
  118. Y_PURE_FUNCTION
  119. TStringBuf CutWWWNumberedPrefix(const TStringBuf url) noexcept; // NOTE(review): undocumented; presumably also strips numbered variants such as "www2." — confirm
  120. /**
  121. * Cuts the 'm.' (or 'M.') prefix from url if and only if the url starts with it
  122. * Example: 'm.some-domain.com' -> 'some-domain.com'.
  123. * 'http://m.some-domain.com' is not changed
  124. *
  125. * @param[in] url any URL
  126. * @return url without 'm.' or 'M.' prefix.
  127. */
  128. Y_PURE_FUNCTION
  129. TStringBuf CutMPrefix(const TStringBuf url) noexcept;
  130. Y_PURE_FUNCTION
  131. TStringBuf GetDomain(const TStringBuf host) noexcept; // should not be used (the reason is not stated in this header)
  132. size_t NormalizeUrlName(char* dest, const TStringBuf source, size_t dest_size); // NOTE(review): undocumented; presumably writes a normalized form of source into dest (capacity dest_size); meaning of the returned size_t needs confirming
  133. size_t NormalizeHostName(char* dest, const TStringBuf source, size_t dest_size, ui16 defport = 80); // NOTE(review): undocumented; defport defaults to 80, presumably the port treated as default during normalization — confirm
  134. Y_PURE_FUNCTION
  135. TStringBuf RemoveFinalSlash(TStringBuf str) noexcept; // NOTE(review): undocumented; presumably drops one trailing '/' from str — confirm
  136. TStringBuf CutUrlPrefixes(TStringBuf url) noexcept; // NOTE(review): undocumented and, unlike its neighbours, not marked Y_PURE_FUNCTION; which prefixes are cut is not visible here — confirm
  137. bool DoesUrlPathStartWithToken(TStringBuf url, const TStringBuf& token) noexcept; // NOTE(review): undocumented; token is taken by const reference although the rest of this header passes TStringBuf by value