URI.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599
  1. /**
  2. * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3. * SPDX-License-Identifier: Apache-2.0.
  4. */
  5. #include <aws/core/http/URI.h>
  6. #include <aws/core/utils/memory/stl/AWSSet.h>
  7. #include <aws/core/utils/logging/LogMacros.h>
  8. #include <cstdlib>
  9. #include <cctype>
  10. #include <cassert>
  11. #include <algorithm>
  12. #include <iomanip>
  13. using namespace Aws::Http;
  14. using namespace Aws::Utils;
  15. namespace Aws
  16. {
  17. namespace Http
  18. {
  // Delimiter that sits between the scheme and the authority in a URI string.
  const char* SEPARATOR = "://";

  // Selects the escaping used by urlEncodeSegment: true defers to StringUtils::URLEncode,
  // false (the default) uses the legacy escaping rules.
  bool s_compliantRfc3986Encoding = false;

  /**
   * Stores the requested escaping mode in s_compliantRfc3986Encoding.
   * @param compliant true to switch urlEncodeSegment to StringUtils::URLEncode.
   */
  void SetCompliantRfc3986Encoding(bool compliant)
  {
      s_compliantRfc3986Encoding = compliant;
  }
  22. Aws::String urlEncodeSegment(const Aws::String& segment)
  23. {
  24. // consolidates legacy escaping logic into one local method
  25. if (s_compliantRfc3986Encoding)
  26. {
  27. return StringUtils::URLEncode(segment.c_str());
  28. }
  29. else
  30. {
  31. Aws::StringStream ss;
  32. ss << std::hex << std::uppercase;
  33. for(unsigned char c : segment) // alnum results in UB if the value of c is not unsigned char & is not EOF
  34. {
  35. // RFC 3986 §2.3 unreserved characters
  36. if (StringUtils::IsAlnum(c))
  37. {
  38. ss << c;
  39. continue;
  40. }
  41. switch(c)
  42. {
  43. // §2.3 unreserved characters
  44. // The path section of the URL allows unreserved characters to appear unescaped
  45. case '-': case '_': case '.': case '~':
  46. // RFC 3986 §2.2 Reserved characters
  47. // NOTE: this implementation does not accurately implement the RFC on purpose to accommodate for
  48. // discrepancies in the implementations of URL encoding between AWS services for legacy reasons.
  49. case '$': case '&': case ',':
  50. case ':': case '=': case '@':
  51. ss << c;
  52. break;
  53. default:
  54. ss << '%' << std::setfill('0') << std::setw(2) << (int)c << std::setw(0);
  55. }
  56. }
  57. return ss.str();
  58. }
  59. }
  60. } // namespace Http
  61. } // namespace Aws
  62. URI::URI() : m_scheme(Scheme::HTTP), m_port(HTTP_DEFAULT_PORT), m_pathHasTrailingSlash(false)
  63. {
  64. }
  65. URI::URI(const Aws::String& uri) : m_scheme(Scheme::HTTP), m_port(HTTP_DEFAULT_PORT)
  66. {
  67. ParseURIParts(uri);
  68. }
  69. URI::URI(const char* uri) : m_scheme(Scheme::HTTP), m_port(HTTP_DEFAULT_PORT)
  70. {
  71. ParseURIParts(uri);
  72. }
  73. URI& URI::operator =(const Aws::String& uri)
  74. {
  75. this->ParseURIParts(uri);
  76. return *this;
  77. }
  78. URI& URI::operator =(const char* uri)
  79. {
  80. this->ParseURIParts(uri);
  81. return *this;
  82. }
  83. bool URI::operator ==(const URI& other) const
  84. {
  85. return CompareURIParts(other);
  86. }
  87. bool URI::operator ==(const Aws::String& other) const
  88. {
  89. return CompareURIParts(other);
  90. }
  91. bool URI::operator ==(const char* other) const
  92. {
  93. return CompareURIParts(other);
  94. }
  95. bool URI::operator !=(const URI& other) const
  96. {
  97. return !(*this == other);
  98. }
  99. bool URI::operator !=(const Aws::String& other) const
  100. {
  101. return !(*this == other);
  102. }
  103. bool URI::operator !=(const char* other) const
  104. {
  105. return !(*this == other);
  106. }
  107. void URI::SetScheme(Scheme value)
  108. {
  109. assert(value == Scheme::HTTP || value == Scheme::HTTPS);
  110. if (value == Scheme::HTTP)
  111. {
  112. m_port = m_port == HTTPS_DEFAULT_PORT || m_port == 0 ? HTTP_DEFAULT_PORT : m_port;
  113. m_scheme = value;
  114. }
  115. else if (value == Scheme::HTTPS)
  116. {
  117. m_port = m_port == HTTP_DEFAULT_PORT || m_port == 0 ? HTTPS_DEFAULT_PORT : m_port;
  118. m_scheme = value;
  119. }
  120. }
  121. Aws::String URI::URLEncodePathRFC3986(const Aws::String& path)
  122. {
  123. if (path.empty())
  124. {
  125. return path;
  126. }
  127. const Aws::Vector<Aws::String> pathParts = StringUtils::Split(path, '/');
  128. Aws::StringStream ss;
  129. ss << std::hex << std::uppercase;
  130. // escape characters appearing in a URL path according to RFC 3986
  131. for (const auto& segment : pathParts)
  132. {
  133. ss << '/' << urlEncodeSegment(segment);
  134. }
  135. // if the last character was also a slash, then add that back here.
  136. if (path.back() == '/')
  137. {
  138. ss << '/';
  139. }
  140. return ss.str();
  141. }
  142. Aws::String URI::URLEncodePath(const Aws::String& path)
  143. {
  144. Aws::Vector<Aws::String> pathParts = StringUtils::Split(path, '/');
  145. Aws::StringStream ss;
  146. for (Aws::Vector<Aws::String>::iterator iter = pathParts.begin(); iter != pathParts.end(); ++iter)
  147. {
  148. ss << '/' << StringUtils::URLEncode(iter->c_str());
  149. }
  150. //if the last character was also a slash, then add that back here.
  151. if (path.length() > 0 && path[path.length() - 1] == '/')
  152. {
  153. ss << '/';
  154. }
  155. if (path.length() > 0 && path[0] != '/')
  156. {
  157. return ss.str().substr(1);
  158. }
  159. else
  160. {
  161. return ss.str();
  162. }
  163. }
  164. Aws::String URI::GetPath() const
  165. {
  166. Aws::String path = "";
  167. for (auto const& segment : m_pathSegments)
  168. {
  169. path.push_back('/');
  170. path.append(segment);
  171. }
  172. if (m_pathSegments.empty() || m_pathHasTrailingSlash)
  173. {
  174. path.push_back('/');
  175. }
  176. return path;
  177. }
  178. Aws::String URI::GetURLEncodedPath() const
  179. {
  180. Aws::StringStream ss;
  181. for (auto const& segment : m_pathSegments)
  182. {
  183. ss << '/' << StringUtils::URLEncode(segment.c_str());
  184. }
  185. if (m_pathSegments.empty() || m_pathHasTrailingSlash)
  186. {
  187. ss << '/';
  188. }
  189. return ss.str();
  190. }
  191. Aws::String URI::GetURLEncodedPathRFC3986() const
  192. {
  193. Aws::StringStream ss;
  194. ss << std::hex << std::uppercase;
  195. // escape characters appearing in a URL path according to RFC 3986
  196. // (mostly; there is some non-standards legacy support that can be disabled)
  197. for (const auto& segment : m_pathSegments)
  198. {
  199. ss << '/' << urlEncodeSegment(segment);
  200. }
  201. if (m_pathSegments.empty() || m_pathHasTrailingSlash)
  202. {
  203. ss << '/';
  204. }
  205. return ss.str();
  206. }
  207. void URI::SetPath(const Aws::String& value)
  208. {
  209. m_pathSegments.clear();
  210. AddPathSegments(value);
  211. }
  212. //ugh, this isn't even part of the canonicalization spec. It is part of how our services have implemented their signers though....
  213. //it doesn't really hurt anything to reorder it though, so go ahead and sort the values for parameters with the same key
  214. void InsertValueOrderedParameter(QueryStringParameterCollection& queryParams, const Aws::String& key, const Aws::String& value)
  215. {
  216. auto entriesAtKey = queryParams.equal_range(key);
  217. for (auto& entry = entriesAtKey.first; entry != entriesAtKey.second; ++entry)
  218. {
  219. if (entry->second > value)
  220. {
  221. queryParams.emplace_hint(entry, key, value);
  222. return;
  223. }
  224. }
  225. queryParams.emplace(key, value);
  226. }
  227. QueryStringParameterCollection URI::GetQueryStringParameters(bool decode) const
  228. {
  229. Aws::String queryString = GetQueryString();
  230. QueryStringParameterCollection parameterCollection;
  231. //if we actually have a query string
  232. if (queryString.size() > 0)
  233. {
  234. size_t currentPos = 1, locationOfNextDelimiter = 1;
  235. //while we have params left to parse
  236. while (currentPos < queryString.size())
  237. {
  238. //find next key/value pair
  239. locationOfNextDelimiter = queryString.find('&', currentPos);
  240. Aws::String keyValuePair;
  241. //if this isn't the last parameter
  242. if (locationOfNextDelimiter != Aws::String::npos)
  243. {
  244. keyValuePair = queryString.substr(currentPos, locationOfNextDelimiter - currentPos);
  245. }
  246. //if it is the last parameter
  247. else
  248. {
  249. keyValuePair = queryString.substr(currentPos);
  250. }
  251. //split on =
  252. size_t locationOfEquals = keyValuePair.find('=');
  253. Aws::String key = keyValuePair.substr(0, locationOfEquals);
  254. Aws::String value = keyValuePair.substr(locationOfEquals + 1);
  255. if(decode)
  256. {
  257. InsertValueOrderedParameter(parameterCollection, StringUtils::URLDecode(key.c_str()), StringUtils::URLDecode(value.c_str()));
  258. }
  259. else
  260. {
  261. InsertValueOrderedParameter(parameterCollection, key, value);
  262. }
  263. currentPos += keyValuePair.size() + 1;
  264. }
  265. }
  266. return parameterCollection;
  267. }
  268. void URI::CanonicalizeQueryString()
  269. {
  270. QueryStringParameterCollection sortedParameters = GetQueryStringParameters(false);
  271. Aws::StringStream queryStringStream;
  272. bool first = true;
  273. if(sortedParameters.size() > 0)
  274. {
  275. queryStringStream << "?";
  276. }
  277. if(m_queryString.find('=') != std::string::npos)
  278. {
  279. for (QueryStringParameterCollection::iterator iter = sortedParameters.begin();
  280. iter != sortedParameters.end(); ++iter)
  281. {
  282. if (!first)
  283. {
  284. queryStringStream << "&";
  285. }
  286. first = false;
  287. queryStringStream << iter->first.c_str() << "=" << iter->second.c_str();
  288. }
  289. m_queryString = queryStringStream.str();
  290. }
  291. }
  292. void URI::AddQueryStringParameter(const char* key, const Aws::String& value)
  293. {
  294. if (m_queryString.size() <= 0)
  295. {
  296. m_queryString.append("?");
  297. }
  298. else
  299. {
  300. m_queryString.append("&");
  301. }
  302. m_queryString.append(StringUtils::URLEncode(key) + "=" + StringUtils::URLEncode(value.c_str()));
  303. }
  304. void URI::AddQueryStringParameter(const Aws::Map<Aws::String, Aws::String>& queryStringPairs)
  305. {
  306. for(const auto& entry: queryStringPairs)
  307. {
  308. AddQueryStringParameter(entry.first.c_str(), entry.second);
  309. }
  310. }
  311. void URI::SetQueryString(const Aws::String& str)
  312. {
  313. m_queryString = "";
  314. if (str.empty()) return;
  315. if (str.front() != '?')
  316. {
  317. m_queryString.append("?").append(str);
  318. }
  319. else
  320. {
  321. m_queryString = str;
  322. }
  323. }
  324. Aws::String URI::GetURIString(bool includeQueryString) const
  325. {
  326. assert(m_authority.size() > 0);
  327. Aws::StringStream ss;
  328. ss << SchemeMapper::ToString(m_scheme) << SEPARATOR << m_authority;
  329. if (m_scheme == Scheme::HTTP && m_port != HTTP_DEFAULT_PORT)
  330. {
  331. ss << ":" << m_port;
  332. }
  333. else if (m_scheme == Scheme::HTTPS && m_port != HTTPS_DEFAULT_PORT)
  334. {
  335. ss << ":" << m_port;
  336. }
  337. if (!m_pathSegments.empty())
  338. {
  339. ss << GetURLEncodedPathRFC3986();
  340. }
  341. if(includeQueryString)
  342. {
  343. ss << m_queryString;
  344. }
  345. return ss.str();
  346. }
  347. void URI::ParseURIParts(const Aws::String& uri)
  348. {
  349. ExtractAndSetScheme(uri);
  350. ExtractAndSetAuthority(uri);
  351. ExtractAndSetPort(uri);
  352. ExtractAndSetPath(uri);
  353. ExtractAndSetQueryString(uri);
  354. }
  355. void URI::ExtractAndSetScheme(const Aws::String& uri)
  356. {
  357. size_t posOfSeparator = uri.find(SEPARATOR);
  358. if (posOfSeparator != Aws::String::npos)
  359. {
  360. Aws::String schemePortion = uri.substr(0, posOfSeparator);
  361. SetScheme(SchemeMapper::FromString(schemePortion.c_str()));
  362. }
  363. else
  364. {
  365. SetScheme(Scheme::HTTP);
  366. }
  367. }
  368. void URI::ExtractAndSetAuthority(const Aws::String& uri)
  369. {
  370. size_t authorityStart = uri.find(SEPARATOR);
  371. if (authorityStart == Aws::String::npos)
  372. {
  373. authorityStart = 0;
  374. }
  375. else
  376. {
  377. authorityStart += 3;
  378. }
  379. size_t posEndOfAuthority=0;
  380. // are we extracting an ipv6 address?
  381. if (uri.length() > authorityStart && uri.at(authorityStart) == '[')
  382. {
  383. posEndOfAuthority = uri.find(']', authorityStart);
  384. if (posEndOfAuthority == Aws::String::npos) {
  385. AWS_LOGSTREAM_ERROR("Uri", "Malformed uri: " << uri.c_str());
  386. }
  387. else
  388. {
  389. ++posEndOfAuthority;
  390. }
  391. }
  392. else
  393. {
  394. size_t posOfEndOfAuthorityPort = uri.find(':', authorityStart);
  395. size_t posOfEndOfAuthoritySlash = uri.find('/', authorityStart);
  396. size_t posOfEndOfAuthorityQuery = uri.find('?', authorityStart);
  397. posEndOfAuthority = (std::min)({posOfEndOfAuthorityPort, posOfEndOfAuthoritySlash, posOfEndOfAuthorityQuery});
  398. }
  399. if (posEndOfAuthority == Aws::String::npos)
  400. {
  401. posEndOfAuthority = uri.length();
  402. }
  403. SetAuthority(uri.substr(authorityStart, posEndOfAuthority - authorityStart));
  404. }
  405. void URI::ExtractAndSetPort(const Aws::String& uri)
  406. {
  407. size_t authorityStart = uri.find(SEPARATOR);
  408. if(authorityStart == Aws::String::npos)
  409. {
  410. authorityStart = 0;
  411. }
  412. else
  413. {
  414. authorityStart += 3;
  415. }
  416. size_t portSearchStart = authorityStart;
  417. // are we extracting an ipv6 address?
  418. if (uri.length() > portSearchStart && uri.at(portSearchStart) == '[')
  419. {
  420. size_t posEndOfAuthority = uri.find(']', portSearchStart);
  421. if (posEndOfAuthority == Aws::String::npos) {
  422. AWS_LOGSTREAM_ERROR("Uri", "Malformed uri: " << uri.c_str());
  423. }
  424. else
  425. {
  426. portSearchStart = posEndOfAuthority;
  427. }
  428. }
  429. size_t positionOfPortDelimiter = uri.find(':', portSearchStart);
  430. bool hasPort = positionOfPortDelimiter != Aws::String::npos;
  431. if ((uri.find('/', portSearchStart) < positionOfPortDelimiter) || (uri.find('?', portSearchStart) < positionOfPortDelimiter))
  432. {
  433. hasPort = false;
  434. }
  435. if (hasPort)
  436. {
  437. Aws::String strPort;
  438. size_t i = positionOfPortDelimiter + 1;
  439. char currentDigit = uri[i];
  440. while (std::isdigit(currentDigit))
  441. {
  442. strPort += currentDigit;
  443. currentDigit = uri[++i];
  444. }
  445. SetPort(static_cast<uint16_t>(atoi(strPort.c_str())));
  446. }
  447. }
  448. void URI::ExtractAndSetPath(const Aws::String& uri)
  449. {
  450. size_t authorityStart = uri.find(SEPARATOR);
  451. if (authorityStart == Aws::String::npos)
  452. {
  453. authorityStart = 0;
  454. }
  455. else
  456. {
  457. authorityStart += 3;
  458. }
  459. size_t pathEnd = uri.find('?');
  460. if (pathEnd == Aws::String::npos)
  461. {
  462. pathEnd = uri.length();
  463. }
  464. Aws::String authorityAndPath = uri.substr(authorityStart, pathEnd - authorityStart);
  465. size_t pathStart = authorityAndPath.find('/');
  466. if (pathStart != Aws::String::npos)
  467. {
  468. SetPath(authorityAndPath.substr(pathStart, pathEnd - pathStart));
  469. }
  470. else
  471. {
  472. SetPath("/");
  473. }
  474. }
  475. void URI::ExtractAndSetQueryString(const Aws::String& uri)
  476. {
  477. size_t queryStart = uri.find('?');
  478. if (queryStart != Aws::String::npos)
  479. {
  480. m_queryString = uri.substr(queryStart);
  481. }
  482. }
  483. Aws::String URI::GetFormParameters() const
  484. {
  485. if(m_queryString.length() == 0)
  486. {
  487. return "";
  488. }
  489. else
  490. {
  491. return m_queryString.substr(1);
  492. }
  493. }
  494. bool URI::CompareURIParts(const URI& other) const
  495. {
  496. return m_scheme == other.m_scheme && m_authority == other.m_authority && GetPath() == other.GetPath() && m_queryString == other.m_queryString;
  497. }