// uri-ru_ut.cpp
  1. #include "uri_ut.h"
  2. #include <library/cpp/charset/recyr.hh>
  3. #include <library/cpp/html/entity/htmlentity.h>
  4. #include <util/system/maxlen.h>
  5. namespace NUri {
  6. namespace {
  7. TString AsWin1251(const TString& s) {
  8. return Recode(CODES_UTF8, CODES_WIN, s);
  9. }
  10. TString AsKoi8(const TString& s) {
  11. return Recode(CODES_UTF8, CODES_KOI8, s);
  12. }
  13. }
  14. Y_UNIT_TEST_SUITE(URLTestRU) {
  15. Y_UNIT_TEST(test_httpURL2) {
  16. TUri url;
  17. UNIT_ASSERT_VALUES_EQUAL(url.Parse("g:h"), TState::ParsedBadScheme);
  18. UNIT_ASSERT_VALUES_EQUAL(url.Parse("http:g"), TState::ParsedBadFormat);
  19. UNIT_ASSERT_VALUES_EQUAL(url.Parse("/../g"), TState::ParsedBadPath);
  20. const char* const UpCaseUrl = "http://www.TEST.Ru:80/InDex.html";
  21. UNIT_ASSERT_VALUES_EQUAL(url.Parse(UpCaseUrl), TState::ParsedOK);
  22. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://www.TEST.Ru/InDex.html");
  23. UNIT_ASSERT_VALUES_EQUAL(url.Parse(UpCaseUrl, TFeature::FeaturesDefault | TFeature::FeatureToLower), TState::ParsedOK);
  24. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://www.test.ru/InDex.html");
  25. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagScheme), "http:");
  26. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagScheme | TField::FlagHost), "http://www.test.ru");
  27. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHost), "www.test.ru");
  28. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHost | TField::FlagPath), "www.test.ru/InDex.html");
  29. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagQuery), "");
  30. UNIT_ASSERT_VALUES_EQUAL(url.Parse("http://www.TEST.Ru:90/InDex.html"), TState::ParsedOK);
  31. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostPort | TField::FlagPath), "www.TEST.Ru:90/InDex.html");
  32. UNIT_ASSERT_VALUES_EQUAL(url.Parse("www.ya.ru/index.html"), TState::ParsedOK);
  33. UNIT_ASSERT(!url.IsValidAbs());
  34. UNIT_ASSERT(url.IsNull(TField::FlagHost));
  35. UNIT_ASSERT(!url.IsNull(TField::FlagPath));
  36. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagPath), "www.ya.ru/index.html");
  37. UNIT_ASSERT_VALUES_EQUAL(url.Parse(AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10")), TState::ParsedOK);
  38. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10"));
  39. UNIT_ASSERT_VALUES_EQUAL(url.Parse(AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10"),
  40. TFeature::FeaturesDefault | TFeature::FeatureEncodeExtendedASCII),
  41. TState::ParsedOK);
  42. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(),
  43. AsWin1251("www.TEST.Ru/%D4%C5%D3%D4\\'\".html?%D4%C5%D3%D4\\'\"=%D4%C5%D3%D4+\\'\"%10"));
  44. UNIT_ASSERT_VALUES_EQUAL(url.Parse(AsWin1251("www.TEST.Ru/ФЕУФ\\'\".html?ФЕУФ\\'\"=ФЕУФ+\\'\"%10"),
  45. TFeature::FeaturesDefault | TFeature::FeatureEncodeForSQL),
  46. TState::ParsedOK);
  47. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), AsWin1251("www.TEST.Ru/ФЕУФ%5C%27%22.html?ФЕУФ%5C%27%22=ФЕУФ+%5C%27%22%10"));
  48. UNIT_ASSERT_VALUES_EQUAL(url.Parse("q/%33%26%13%2f%2b%30%20",
  49. TFeature::FeaturesDefault | TFeature::FeatureDecodeStandard),
  50. TState::ParsedOK);
  51. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "q/3%26%13/%2B0%20");
  52. UNIT_ASSERT_VALUES_EQUAL(url.Parse("http://www.prime-tass.ru/news/0/{656F5BAE-6677-4762-9BED-9E3B77E72055}.uif"),
  53. TState::ParsedOK);
  54. UNIT_ASSERT_VALUES_EQUAL(url.Parse("//server/path"), TState::ParsedOK);
  55. UNIT_ASSERT_VALUES_EQUAL(url.Parse("//server/path", TFeature::FeaturesRobot), TState::ParsedOK);
  56. }
  57. const TString links[] = {
  58. "viewforum.php?f=1&amp;sid=b4568481b67b1d7683bea78634b2e240", "viewforum.php?f=1&sid=b4568481b67b1d7683bea78634b2e240",
  59. "./viewtopic.php?p=74&amp;sid=6#p74", "./viewtopic.php?p=74&sid=6#p74",
  60. "viewtopic.php?p=9313&amp;sid=8#9313", "viewtopic.php?p=9313&sid=8#9313",
  61. "profile.php?mode=viewprofile&u=-1#drafts&amp;sid=a6e5989cee27adb5996bfff044af04ca", "profile.php?mode=viewprofile&u=-1#drafts&sid=a6e5989cee27adb5996bfff044af04ca",
  62. "images\nil.jpg", "images%0Ail.jpg",
  63. "http://caedebaturque.termez.su\r\n/?article=218", "http://caedebaturque.termez.su%0D%0A/?article=218",
  64. AsKoi8("javascript:window.external.AddFavorite(\'http://www.humor.look.ru/\',\'Злобные Деды Морозы!!!\')"), "javascript:window.external.AddFavorite(\'http://www.humor.look.ru/\',\'%FA%CC%CF%C2%CE%D9%C5%20%E4%C5%C4%D9%20%ED%CF%D2%CF%DA%D9!!!\')",
  65. "search.php?search_author=%CB%FE%E4%EC%E8%EB%E0+%C3%F3%F1%E5%E2%E0&amp;showresults=posts&amp;sid=8", "search.php?search_author=%CB%FE%E4%EC%E8%EB%E0+%C3%F3%F1%E5%E2%E0&showresults=posts&sid=8",
  66. AsWin1251("/Search/author/?q=Штрибель Х.В."), "/Search/author/?q=%D8%F2%F0%E8%E1%E5%EB%FC%20%D5.%C2.",
  67. AsWin1251("javascript:ins(\'ГОРШОК\')"), "javascript:ins(\'%C3%CE%D0%D8%CE%CA\')",
  68. AsWin1251("?l=я"), "?l=%FF",
  69. AsWin1251("content.php?id=3392&theme=Цена"), "content.php?id=3392&theme=%D6%E5%ED%E0",
  70. "/a-mp3/stype-1/?search=А", "/a-mp3/stype-1/?search=%D0%90",
  71. "/a-mp3/stype-1/?search=Б", "/a-mp3/stype-1/?search=%D0%91",
  72. "/a-mp3/stype-1/?search=В", "/a-mp3/stype-1/?search=%D0%92",
  73. "/a-mp3/stype-1/?search=Г", "/a-mp3/stype-1/?search=%D0%93",
  74. "/a-mp3/stype-1/?search=Д", "/a-mp3/stype-1/?search=%D0%94",
  75. "/a-mp3/stype-1/?search=Е", "/a-mp3/stype-1/?search=%D0%95",
  76. "/a-mp3/stype-1/?search=Ж", "/a-mp3/stype-1/?search=%D0%96",
  77. "/a-mp3/stype-1/?search=З", "/a-mp3/stype-1/?search=%D0%97",
  78. // %98 is not defined in CP1251 so don't put it here explicitly
  79. "/a-mp3/stype-1/?search=\xD0\x98", "/a-mp3/stype-1/?search=%D0%98",
  80. "/a-mp3/stype-1/?search=Й", "/a-mp3/stype-1/?search=%D0%99",
  81. "/a-mp3/stype-1/?search=К", "/a-mp3/stype-1/?search=%D0%9A",
  82. "/a-mp3/stype-1/?search=Л", "/a-mp3/stype-1/?search=%D0%9B",
  83. "/a-mp3/stype-1/?search=М", "/a-mp3/stype-1/?search=%D0%9C",
  84. "/a-mp3/stype-1/?search=Н", "/a-mp3/stype-1/?search=%D0%9D",
  85. "/a-mp3/stype-1/?search=О", "/a-mp3/stype-1/?search=%D0%9E",
  86. "/a-mp3/stype-1/?search=П", "/a-mp3/stype-1/?search=%D0%9F",
  87. "/a-mp3/stype-1/?search=\xD0", "/a-mp3/stype-1/?search=%D0",
  88. "/a-mp3/stype-1/?search=С", "/a-mp3/stype-1/?search=%D0%A1",
  89. "/a-mp3/stype-1/?search=Т", "/a-mp3/stype-1/?search=%D0%A2",
  90. "/a-mp3/stype-1/?search=У", "/a-mp3/stype-1/?search=%D0%A3",
  91. "/a-mp3/stype-1/?search=Ф", "/a-mp3/stype-1/?search=%D0%A4",
  92. "/a-mp3/stype-1/?search=Х", "/a-mp3/stype-1/?search=%D0%A5",
  93. "/a-mp3/stype-1/?search=Ц", "/a-mp3/stype-1/?search=%D0%A6",
  94. "/a-mp3/stype-1/?search=Ч", "/a-mp3/stype-1/?search=%D0%A7",
  95. "/a-mp3/stype-1/?search=Ш", "/a-mp3/stype-1/?search=%D0%A8",
  96. "/a-mp3/stype-1/?search=Щ", "/a-mp3/stype-1/?search=%D0%A9",
  97. "/a-mp3/stype-1/?search=Ы", "/a-mp3/stype-1/?search=%D0%AB",
  98. "/a-mp3/stype-1/?search=Э", "/a-mp3/stype-1/?search=%D0%AD",
  99. "/a-mp3/stype-1/?search=Ю", "/a-mp3/stype-1/?search=%D0%AE",
  100. "/a-mp3/stype-1/?search=Я", "/a-mp3/stype-1/?search=%D0%AF",
  101. "javascript:emoticon(\":&#39;(\")", "javascript:emoticon(\":\'(\")",
  102. "javascript:emoticon(\'&gt;:o\')", "javascript:emoticon(\'>:o\')",
  103. "javascript:emoticon(\']:-&gt;\')", "javascript:emoticon(\']:->\')",
  104. "javascript:emoticon(\':-&#33;\')", "javascript:emoticon(\':-!\')",
  105. "javascript:emoticon(\'@}-&gt;--\')", "javascript:emoticon(\'@}->--\')",
  106. "http&#58;//www.is-ufa.ru/price2/price_IS.rar", "http://www.is-ufa.ru/price2/price_IS.rar",
  107. "&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#105;&#110;&#102;&#111;&#64;&#101;&#116;&#101;&#109;&#46;&#100;&#101;", "mailto:info@etem.de",
  108. "&quot;http://www.fubix.ru&quot;", "\"http://www.fubix.ru\"",
  109. AsWin1251("mailto:&#107;&#97;&#109;&#112;&#97;&#64;&#117;&#107;&#114;&#46;&#110;&#101;&#116;?subject=Арабский язык"), "mailto:kampa@ukr.net?subject=%C0%F0%E0%E1%F1%EA%E8%E9%20%FF%E7%FB%EA",
  110. {}};
  111. Y_UNIT_TEST(testHtLinkDecode) {
  112. char decodedlink[URL_MAXLEN + 10];
  113. for (int i = 0; links[i]; i += 2) {
  114. UNIT_ASSERT(HtLinkDecode(links[i].c_str(), decodedlink, sizeof(decodedlink)));
  115. UNIT_ASSERT_VALUES_EQUAL(decodedlink, links[i + 1]);
  116. }
  117. }
  118. Y_UNIT_TEST(testRuIDNA) {
  119. {
  120. #define DEC "\xD7\xE5\xF0\xE5\xEf\xEE\xE2\xE5\xF6.\xF0\xF4" /* "Череповец.рф" in Windows-1251 */
  121. #define ENC "%D7%E5%F0%E5%EF%EE%E2%E5%F6.%F0%F4"
  122. // punycode corresponds to lowercase
  123. #define PNC "xn--b1afab7bff7cb.xn--p1ai"
  124. TTest test = {
  125. "http://" ENC "/" ENC "?" ENC "#" ENC, TParseFlags(TFeature::FeaturesAll | TFeature::FeatureAllowHostIDN, TFeature::FeatureDecodeExtendedASCII), TState::ParsedOK, "http", "", "", DEC, 80, "/" ENC, ENC, ENC, ""};
  126. TUri url;
  127. URL_TEST_ENC(url, test, CODES_WIN);
  128. UNIT_ASSERT_VALUES_EQUAL(url.GetField(TField::FieldHostAscii), PNC);
  129. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(), "http://" DEC "/" ENC "?" ENC "#" ENC);
  130. UNIT_ASSERT_VALUES_EQUAL(url.PrintS(TField::FlagHostAscii), "http://" PNC "/" ENC "?" ENC "#" ENC);
  131. #undef PNC
  132. #undef DEC
  133. #undef ENC
  134. }
  135. }
  136. // Regression test for SEARCH-11283
  137. Y_UNIT_TEST(RegressionTest11283) {
  138. TStringBuf url = "http://xn--n1aaa.пидорасы.com/";
  139. TUri uri;
  140. TState::EParsed er = uri.Parse(url, NUri::TParseFlags(NUri::TFeature::FeaturesRobot | NUri::TFeature::FeatureNoRelPath));
  141. UNIT_ASSERT_VALUES_EQUAL(er, TState::ParsedOK);
  142. TStringBuf host = uri.GetHost();
  143. // Should be properly null-terminated
  144. UNIT_ASSERT_VALUES_EQUAL(host.size(), strlen(host.data()));
  145. }
  146. }
  147. }