httpfsm_ut.cpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. #include "httpfsm.h"
  2. #include "library-htfetch_ut_hreflang_in.h"
  3. #include "library-htfetch_ut_hreflang_out.h"
  4. #include <util/generic/ptr.h>
  5. #include <library/cpp/charset/doccodes.h>
  6. #include <library/cpp/testing/unittest/registar.h>
  7. class THttpHeaderParserTestSuite: public TTestBase {
  8. UNIT_TEST_SUITE(THttpHeaderParserTestSuite);
  9. UNIT_TEST(TestRequestHeader);
  10. UNIT_TEST(TestSplitRequestHeader);
  11. UNIT_TEST(TestTrailingData);
  12. UNIT_TEST(TestProxyRequestHeader);
  13. UNIT_TEST(TestIncorrectRequestHeader);
  14. UNIT_TEST(TestLastModified);
  15. UNIT_TEST(TestLastModifiedCorrupted);
  16. UNIT_TEST(TestResponseHeaderOnRequest);
  17. UNIT_TEST(TestRequestHeaderOnResponse);
  18. UNIT_TEST(TestXRobotsTagUnknownTags);
  19. UNIT_TEST(TestXRobotsTagMyBot);
  20. UNIT_TEST(TestXRobotsTagOtherBot);
  21. UNIT_TEST(TestXRobotsTagUnavailableAfterAware);
  22. UNIT_TEST(TestXRobotsTagUnavailableAfterWorks);
  23. UNIT_TEST(TestXRobotsTagOverridePriority);
  24. UNIT_TEST(TestXRobotsTagDoesNotBreakCharset);
  25. UNIT_TEST(TestXRobotsTagAllowsMultiline);
  26. UNIT_TEST(TestRelCanonical);
  27. UNIT_TEST(TestHreflang);
  28. UNIT_TEST(TestHreflangOnLongInput);
  29. UNIT_TEST(TestMimeType);
  30. UNIT_TEST(TestRepeatedContentEncoding);
  31. UNIT_TEST_SUITE_END();
  32. private:
  33. THolder<THttpHeaderParser> httpHeaderParser;
  34. private:
  35. void TestStart();
  36. void TestFinish();
  37. public:
  38. void TestRequestHeader();
  39. void TestSplitRequestHeader();
  40. void TestTrailingData();
  41. void TestProxyRequestHeader();
  42. void TestIncorrectRequestHeader();
  43. void TestLastModified();
  44. void TestLastModifiedCorrupted();
  45. void TestResponseHeaderOnRequest();
  46. void TestRequestHeaderOnResponse();
  47. void TestXRobotsTagUnknownTags();
  48. void TestXRobotsTagMyBot();
  49. void TestXRobotsTagOtherBot();
  50. void TestXRobotsTagUnavailableAfterAware();
  51. void TestXRobotsTagUnavailableAfterWorks();
  52. void TestXRobotsTagOverridePriority();
  53. void TestXRobotsTagDoesNotBreakCharset();
  54. void TestXRobotsTagAllowsMultiline();
  55. void TestRelCanonical();
  56. void TestHreflang();
  57. void TestHreflangOnLongInput();
  58. void TestMimeType();
  59. void TestRepeatedContentEncoding();
  60. };
  61. void THttpHeaderParserTestSuite::TestStart() {
  62. httpHeaderParser.Reset(new THttpHeaderParser());
  63. }
  64. void THttpHeaderParserTestSuite::TestFinish() {
  65. httpHeaderParser.Reset();
  66. }
  67. void THttpHeaderParserTestSuite::TestRequestHeader() {
  68. TestStart();
  69. THttpRequestHeader httpRequestHeader;
  70. httpHeaderParser->Init(&httpRequestHeader);
  71. const char* request = "GET /search?q=hi HTTP/1.1\r\n"
  72. "Host: www.google.ru:8080\r\n\r\n";
  73. i32 result = httpHeaderParser->Execute(request, strlen(request));
  74. UNIT_ASSERT_EQUAL(result, 2);
  75. UNIT_ASSERT_EQUAL(httpRequestHeader.http_method, HTTP_METHOD_GET);
  76. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.host, "www.google.ru:8080"), 0);
  77. UNIT_ASSERT_EQUAL(httpRequestHeader.request_uri, "/search?q=hi");
  78. UNIT_ASSERT_EQUAL(httpRequestHeader.GetUrl(), "http://www.google.ru:8080/search?q=hi");
  79. UNIT_ASSERT_EQUAL(httpHeaderParser->lastchar - request + 1,
  80. (i32)strlen(request));
  81. UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_response_timeout,
  82. DEFAULT_RESPONSE_TIMEOUT);
  83. UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_request_priority,
  84. DEFAULT_REQUEST_PRIORITY);
  85. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_sourcename, ""), 0);
  86. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_requesttype, ""), 0);
  87. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_fetchoptions, ""), 0);
  88. TestFinish();
  89. UNIT_ASSERT_EQUAL(httpRequestHeader.max_age, DEFAULT_MAX_AGE);
  90. }
  91. void THttpHeaderParserTestSuite::TestSplitRequestHeader() {
  92. TestStart();
  93. const char* request =
  94. "GET /search?q=hi HTTP/1.1\r\n"
  95. "Host: www.google.ru:8080 \r\n"
  96. "\r\n";
  97. const size_t rlen = strlen(request);
  98. for (size_t n1 = 0; n1 < rlen; n1++) {
  99. for (size_t n2 = n1; n2 < rlen; n2++) {
  100. TString s1{request, 0, n1};
  101. TString s2{request, n1, n2 - n1};
  102. TString s3{request, n2, rlen - n2};
  103. UNIT_ASSERT_EQUAL(s1 + s2 + s3, request);
  104. THttpRequestHeader httpRequestHeader;
  105. UNIT_ASSERT(0 == httpHeaderParser->Init(&httpRequestHeader));
  106. i32 result = httpHeaderParser->Execute(s1);
  107. UNIT_ASSERT_EQUAL(result, 1);
  108. result = httpHeaderParser->Execute(s2);
  109. UNIT_ASSERT_EQUAL(result, 1);
  110. result = httpHeaderParser->Execute(s3);
  111. UNIT_ASSERT_EQUAL(result, 2);
  112. UNIT_ASSERT_EQUAL(httpRequestHeader.http_method, HTTP_METHOD_GET);
  113. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.host, "www.google.ru:8080"), 0);
  114. UNIT_ASSERT_EQUAL(httpRequestHeader.request_uri, "/search?q=hi");
  115. }
  116. }
  117. TestFinish();
  118. }
  119. void THttpHeaderParserTestSuite::TestTrailingData() {
  120. TestStart();
  121. THttpRequestHeader httpRequestHeader;
  122. UNIT_ASSERT(0 == httpHeaderParser->Init(&httpRequestHeader));
  123. const char* request =
  124. "GET /search?q=hi HTTP/1.1\r\n"
  125. "Host: www.google.ru:8080\r\n"
  126. "\r\n"
  127. "high.ru";
  128. i32 result = httpHeaderParser->Execute(request, strlen(request));
  129. UNIT_ASSERT_EQUAL(result, 2);
  130. UNIT_ASSERT_EQUAL(httpRequestHeader.http_method, HTTP_METHOD_GET);
  131. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.host, "www.google.ru:8080"), 0);
  132. UNIT_ASSERT_EQUAL(httpRequestHeader.request_uri, "/search?q=hi");
  133. UNIT_ASSERT_EQUAL(TString(httpHeaderParser->lastchar + 1), "high.ru");
  134. UNIT_ASSERT_EQUAL(httpRequestHeader.http_minor, 1);
  135. UNIT_ASSERT_EQUAL(httpRequestHeader.transfer_chunked, -1);
  136. UNIT_ASSERT_EQUAL(httpRequestHeader.content_length, -1);
  137. UNIT_ASSERT_EQUAL(httpRequestHeader.connection_closed, -1);
  138. TestFinish();
  139. }
  140. void THttpHeaderParserTestSuite::TestProxyRequestHeader() {
  141. TestStart();
  142. THttpRequestHeader httpRequestHeader;
  143. httpHeaderParser->Init(&httpRequestHeader);
  144. const char* request =
  145. "GET http://www.google.ru:8080/search?q=hi HTTP/1.1\r\n"
  146. "X-Yandex-Response-Timeout: 1000\r\n"
  147. "X-Yandex-Request-Priority: 2\r\n"
  148. "X-Yandex-Sourcename: orange\r\n"
  149. "X-Yandex-Requesttype: userproxy\r\n"
  150. "X-Yandex-FetchOptions: d;c\r\n"
  151. "Cache-control: max-age=100\r\n"
  152. "If-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT\r\n"
  153. "User-Agent: Yandex/1.01.001 (compatible; Win16; I)\r\n"
  154. "From: webadmin@yandex.ru\r\n\r\n";
  155. i32 result = httpHeaderParser->Execute(request, strlen(request));
  156. UNIT_ASSERT_EQUAL(result, 2);
  157. UNIT_ASSERT_EQUAL(httpRequestHeader.http_method, HTTP_METHOD_GET);
  158. UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_response_timeout, 1000);
  159. UNIT_ASSERT_EQUAL(httpRequestHeader.x_yandex_request_priority, 2);
  160. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_sourcename, "orange"), 0);
  161. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_requesttype, "userproxy"), 0);
  162. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.x_yandex_fetchoptions, "d;c"), 0);
  163. UNIT_ASSERT_EQUAL(httpRequestHeader.max_age, 100);
  164. UNIT_ASSERT_VALUES_EQUAL(httpRequestHeader.if_modified_since,
  165. TInstant::ParseIso8601Deprecated("1994-10-29 19:43:31Z").TimeT());
  166. UNIT_ASSERT_EQUAL(httpRequestHeader.request_uri,
  167. "http://www.google.ru:8080/search?q=hi");
  168. UNIT_ASSERT(httpRequestHeader.GetUrl() ==
  169. "http://www.google.ru:8080/search?q=hi");
  170. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.host, ""), 0);
  171. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.from, "webadmin@yandex.ru"), 0);
  172. UNIT_ASSERT_EQUAL(strcmp(httpRequestHeader.user_agent,
  173. "Yandex/1.01.001 (compatible; Win16; I)"),
  174. 0);
  175. UNIT_ASSERT_EQUAL(httpHeaderParser->lastchar - request + 1,
  176. (i32)strlen(request));
  177. TestFinish();
  178. }
  179. void THttpHeaderParserTestSuite::TestIncorrectRequestHeader() {
  180. TestStart();
  181. THttpRequestHeader httpRequestHeader;
  182. httpHeaderParser->Init(&httpRequestHeader);
  183. const char* request = "GET /search?q=hi HTP/1.1\r\n"
  184. "Host: www.google.ru:8080\r\n\r\n";
  185. i32 result = httpHeaderParser->Execute(request, strlen(request));
  186. UNIT_ASSERT(result != 2);
  187. TestFinish();
  188. }
  189. void THttpHeaderParserTestSuite::TestLastModified() {
  190. TestStart();
  191. THttpHeader h;
  192. UNIT_ASSERT(0 == httpHeaderParser->Init(&h));
  193. const char* headers =
  194. "HTTP/1.1 200 OK\r\n"
  195. "Content-Type: text/html\r\n"
  196. "Last-Modified: Thu, 13 Aug 2009 14:27:08 GMT\r\n\r\n";
  197. UNIT_ASSERT(2 == httpHeaderParser->Execute(headers, strlen(headers)));
  198. UNIT_ASSERT_VALUES_EQUAL(
  199. TInstant::ParseIso8601Deprecated("2009-08-13 14:27:08Z").TimeT(),
  200. h.http_time);
  201. TestFinish();
  202. }
  203. void THttpHeaderParserTestSuite::TestLastModifiedCorrupted() {
  204. TestStart();
  205. THttpHeader h;
  206. UNIT_ASSERT(0 == httpHeaderParser->Init(&h));
  207. const char* headers =
  208. "HTTP/1.1 200 OK\r\n"
  209. "Content-Type: text/html\r\n"
  210. "Last-Modified: Thu, 13 Aug 2009 14:\r\n\r\n";
  211. UNIT_ASSERT(2 == httpHeaderParser->Execute(headers, strlen(headers)));
  212. UNIT_ASSERT(h.http_time < 0); // XXX: don't understand what is the proper value
  213. TestFinish();
  214. }
  215. void THttpHeaderParserTestSuite::TestXRobotsTagUnknownTags() {
  216. TestStart();
  217. THttpHeader httpHeader;
  218. httpHeaderParser->Init(&httpHeader);
  219. const char* headers =
  220. "HTTP/1.1 200 OK\r\n"
  221. "Content-Type: text/html\r\n"
  222. "x-robots-tag: asdfasdf asdf asdf,,, , noindex,noodpXXX , NOFOLLOW ,noodpnofollow\r\n\r\n";
  223. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  224. UNIT_ASSERT_EQUAL(result, 2);
  225. UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3);
  226. UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx");
  227. TestFinish();
  228. }
  229. void THttpHeaderParserTestSuite::TestXRobotsTagMyBot() {
  230. TestStart();
  231. THttpHeader httpHeader;
  232. httpHeaderParser->Init(&httpHeader);
  233. const char* headers =
  234. "HTTP/1.1 200 OK\r\n"
  235. "Content-Type: text/html\r\n"
  236. "x-robots-tag: yandex: noindex, nofollow\r\n"
  237. "x-robots-tag: yandexbot: noarchive, noodp\r\n\r\n";
  238. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  239. UNIT_ASSERT_EQUAL(result, 2);
  240. UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15);
  241. UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x");
  242. TestFinish();
  243. }
  244. void THttpHeaderParserTestSuite::TestXRobotsTagOtherBot() {
  245. TestStart();
  246. THttpHeader httpHeader;
  247. httpHeaderParser->Init(&httpHeader);
  248. const char* headers =
  249. "HTTP/1.1 200 OK\r\n"
  250. "Content-Type: text/html\r\n"
  251. "x-robots-tag: google: noindex, nofollow\r\n"
  252. "x-robots-tag: googlebot: noarchive, noodp\r\n"
  253. "x-robots-tag: !still(-other) bot_: foo, noyaca\r\n\r\n";
  254. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  255. UNIT_ASSERT_EQUAL(result, 2);
  256. UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 0);
  257. UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "xxxxx");
  258. TestFinish();
  259. }
  260. void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterAware() {
  261. TestStart();
  262. THttpHeader httpHeader;
  263. httpHeaderParser->Init(&httpHeader);
  264. // проверяем только что unavailable_after ничего не ломает
  265. const char* headers =
  266. "HTTP/1.1 200 OK\r\n"
  267. "Content-Type: text/html\r\n"
  268. "x-robots-tag: unavailable_after: 01 Jan 2999 00:00 UTC, noindex, nofollow\r\n"
  269. "x-robots-tag: yandex: unavailable_after: 01 Jan 2999 00:00 UTC, noarchive, noodp\r\n\r\n";
  270. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  271. UNIT_ASSERT_EQUAL(result, 2);
  272. UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15);
  273. UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x");
  274. TestFinish();
  275. }
  276. void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterWorks() {
  277. TestStart();
  278. THttpHeader httpHeader;
  279. httpHeaderParser->Init(&httpHeader);
  280. // пока не поддерживается
  281. const char* headers =
  282. "HTTP/1.1 200 OK\r\n"
  283. "Content-Type: text/html\r\n"
  284. "x-robots-tag: unavailable_after: 01 Jan 2000 00:00 UTC\r\n\r\n";
  285. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  286. UNIT_ASSERT_EQUAL(result, 2);
  287. //UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 1);
  288. //UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0xxxx");
  289. TestFinish();
  290. }
  291. void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() {
  292. TestStart();
  293. THttpHeader httpHeader;
  294. httpHeaderParser->Init(&httpHeader);
  295. const char* headers =
  296. "HTTP/1.1 200 OK\r\n"
  297. "Content-Type: text/html\r\n"
  298. "x-robots-tag: all, none\r\n\r\n";
  299. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  300. UNIT_ASSERT_EQUAL(result, 2);
  301. UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "11xxx");
  302. UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3); // NOTE legacy behavior, should be 0 as `all` overrides
  303. TestFinish();
  304. }
  305. void THttpHeaderParserTestSuite::TestXRobotsTagDoesNotBreakCharset() {
  306. TestStart();
  307. THttpHeader httpHeader;
  308. httpHeaderParser->Init(&httpHeader);
  309. const char* headers =
  310. "HTTP/1.1 200 OK\r\n"
  311. "X-Robots-Tag: noarchive\r\n"
  312. "Content-Type: application/json; charset=utf-8\r\n\r\n";
  313. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  314. UNIT_ASSERT_EQUAL(result, 2);
  315. UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON));
  316. UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8));
  317. TestFinish();
  318. }
  319. void THttpHeaderParserTestSuite::TestXRobotsTagAllowsMultiline() {
  320. TestStart();
  321. THttpHeader httpHeader;
  322. httpHeaderParser->Init(&httpHeader);
  323. const char* headers =
  324. "HTTP/1.1 200 OK\r\n"
  325. "X-Robots-Tag\r\n"
  326. " :\r\n"
  327. " unavailable_since\r\n"
  328. " :\r\n"
  329. " ,\r\n"
  330. " unavailable_since\r\n"
  331. " :\r\n"
  332. " 01 Jan 2000\r\n"
  333. " 00:00 UTC\r\n"
  334. " ,\r\n"
  335. " yandexbot\r\n"
  336. " :\r\n"
  337. " noindex\r\n"
  338. " ,\r\n"
  339. " garbage\r\n"
  340. " ,\r\n"
  341. " nofollow\r\n"
  342. " ,\r\n"
  343. " other\r\n"
  344. " bot\r\n"
  345. " :\r\n"
  346. " noarchive\r\n"
  347. " ,\r\n"
  348. "Content-Type: application/json; charset=utf-8\r\n\r\n";
  349. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  350. UNIT_ASSERT_EQUAL(result, 2);
  351. UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx");
  352. UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON));
  353. UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8));
  354. TestFinish();
  355. }
  356. void THttpHeaderParserTestSuite::TestHreflang() {
  357. TestStart();
  358. THttpHeader httpHeader;
  359. httpHeaderParser->Init(&httpHeader);
  360. const char* headers =
  361. "HTTP/1.1 200 OK\r\n"
  362. "Content-Type: text/html\r\n"
  363. "link: <http://www.high.ru/>; rel='alternate'; hreflang='x-default'\r\n"
  364. "link: <http://www.high.ru/en.html> ;rel = 'alternate' ;hreflang = en_GB \r\n"
  365. "link: <http://www.high.ru/ru.html>;hreflang = ru_RU.KOI8-r ;rel = 'alternate' \r\n"
  366. "\r\n";
  367. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  368. UNIT_ASSERT_VALUES_EQUAL(result, 2);
  369. // UNIT_ASSERT_VALUES_EQUAL(strcmp(httpHeader.hreflangs, "x-default http://www.high.ru/;"), 0);
  370. UNIT_ASSERT_VALUES_EQUAL(httpHeader.hreflangs, "x-default http://www.high.ru/\ten_GB http://www.high.ru/en.html\tru_RU.KOI8-r http://www.high.ru/ru.html");
  371. TestFinish();
  372. }
  373. void THttpHeaderParserTestSuite::TestHreflangOnLongInput() {
  374. TestStart();
  375. THttpHeader httpHeader;
  376. httpHeaderParser->Init(&httpHeader);
  377. TStringBuf testInput(hreflang_ut_in);
  378. TStringBuf testOut(hreflang_ut_out);
  379. i32 result = httpHeaderParser->Execute(testInput.data(), testInput.size());
  380. UNIT_ASSERT_VALUES_EQUAL(result, 2);
  381. UNIT_ASSERT_VALUES_EQUAL(httpHeader.hreflangs, testOut);
  382. TestFinish();
  383. }
  384. void THttpHeaderParserTestSuite::TestRelCanonical() {
  385. TestStart();
  386. THttpHeader httpHeader;
  387. httpHeaderParser->Init(&httpHeader);
  388. const char* headers =
  389. "HTTP/1.1 200 OK\r\n"
  390. "Content-Type: text/html\r\n"
  391. "Link: <http://yandex.ru>; rel = \"canonical\"\r\n\r\n";
  392. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  393. UNIT_ASSERT_EQUAL(result, 2);
  394. UNIT_ASSERT_EQUAL(httpHeader.rel_canonical, "http://yandex.ru");
  395. TestFinish();
  396. }
  397. void THttpHeaderParserTestSuite::TestResponseHeaderOnRequest() {
  398. TestStart();
  399. THttpHeader httpHeader;
  400. httpHeaderParser->Init(&httpHeader);
  401. const char* request = "GET /search?q=hi HTP/1.1\r\n"
  402. "Host: www.google.ru:8080\r\n\r\n";
  403. i32 result = httpHeaderParser->Execute(request, strlen(request));
  404. UNIT_ASSERT_EQUAL(result, -3);
  405. TestFinish();
  406. }
  407. void THttpHeaderParserTestSuite::TestRequestHeaderOnResponse() {
  408. TestStart();
  409. THttpRequestHeader httpRequestHeader;
  410. httpHeaderParser->Init(&httpRequestHeader);
  411. const char* response = "HTTP/1.1 200 OK\r\n"
  412. "Content-Type: text/html\r\n"
  413. "Last-Modified: Thu, 13 Aug 2009 14:\r\n\r\n";
  414. i32 result = httpHeaderParser->Execute(response, strlen(response));
  415. UNIT_ASSERT_EQUAL(result, -3);
  416. TestFinish();
  417. }
  418. void THttpHeaderParserTestSuite::TestMimeType() {
  419. TestStart();
  420. THttpHeader httpHeader;
  421. httpHeaderParser->Init(&httpHeader);
  422. const char* headers =
  423. "HTTP/1.1 200 OK\r\n"
  424. "Content-Type: application/json; charset=utf-8\r\n\r\n";
  425. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  426. UNIT_ASSERT_EQUAL(result, 2);
  427. UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON));
  428. UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8));
  429. TestFinish();
  430. }
  431. void THttpHeaderParserTestSuite::TestRepeatedContentEncoding() {
  432. TestStart();
  433. THttpHeader httpHeader;
  434. httpHeaderParser->Init(&httpHeader);
  435. const char *headers =
  436. "HTTP/1.1 200 OK\r\n"
  437. "Server: nginx\r\n"
  438. "Date: Mon, 15 Oct 2018 10:40:44 GMT\r\n"
  439. "Content-Type: text/plain\r\n"
  440. "Transfer-Encoding: chunked\r\n"
  441. "Connection: keep-alive\r\n"
  442. "Last-Modified: Mon, 15 Oct 2018 03:48:54 GMT\r\n"
  443. "ETag: W/\"5bc40e26-a956d\"\r\n"
  444. "X-Autoru-LB: lb-03-sas.prod.vertis.yandex.net\r\n"
  445. "Content-Encoding: gzip\r\n"
  446. "Content-Encoding: gzip\r\n"
  447. "X-UA-Bot: 1\r\n"
  448. "\r\n";
  449. i32 result = httpHeaderParser->Execute(headers, strlen(headers));
  450. UNIT_ASSERT_EQUAL(result, 2);
  451. UNIT_ASSERT_EQUAL(httpHeader.error, 0);
  452. UNIT_ASSERT_EQUAL(httpHeader.compression_method, 3);
  453. TestFinish();
  454. }
  455. UNIT_TEST_SUITE_REGISTRATION(THttpHeaderParserTestSuite);
  456. Y_UNIT_TEST_SUITE(TestHttpChunkParser) {
  457. static THttpChunkParser initParser() {
  458. THttpChunkParser parser;
  459. parser.Init();
  460. return parser;
  461. }
  462. static THttpChunkParser parseByteByByte(const TStringBuf& blob, const TVector<int>& states) {
  463. UNIT_ASSERT(states.size() <= blob.size());
  464. THttpChunkParser parser{initParser()};
  465. for (size_t n = 0; n < states.size(); n++) {
  466. const TStringBuf d{blob, n, 1};
  467. int code = parser.Execute(d.data(), d.size());
  468. Cout << TString(d).Quote() << " " << code << Endl;
  469. UNIT_ASSERT_EQUAL(code, states[n]);
  470. }
  471. return parser;
  472. }
  473. static THttpChunkParser parseBytesWithLastState(const TStringBuf& blob, const int last_state) {
  474. TVector<int> states(blob.size() - 1, 1);
  475. states.push_back(last_state);
  476. return parseByteByByte(blob, states);
  477. }
  478. Y_UNIT_TEST(TestWithoutEolHead) {
  479. const TStringBuf blob{
  480. "4\r\n"
  481. "____\r\n"};
  482. TVector<int> states{
  483. -1, /* 1, -1,
  484. 1, -1, 1, -1, 1, -1 */};
  485. // as soon as error happens parser state should be considered
  486. // undefined, state is meaningless after the very first `-1`
  487. // moreover, testenv produces `states[1] == -1` for this input and
  488. // my local build produces `states[1] == 1`.
  489. parseByteByByte(blob, states);
  490. }
  491. Y_UNIT_TEST(TestTrivialChunk) {
  492. const TStringBuf blob{
  493. "\r\n"
  494. "4\r\n"};
  495. THttpChunkParser parser(parseBytesWithLastState(blob, 2));
  496. UNIT_ASSERT_EQUAL(parser.chunk_length, 4);
  497. UNIT_ASSERT_EQUAL(parser.cnt64, 4);
  498. }
  499. Y_UNIT_TEST(TestNegative) {
  500. const TStringBuf blob{
  501. "\r\n"
  502. "-1"};
  503. TVector<int> states{
  504. 1, 1,
  505. -1,
  506. /* 1 */};
  507. parseByteByByte(blob, states);
  508. }
  509. Y_UNIT_TEST(TestLeadingZero) {
  510. const TStringBuf blob{
  511. "\r\n"
  512. "042\r\n"};
  513. THttpChunkParser parser(parseBytesWithLastState(blob, 2));
  514. UNIT_ASSERT_EQUAL(parser.chunk_length, 0x42);
  515. }
  516. Y_UNIT_TEST(TestIntOverflow) {
  517. const TStringBuf blob{
  518. "\r\n"
  519. "deadbeef"};
  520. THttpChunkParser parser(parseBytesWithLastState(blob, -2));
  521. UNIT_ASSERT_EQUAL(parser.chunk_length, 0);
  522. UNIT_ASSERT_EQUAL(parser.cnt64, 0xdeadbeef);
  523. }
  524. Y_UNIT_TEST(TestTrivialChunkWithTail) {
  525. const TStringBuf blob{
  526. "\r\n"
  527. "4\r\n"
  528. "_" // first byte of the chunk
  529. };
  530. TVector<int> states{
  531. 1, 1,
  532. 1, 1, 2,
  533. -1};
  534. parseByteByByte(blob, states);
  535. }
  536. Y_UNIT_TEST(TestLastChunk) {
  537. // NB: current parser does not permit whitespace before `foo`,
  538. // but I've never seen the feature in real-life traffic
  539. const TStringBuf blob{
  540. "\r\n"
  541. "000 ;foo = bar \r\n"
  542. "Trailer: bar\r\n"
  543. "\r\n"};
  544. THttpChunkParser parser(parseBytesWithLastState(blob, 2));
  545. UNIT_ASSERT_EQUAL(parser.chunk_length, 0);
  546. }
  547. }