httpfsm.rl6 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684
  1. #include <stdio.h>
  2. #include <time.h>
  3. #include <library/cpp/charset/doccodes.h>
  4. #include <library/cpp/charset/codepage.h>
  5. #include <library/cpp/http/misc/httpcodes.h>
  6. #include <util/datetime/base.h>
  7. #include <util/generic/ylimits.h>
  8. #include <algorithm> // max
  9. #include <library/cpp/http/fetch/httpheader.h>
  10. #include <library/cpp/http/fetch/httpfsm.h>
  11. #ifdef _MSC_VER
  12. #pragma warning(disable: 4702) // unreachable code
  13. #endif
  // Shorthands used inside Ragel leaving actions such as `%{c(1)}`.
  // Both operate on the scratch integer `I` and expand to a complete statement
  // (the trailing ';' is part of the macro because call sites omit it).
  // The argument is parenthesized so compound expressions are handled as a whole.
  //
  // c(i): store i in I.
  #define c(i) I = (i);
  // m(i): raise I to i when i is larger, i.e. keep the maximum seen so far.
  #define m(i) I = std::max(I, (long)(i));
  // Maps one hexadecimal digit character to its numeric value.
  // Anything below 'A' is treated as a decimal digit; anything else has bit 0x20
  // cleared (lower case folded to upper case) and is mapped 'A' -> 10, 'B' -> 11, ...
  // The input is not validated, so the result for a non-hex character is meaningless.
  static inline int X(unsigned char c) {
      if (c < 'A') {
          return c - '0';
      }
      const int folded = c & 0xdf;
      return folded - 'A' + 10;
  }
  // Marks a header field as "being re-parsed" by encoding its current value.
  // A value v >= -1 becomes -4 - v (so -1 becomes -3, 0 becomes -4, ...);
  // every other value, including -2 itself, collapses to -2.
  // setguarded() decodes the result once the new value is known.
  template <typename T>
  static inline void guard(T &val) {
      if (val >= -1) {
          val = -4 - val;
      } else {
          val = -2;
      }
  }
  // Stores a freshly parsed value into a field previously encoded by guard().
  // cnt is accepted when the encoded field is -3 (i.e. it held -1 before guard())
  // or when cnt equals the value the field held before guard() (cnt == -4 - val).
  // In every other case the field is set to -2.
  template <typename T>
  static inline void setguarded(T &val, long cnt) {
      if (val == -3 || cnt == -4 - val) {
          val = cnt;
      } else {
          val = -2;
      }
  }
  27. ////////////////////////////////////////////////////////////////////
  28. /// HTTP PARSER
  29. ////////////////////////////////////////////////////////////////////
  %%{
  machine http_header_parser;
  include HttpDateTimeParser "../../../../util/datetime/parser.rl6";
  alphtype unsigned char;

  ################# 2.2 Basic Rules #################
  # Line ending: LF with an optional preceding CR.
  eol           = '\r'? '\n';
  # A single blank: space or tab.
  ws            = [ \t];
  # One blank, optionally preceded by a line break (folded header line).
  lw            = '\r'? '\n'? ws;
  separator     = [()<>@,;:\\"/\[\]?={}];
  token_char    = [!-~] - separator;              # http tokens chars
  url_char      = [!-~] - ["<>\[\]\\^`{}|];       # uric chars
  text_char     = ws | 33..126 | 128..255;
  any_text_char = any - [\r\n];
  lws           = lw*;
  # End of a header line: optional blanks/folds followed by the line ending.
  eoh           = lws eol;
  token         = token_char+;
  # Token that may contain blanks but must end with a token character.
  ex_token      = (token_char | ws)* token_char;
  text          = (text_char | lw)*;
  any_text      = (any_text_char | lw)*;
  # Name/value delimiter: a colon with optional blanks on both sides.
  def           = lws ':' lws;

  # Shared capture buffer: reset it, then append the current character
  # while there is still room (excess characters are dropped).
  action clear_buf {
      buflen = 0;
  }
  action update_buf {
      if (buflen < sizeof(buf))
          buf[buflen++] = fc;
  }
  ###################################################
  ############ response status line #################
  # `int` and `int3` are not defined in this machine; presumably they come from
  # the included HttpDateTimeParser and leave the parsed number in I.

  # Remember the minor HTTP version number.
  action set_minor {
      base_hd->http_minor = I;
  }

  # Store the status code for a response header object; if the parser was set
  # up with a request header object instead, stop with -3.
  action set_status {
      if (hd) {
          hd->http_status = I;
      }
      if (request_hd) {
          return -3;
      }
  }

  status_code   = int3;
  http_major    = int;
  http_minor    = int;
  reason_phrase = ws+ text_char*;
  http_version  = "http/"i http_major '.' http_minor %set_minor;
  response_status_line = http_version ws+ status_code reason_phrase? eol %set_status;
  ############ request status line #################
  # Store the captured request URI. A second URI for the same header object
  # aborts parsing with -2; URIs of FETCHER_URL_MAX or more bytes are ignored.
  action set_request_uri {
      if (request_hd && buflen < FETCHER_URL_MAX) {
          if (!request_hd->request_uri.empty()) {
              return -2;
          }
          request_hd->request_uri = TStringBuf(buf, buflen);
      }
  }

  # Store the method index chosen below; if the parser was set up with a
  # response header object, stop with -3.
  action set_http_method {
      if (request_hd) {
          request_hd->http_method = I;
      }
      if (hd) {
          return -3;
      }
  }

  http_extension_method = token;
  # Known methods map to 0..7 and take priority over the generic token (8).
  http_method = ( "options"i %{c(0)} @1
                | "get"i     %{c(1)} @1
                | "head"i    %{c(2)} @1
                | "post"i    %{c(3)} @1
                | "put"i     %{c(4)} @1
                | "delete"i  %{c(5)} @1
                | "trace"i   %{c(6)} @1
                | "connect"i %{c(7)} @1
                | http_extension_method %{c(8)} $0 )
                %set_http_method;
  request_uri = (token_char | separator)+ >clear_buf $update_buf
                %set_request_uri;
  request_status_line = http_method ws+ request_uri ws+ http_version eoh;
  ################# connection ######################
  # Encode the previous value with guard() and start from I = -1;
  # each recognized token then raises I through m().
  action beg_connection {
      guard(base_hd->connection_closed);
      I = -1;
  }
  action set_connection {
      setguarded(base_hd->connection_closed, I);
  }

  # "close" yields 1, "keep-alive" yields 0; with two tokens the larger wins.
  c_token     = "close"i      %{m(1)}
              | "keep-alive"i %{m(0)};
  c_tokenlist = c_token (lws ',' lws c_token)?;
  connection  = "connection"i def %beg_connection c_tokenlist eoh %set_connection;
  ################# content-encoding ################
  action beg_content_encoding {
      I = HTTP_COMPRESSION_ERROR;
  }

  # Accept the parsed method if none was stored yet or if it repeats the stored
  # one; a different method turns the field into HTTP_COMPRESSION_ERROR.
  action set_content_encoding {
      base_hd->compression_method =
          ((base_hd->compression_method == HTTP_COMPRESSION_UNSET ||
            base_hd->compression_method == I)
               ? I
               : (int)HTTP_COMPRESSION_ERROR);
  }

  ce_tokenlist = "identity"i   %{c(HTTP_COMPRESSION_IDENTITY)}
               | "gzip"i       %{c(HTTP_COMPRESSION_GZIP)}
               | "x-gzip"i     %{c(HTTP_COMPRESSION_GZIP)}
               | "deflate"i    %{c(HTTP_COMPRESSION_DEFLATE)}
               | "compress"i   %{c(HTTP_COMPRESSION_COMPRESS)}
               | "x-compress"i %{c(HTTP_COMPRESSION_COMPRESS)};
  content_encoding = "content-encoding"i def %beg_content_encoding ce_tokenlist eoh %set_content_encoding;
  ################# transfer-encoding ###############
  # guard()/setguarded() pair around the parsed value: "identity" is 0, "chunked" is 1.
  action beg_encoding {
      guard(base_hd->transfer_chunked);
  }
  action set_encoding {
      setguarded(base_hd->transfer_chunked, I);
  }

  e_tokenlist = "identity"i %{c(0)}
              | "chunked"i  %{c(1)};
  transfer_encoding = "transfer-encoding"i def %beg_encoding e_tokenlist eoh %set_encoding;
  ################# content-length ##################
  # The number parsed by `int` (left in I) is stored through the guard()/setguarded() pair.
  action beg_content_length {
      guard(base_hd->content_length);
  }
  action set_content_length {
      setguarded(base_hd->content_length, I);
  }

  content_length = "content-length"i def %beg_content_length int eoh %set_content_length;
  ################# content-range ###################
  # Three numbers are captured in turn: range start, range end, entity length.
  # Each one is wrapped in its own guard()/setguarded() pair and starts from I = -1.
  action beg_content_range_start {
      guard(base_hd->content_range_start);
      I = -1;
  }
  action set_content_range_start {
      setguarded(base_hd->content_range_start, I);
  }
  action beg_content_range_end {
      guard(base_hd->content_range_end);
      I = -1;
  }
  action set_content_range_end {
      setguarded(base_hd->content_range_end, I);
  }
  action beg_content_range_el {
      guard(base_hd->content_range_entity_length);
      I = -1;
  }
  action set_content_range_el {
      setguarded(base_hd->content_range_entity_length, I);
  }

  # `sp` is not defined in this machine; presumably it comes from the included parser.
  content_range = "content-range"i def "bytes"i sp
                  %beg_content_range_start int '-' %set_content_range_start
                  %beg_content_range_end   int '/' %set_content_range_end
                  %beg_content_range_el    int eoh %set_content_range_el;
  ################# accept-ranges ###################
  # Response-only field: both actions do nothing without a response header object.
  action beg_accept_ranges {
      if (hd) {
          guard(hd->accept_ranges);
          I = -1;
      }
  }
  action set_accept_ranges {
      if (hd)
          setguarded(hd->accept_ranges, I);
  }

  # "bytes" is 1, "none" is 0.
  ar_tokenlist  = "bytes"i %{c(1)}
                | "none"i  %{c(0)};
  accept_ranges = "accept-ranges"i def %beg_accept_ranges ar_tokenlist eoh %set_accept_ranges;
  ################# content-type ####################
  action beg_mime {
      guard(base_hd->mime_type);
  }
  action set_mime {
      setguarded(base_hd->mime_type, I);
  }

  # NUL-terminate the captured charset name and resolve it with EncodingHintByName();
  # names of FETCHER_URL_MAX or more bytes are ignored.
  action set_charset {
      if (buflen < FETCHER_URL_MAX) {
          buf[buflen++] = 0;
          base_hd->charset = EncodingHintByName((const char*)buf);
      }
  }

  # Recognized media types and the MIME_* constant each one selects.
  mime_type = "text/plain"i                    %{c(MIME_TEXT)}
            | "text/html"i                     %{c(MIME_HTML)}
            | "application/pdf"i               %{c(MIME_PDF)}
            | "application/rtf"i               %{c(MIME_RTF)}
            | "text/rtf"i                      %{c(MIME_RTF)}
            | "application/msword"i            %{c(MIME_DOC)}
            | "audio/mpeg"i                    %{c(MIME_MPEG)}
            | "text/xml"i                      %{c(MIME_XML)}
            | "application/xml"i               %{c(MIME_XML)}
            | "application/rss+xml"i           %{c(MIME_RSS)}
            | "application/rdf+xml"i           %{c(MIME_RSS)}
            | "application/atom+xml"i          %{c(MIME_RSS)}
            | "text/vnd.wap.wml"i              %{c(MIME_WML)}
            | "application/x-shockwave-flash"i %{c(MIME_SWF)}
            | "application/vnd.ms-excel"i      %{c(MIME_XLS)}
            | "application/vnd.ms-powerpoint"i %{c(MIME_PPT)}
            | "image/jpeg"i                    %{c(MIME_IMAGE_JPG)}
            | "image/jpg"i                     %{c(MIME_IMAGE_JPG)}
            | "image/pjpeg"i                   %{c(MIME_IMAGE_PJPG)}
            | "image/png"i                     %{c(MIME_IMAGE_PNG)}
            | "image/gif"i                     %{c(MIME_IMAGE_GIF)}
            | "application/xhtml+xml"i         %{c(MIME_XHTMLXML)}
            | "application/vnd.openxmlformats-officedocument.wordprocessingml.document"i   %{c(MIME_DOCX)}
            | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"i         %{c(MIME_XLSX)}
            | "application/vnd.openxmlformats-officedocument.presentationml.presentation"i %{c(MIME_PPTX)}
            | "application/vnd.oasis.opendocument.text"i         %{c(MIME_ODT)}
            | "application/vnd.oasis.opendocument.presentation"i %{c(MIME_ODP)}
            | "application/vnd.oasis.opendocument.spreadsheet"i  %{c(MIME_ODS)}
            | "application/vnd.oasis.opendocument.graphics"i     %{c(MIME_ODG)}
            | "image/x-ms-bmp"i                %{c(MIME_IMAGE_BMP)}
            | "image/bmp"i                     %{c(MIME_IMAGE_BMP)}
            | "audio/x-wav"i                   %{c(MIME_WAV)}
            | ( "application/x-tar"i
              | "application/x-ustar"i
              | "application/x-gtar"i
              | "application/zip"i
              | "application/x-archive"i
              | "application/x-bzip2"i
              | "application/x-rar"i )         %{c(MIME_ARCHIVE)}
            | "application/x-dosexec"i         %{c(MIME_EXE)}
            | "application/x-gzip"i            %{c(MIME_GZIP)}
            | "application/json"i              %{c(MIME_JSON)}
            | ( "application/javascript"i
              | "text/javascript"i )           %{c(MIME_JAVASCRIPT)}
            | "application/vnd.android.package-archive"i %{c(MIME_APK)}
            | ( "image/x-icon"i
              | "image/vnd.microsoft.icon"i )  %{c(MIME_IMAGE_ICON)}
            ;

  charset_name = token_char+ >clear_buf $update_buf;
  # The charset parameter has the highest priority, then any name=value pair,
  # then arbitrary text.
  mime_param   = "charset"i ws* '=' ws* '"'? charset_name '"'? %set_charset @2
               | token ws* '=' ws* '"'? token '"'? @1
               | text $0;
  mime_parms   = (lws ';' lws mime_param)*;
  content_type = "content-type"i def %beg_mime mime_type mime_parms eoh %set_mime;
  ################# last modified ###################
  # `http_date` and `DateTimeFields` are not defined in this machine; presumably
  # they are provided by the included HttpDateTimeParser.
  action beg_modtime {
      guard(base_hd->http_time);
  }
  action set_modtime {
      setguarded(base_hd->http_time, DateTimeFields.ToTimeT(-1));
  }

  last_modified = "last-modified"i def %beg_modtime http_date eoh %set_modtime;
  ################# location ########################
  # Strip trailing spaces and tabs from the captured URL, then store it for a
  # response header object; URLs of FETCHER_URL_MAX or more bytes are ignored.
  action set_location {
      for (; buflen > 0; --buflen) {
          if (buf[buflen - 1] != ' ' && buf[buflen - 1] != '\t')
              break;
      }
      if (hd && buflen < FETCHER_URL_MAX) {
          hd->location = TStringBuf(buf, buflen);
      }
  }

  action set_status_303 {
      if (hd)
          hd->http_status = 303;
  }

  url      = url_char+ >clear_buf $update_buf;
  loc_url  = any_text_char+ >clear_buf $update_buf;
  location = "location"i def loc_url eoh %set_location;
  # A Refresh header with a "url=" part is stored the same way as Location.
  refresh  = "refresh"i def int ';' lws "url="i loc_url eoh %set_location;
  ################# x-robots-tag ################
  # I holds a directive as a bit mask: positive for a restrictive directive,
  # negative for the permissive counterpart of the same bits.
  # Restrictive bits are OR-ed into x_robots_tag. For every bit of the mask the
  # matching x_robots_state slot becomes '1' for a permissive directive, or '0'
  # for a restrictive one unless the slot already holds '1'.
  action set_x_robots {
      if (hd && AcceptingXRobots) {
          if (I > 0)
              hd->x_robots_tag |= I;
          int pos = (I > 0 ? I : -I);
          for (size_t i = 0; i < 5; ++i) {
              if (abs(pos) & (1 << i)) { // permissive flags take priority
                  if (I < 0) {
                      hd->x_robots_state[i] = '1';
                  } else {
                      hd->x_robots_state[i] = (hd->x_robots_state[i] != '1') ? '0' : '1';
                  }
              }
          }
      }
  }

  # I is 1 for a yandex robot specifier and 0 for any other robot.
  action accept_x_robots {
      AcceptingXRobots = (bool)I;
  }

  x_robots_directive = "none"i      %{c(3)}  | "all"i     %{c(-3)}
                     | "noindex"i   %{c(1)}  | "index"i   %{c(-1)}
                     | "nofollow"i  %{c(2)}  | "follow"i  %{c(-2)}
                     | "noarchive"i %{c(4)}  | "archive"i %{c(-4)}
                     | "noyaca"i    %{c(16)}
                     | "noodp"i     %{c(8)};

  any_value = (any_text_char - [, \t])+ (lws (any_text_char - [, \t])+)*;
  any_key   = (any_text_char - [:, \t])+ (lws (any_text_char - [:, \t])+)*;

  unavailable_after_directive = "unavailable_after"i def any_value;

  yandex_robot    = "yandex"i | "yandexbot"i;
  other_robot     = any_key - "unavailable_after"i - yandex_robot;
  robot_specifier = yandex_robot %{c(1)} | other_robot %{c(0)};

  x_robots_value = (robot_specifier def %accept_x_robots)?
                   ( unavailable_after_directive
                   | (x_robots_directive %set_x_robots)
                   | any_value? );
  x_robots_tag   = "x-robots-tag"i def >{ AcceptingXRobots = true; }
                   x_robots_value (lws ',' lws x_robots_value)* eoh;
  ################# rel_canonical ###############
  # Store the URL captured from a Link header with rel="canonical";
  # URLs of FETCHER_URL_MAX or more bytes are ignored.
  action set_canonical {
      if (hd && buflen < FETCHER_URL_MAX) {
          hd->rel_canonical = TStringBuf(buf, buflen);
      }
  }

  rel_canonical = "link"i def '<' url ">;"i lws
                  "rel"i lws '=' lws "\"canonical\"" eoh %set_canonical;
  ################# hreflang ###############
  # Append one "<lang> <url>" entry to the hreflangs buffer of the response
  # header. Entries are separated by a TAB and the buffer is kept NUL-terminated.
  # An entry is dropped when no language was captured or when it does not fit
  # into the remaining space (one byte is always left for the terminator).
  # Like the other response-only actions, this one does nothing when no
  # response header object is attached.
  action set_hreflang {
      if (hd) {
          bool first = (hreflangpos == hd->hreflangs);
          size_t len2 = (first ? 0 : 1) + langlen + 1 + buflen;
          if (langlen && len2 < hreflangspace) {
              if (!first) {
                  *(hreflangpos++) = '\t';
              }
              memcpy(hreflangpos, langstart, langlen);
              hreflangpos += langlen;
              *(hreflangpos++) = ' ';
              memcpy(hreflangpos, buf, buflen);
              hreflangpos += buflen;
              *(hreflangpos) = 0;
              hreflangspace -= len2;
          }
      }
  }

  # Remember where the language tag starts inside the input being parsed.
  action start_lang {
      langstart = fpc;
      langlen = 0;
  }
  # Compute the language tag length once the tag has been left.
  action end_lang {
      langlen = fpc - langstart;
  }

  hreflang_token = (token_char - ['])+;
  quote = ['"]?; #"
  lang = hreflang_token >start_lang %end_lang;
  # Link: <url>; rel=alternate; hreflang=xx  (the two parameters in either order)
  hreflang = "link"i def '<' url '>' lws ";" lws
      ( ( "rel"i lws '=' lws quote "alternate" quote lws ';' lws "hreflang"i lws '=' lws quote lang quote )
      | ( "hreflang"i lws '=' lws quote lang quote lws ';' lws "rel"i lws '=' lws quote "alternate" quote ) )
      eoh %set_hreflang;
  ################# squid_error #################
  # Flag the response when an X-Yandex-Squid-Error header is present.
  # Like the other response-only actions, this one does nothing when no
  # response header object is attached.
  action set_squid_error {
      if (hd) {
          hd->squid_error = 1;
      }
  }

  squid_error = "X-Yandex-Squid-Error"i def any_text eoh %set_squid_error;
  ################# auth ########################
  # All actions in this section are no-ops unless an auth header object is attached.
  action init_auth {
      if (auth_hd)
          auth_hd->use_auth = true;
  }

  action update_auth_buf {
      if (auth_hd && buflen < sizeof(buf))
          buf[buflen++] = *fpc;
  }

  quoted_str      = /"/ (text_char - /"/)* /"/ >2;
  # Quoted string whose content (without the quotes) is captured into buf.
  auth_quoted_str = ( /"/ ( ( text_char - /"/ )* >clear_buf $update_auth_buf ) /"/ ) >2;

  # do not support auth-int, too heavy procedure
  qop_auth_option = "auth"i @1 %{ if (auth_hd) auth_hd->qop_auth = true; };
  qop_option      = ( qop_auth_option @1 ) | ( ( token - "auth"i ) $0 );

  # Parameters that are stored. The three quoted values are NUL-terminated in
  # buf and duplicated with strdup(); values that do not leave room for the
  # terminator below FETCHER_URL_MAX are ignored.
  auth_good_param = ( "nonce"i /=/ auth_quoted_str )
                      %{
                          if (auth_hd && buflen < FETCHER_URL_MAX-1) {
                              buf[buflen++] = 0;
                              auth_hd->nonce = strdup((const char*)buf);
                          }
                      }
                  | ( "realm"i /=/ auth_quoted_str )
                      %{
                          if (auth_hd && buflen < FETCHER_URL_MAX-1) {
                              buf[buflen++] = 0;
                              auth_hd->realm = strdup((const char*)buf);
                          }
                      }
                  | ( "opaque"i /=/ auth_quoted_str )
                      %{
                          if (auth_hd && buflen < FETCHER_URL_MAX-1) {
                              buf[buflen++] = 0;
                              auth_hd->opaque = strdup((const char*)buf);
                          }
                      }
                  | "stale"i /=/ "true"i
                      %{ if (auth_hd) auth_hd->stale = true; }
                  | "algorithm"i /=/ "md5"i /-/ "sess"i
                      %{ if (auth_hd) auth_hd->algorithm = 1; }
                  | ( "qop"i /="/ qop_option (ws* "," ws* qop_option)* /"/ );

  # Stored parameters win over the generic name=value form.
  auth_param  = auth_good_param @1
              | ( ( token - ( "nonce"i | "opaque"i | "realm"i | "qop"i ) )
                  /=/ ( token | quoted_str ) ) $0;
  auth_params = auth_param ( ws* /,/ ws* auth_param )*;

  # Only the Digest scheme is interpreted; any other scheme is skipped as text.
  digest_challenge = ( "digest"i %init_auth ws+ auth_params )
                   | ( ( token - "digest"i ) text );
  auth = "www-authenticate"i def digest_challenge eoh;
  ###################### host #######################
  # NUL-terminate the captured value and copy it, terminator included, into
  # request_hd->host. A repeated Host header aborts parsing with -2; values of
  # HOST_MAX or more bytes are ignored.
  action set_host {
      if (request_hd && buflen < HOST_MAX) {
          buf[buflen++] = 0;
          if (request_hd->host[0] != 0)
              return -2;
          memcpy(request_hd->host, buf, buflen);
      }
  }

  host        = (url_char | [:])* >clear_buf $update_buf;
  host_header = "host"i def host eoh %set_host;
  ###################### from #######################
  # NUL-terminate the captured mailbox and copy it, terminator included, into
  # request_hd->from. A repeated From header aborts parsing with -2; values of
  # MAXWORD_LEN or more bytes are ignored.
  action set_from {
      if (request_hd && buflen < MAXWORD_LEN) {
          buf[buflen++] = 0;
          if (request_hd->from[0] != 0)
              return -2;
          memcpy(request_hd->from, buf, buflen);
      }
  }

  mailbox     = (token "@" token) >clear_buf $update_buf;
  from_header = "from"i def mailbox eoh %set_from;
  ################### user-agent ####################
  # NUL-terminate the captured value and copy it, terminator included, into
  # request_hd->user_agent. A repeated header aborts parsing with -2; values of
  # MAXWORD_LEN or more bytes are ignored.
  action set_user_agent {
      if (request_hd && buflen < MAXWORD_LEN) {
          buf[buflen++] = 0;
          if (request_hd->user_agent[0] != 0)
              return -2;
          memcpy(request_hd->user_agent, buf, buflen);
      }
  }

  user_agent        = any_text_char* >clear_buf $update_buf;
  user_agent_header = "user-agent"i def user_agent eoh %set_user_agent;
  ############### x-yandex-langregion ################
  # NUL-terminate the captured value and copy it, terminator included, into
  # request_hd->x_yandex_langregion. A repeated header aborts parsing with -2;
  # values of MAX_LANGREGION_LEN or more bytes are ignored.
  action set_langregion {
      if (request_hd && buflen < MAX_LANGREGION_LEN) {
          buf[buflen++] = 0;
          if (request_hd->x_yandex_langregion[0] != 0)
              return -2;
          memcpy(request_hd->x_yandex_langregion, buf, buflen);
      }
  }

  langregion        = any_text_char* >clear_buf $update_buf;
  langregion_header = "x-yandex-langregion"i def langregion eoh %set_langregion;
  ############### x-yandex-sourcename ################
  # NUL-terminate the captured value and copy it, terminator included, into
  # request_hd->x_yandex_sourcename. A repeated header aborts parsing with -2;
  # values of MAXWORD_LEN or more bytes are ignored.
  action set_sourcename {
      if (request_hd && buflen < MAXWORD_LEN) {
          buf[buflen++] = 0;
          if (request_hd->x_yandex_sourcename[0] != 0)
              return -2;
          memcpy(request_hd->x_yandex_sourcename, buf, buflen);
      }
  }

  sourcename        = any_text_char* >clear_buf $update_buf;
  sourcename_header = "x-yandex-sourcename"i def sourcename eoh %set_sourcename;
  ############### x-yandex-requesttype ###############
  # NUL-terminate the captured value and copy it, terminator included, into
  # request_hd->x_yandex_requesttype. A repeated header aborts parsing with -2;
  # values of MAXWORD_LEN or more bytes are ignored.
  action set_requesttype {
      if (request_hd && buflen < MAXWORD_LEN) {
          buf[buflen++] = 0;
          if (request_hd->x_yandex_requesttype[0] != 0)
              return -2;
          memcpy(request_hd->x_yandex_requesttype, buf, buflen);
      }
  }

  requesttype        = any_text_char* >clear_buf $update_buf;
  requesttype_header = "x-yandex-requesttype"i def requesttype eoh %set_requesttype;
  ################ x-yandex-fetchoptions ###############
  # NUL-terminate the captured value and copy it, terminator included, into
  # request_hd->x_yandex_fetchoptions. A repeated header aborts parsing with -2;
  # values of MAXWORD_LEN or more bytes are ignored.
  action set_fetchoptions {
      if (request_hd && buflen < MAXWORD_LEN) {
          buf[buflen++] = 0;
          if (request_hd->x_yandex_fetchoptions[0] != 0)
              return -2;
          memcpy(request_hd->x_yandex_fetchoptions, buf, buflen);
      }
  }

  fetchoptions        = any_text_char* >clear_buf $update_buf;
  fetchoptions_header = "x-yandex-fetchoptions"i def fetchoptions eoh %set_fetchoptions;
  ################ if-modified-since ################
  # Request-only field: store the parsed date as returned by DateTimeFields.ToTimeT(-1).
  action set_if_modified_since {
      if (request_hd)
          request_hd->if_modified_since = DateTimeFields.ToTimeT(-1);
  }

  if_modified_since = "if-modified-since"i def http_date eoh
                      %set_if_modified_since;
  ################ retry-after ################
  # Retry-After comes in two forms: an HTTP date, or a number that is added to
  # the current time in seconds.
  action set_retry_after_withdate {
      if (hd)
          hd->retry_after = DateTimeFields.ToTimeT(-1);
  }
  action set_retry_after_withdelta {
      if (hd)
          hd->retry_after = TInstant::Now().Seconds() + I;
  }

  retry_after_withdate  = "retry-after"i def http_date eoh
                          %set_retry_after_withdate;
  retry_after_withdelta = "retry-after"i def int eoh
                          %set_retry_after_withdelta;
  ############## request-cache-control ##############
  # Only max-age is stored (request-only); the other directives are merely recognized.
  action SETMAXAGE {
      if (request_hd)
          request_hd->max_age = I;
  }

  delta_seconds   = int;
  cache_extension = token ("=" (token | quoted_str))?;
  # Request directives of Cache-Control. The standard directive name is
  # "no-transform"; any other token is still accepted through cache_extension.
  request_cache_directive = "no-cache"i
                          | "no-store"i
                          | ("max-age"i "=" delta_seconds %SETMAXAGE)
                          | ("max-stale"i ("=" delta_seconds)?)
                          | ("min-fresh"i "=" delta_seconds)
                          | "no-transform"i
                          | "only-if-cached"i
                          | cache_extension;
  request_cache_control = "cache-control"i def request_cache_directive eoh;
  ############ x-yandex-response-timeout #############
  # Request-only field: store the parsed number.
  action set_response_timeout {
      if (request_hd)
          request_hd->x_yandex_response_timeout = I;
  }

  response_timeout = "x-yandex-response-timeout"i def int eoh
                     %set_response_timeout;
  ############ x-yandex-request-priority #############
  # Request-only field: store the parsed number.
  action set_request_priority {
      if (request_hd)
          request_hd->x_yandex_request_priority = I;
  }

  request_priority = "x-yandex-request-priority"i def int eoh
                     %set_request_priority;
  ################# message header ##################
  # Any header that is not interpreted; www-authenticate is excluded here and
  # handled by the auth rule. Specific headers take priority (@1) over this
  # generic form ($0).
  other_header = ( ex_token - "www-authenticate"i ) def any_text eoh;

  # Headers understood in both requests and responses.
  message_header = other_header      $0
                 | connection        @1
                 | content_encoding  @1
                 | transfer_encoding @1
                 | content_length    @1
                 | content_type      @1
                 | last_modified     @1
                 | refresh           @1
                 | content_range     @1;

  # Headers understood in responses only.
  response_header = message_header        $0
                  | auth                  @1
                  | accept_ranges         @1
                  | location              @1
                  | x_robots_tag          @1
                  | rel_canonical         @1
                  | hreflang              @1
                  | squid_error           @1
                  | retry_after_withdate  @1
                  | retry_after_withdelta @1;

  # Headers understood in requests only.
  request_header = message_header        $0
                 | from_header           @1
                 | host_header           @1
                 | user_agent_header     @1
                 | sourcename_header     @1
                 | requesttype_header    @1
                 | langregion_header     @1
                 | fetchoptions_header   @1
                 | if_modified_since     @1
                 | request_cache_control @1
                 | response_timeout      @1
                 | request_priority      @1;
  ################# main ############################
  # Reached on the empty line that ends the header block: remember the position
  # of the last consumed character and leave execute() with 2.
  action accepted {
      lastchar = (char*)fpc;
      return 2;
  }

  # Either a response (status line + response headers) or a request
  # (request line + request headers), terminated by an empty line.
  main := ( ( response_status_line ('\r'? response_header)* )
          | ( request_status_line  ('\r' ? request_header)* ) )
          eol @accepted;
  }%%
  %% write data;
  // Feeds `len` bytes starting at `inBuf` to the header state machine.
  // Actions embedded in the machine may leave this function early with their
  // own code (2 once the header block is accepted, -2 or -3 on rejected input).
  // Otherwise the result reflects the state reached after the bytes were consumed:
  //   -1  the machine is in its error state,
  //    0  the current state equals http_header_parser_first_final,
  //    1  any other state.
  int THttpHeaderParser::execute(unsigned char *inBuf, int len) {
      // p and pe are the cursor and end-of-data pointers read by the generated code.
      const unsigned char *p = inBuf;
      const unsigned char *pe = p + len;
      %% write exec;
      if (cs == http_header_parser_error) {
          return -1;
      }
      if (cs == http_header_parser_first_final) {
          return 0;
      }
      return 1;
  }
  // Puts the header state machine into its start state by running the
  // Ragel-generated initialization code.
  void THttpHeaderParser::init()
  {
      %% write init;
  }
  %%{
  machine http_chunk_parser;
  alphtype unsigned char;

  # Chunk size accumulation: hex digits are folded into cnt64; parsing is
  # aborted with -2 as soon as the value no longer fits into an int.
  action clear_hex {
      cnt64 = 0;
  }
  action update_hex {
      cnt64 = 16 * cnt64 + X(fc);
      if (cnt64 > Max<int>())
          return -2;
  }
  action set_chunk {
      chunk_length = static_cast<int>(cnt64);
  }
  # Remember the position of the last consumed character and leave execute() with 2.
  action accepted {
      lastchar = (char*)fpc;
      return 2;
  }

  eol        = '\r'? '\n';
  ws         = [ \t];
  sp         = ' ';
  lw         = '\r'? '\n'? ws;
  separator  = [()<>@,;:\\"/\[\]?={}];
  token_char = [!-~] - separator;            # http tokens chars
  url_char   = [!-~] - ["<>\[\]\\^`{}|];     # uric chars
  text_char  = ws | 33..127 | 160..255;
  lws        = lw*;
  eoh        = lws eol;
  token      = token_char+;
  text       = (text_char | lw)*;
  def        = lws ':' lws;

  hex             = (xdigit+) >clear_hex $update_hex;
  quoted_string   = '"' ((text_char - '"') $0 | '\\"' @1)* '"';
  chunk_ext_val   = token | quoted_string;
  chunk_ext_name  = token;
  chunk_extension = ws* (';' chunk_ext_name ws* '=' ws* chunk_ext_val ws*)*;
  entity_header   = token def text eoh;
  trailer         = entity_header*;

  # A regular chunk has a size that is not all zeros; the last chunk has a
  # size made of zeros only and may be followed by trailer headers.
  chunk      = (hex - '0'+) chunk_extension? %set_chunk;
  last_chunk = '0'+ chunk_extension? eol trailer;

  main := eol (chunk $0 | last_chunk @1) eol @accepted;
  }%%
  %% write data;
  // Feeds `len` bytes starting at `inBuf` to the chunk-header state machine.
  // Actions embedded in the machine may leave this function early with their
  // own code (2 once a chunk header is accepted, -2 when the size overflows an int).
  // Otherwise the result reflects the state reached after the bytes were consumed:
  //   -1  the machine is in its error state,
  //    0  the current state equals http_chunk_parser_first_final,
  //    1  any other state.
  int THttpChunkParser::execute(unsigned char *inBuf, int len) {
      // p and pe are the cursor and end-of-data pointers read by the generated code.
      const unsigned char *p = inBuf;
      const unsigned char *pe = p + len;
      %% write exec;
      if (cs == http_chunk_parser_error) {
          return -1;
      }
      if (cs == http_chunk_parser_first_final) {
          return 0;
      }
      return 1;
  }
  // Resets the parsed chunk length to zero and puts the chunk state machine
  // into its start state by running the Ragel-generated initialization code.
  void THttpChunkParser::init()
  {
      chunk_length = 0;
      %% write init;
  }
  573. }