httpparser.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. #pragma once
  2. #include "httpfsm.h"
  3. #include "httpheader.h"
  4. #include <library/cpp/mime/types/mime.h>
  5. #include <util/system/yassert.h>
  6. #include <library/cpp/http/misc/httpcodes.h>
  7. template <size_t headermax = 100 << 10, size_t bodymax = 1 << 20>
  8. struct TFakeCheck {
  9. bool Check(THttpHeader* /*header*/) {
  10. return false;
  11. }
  12. void CheckDocPart(void* /*buf*/, size_t /*len*/, THttpHeader* /*header*/) {
  13. } //for every part of DocumentBody will be called
  14. void CheckEndDoc(THttpHeader* /*header*/) {
  15. }
  16. size_t GetMaxHeaderSize() {
  17. return headermax;
  18. }
  19. size_t GetMaxBodySize(THttpHeader*) {
  20. return bodymax;
  21. }
  22. };
  23. class THttpParserBase {
  24. public:
  25. enum States {
  26. hp_error,
  27. hp_eof,
  28. hp_in_header,
  29. hp_read_alive,
  30. hp_read_closed,
  31. hp_begin_chunk_header,
  32. hp_chunk_header,
  33. hp_read_chunk
  34. };
  35. States GetState() {
  36. return State;
  37. }
  38. void setAssumeConnectionClosed(int value) {
  39. AssumeConnectionClosed = value;
  40. }
  41. THttpHeader* GetHttpHeader() const {
  42. return Header;
  43. }
  44. protected:
  45. int CheckHeaders() {
  46. if (Header->http_status < HTTP_OK || Header->http_status == HTTP_NO_CONTENT || Header->http_status == HTTP_NOT_MODIFIED) {
  47. Header->content_length = 0;
  48. Header->transfer_chunked = 0;
  49. }
  50. if (Header->transfer_chunked < -1) {
  51. Header->error = HTTP_BAD_ENCODING;
  52. return 1;
  53. } else if (Header->transfer_chunked == -1) {
  54. Header->transfer_chunked = 0;
  55. }
  56. if (!Header->transfer_chunked && Header->content_length < -1) {
  57. Header->error = HTTP_BAD_CONTENT_LENGTH;
  58. return 1;
  59. }
  60. if (Header->http_status == HTTP_OK) {
  61. if (Header->compression_method != HTTP_COMPRESSION_UNSET &&
  62. Header->compression_method != HTTP_COMPRESSION_IDENTITY &&
  63. Header->compression_method != HTTP_COMPRESSION_GZIP &&
  64. Header->compression_method != HTTP_COMPRESSION_DEFLATE)
  65. {
  66. Header->error = HTTP_BAD_CONTENT_ENCODING;
  67. return 1;
  68. }
  69. }
  70. if (Header->connection_closed == -1)
  71. Header->connection_closed = (Header->http_minor == 0 ||
  72. AssumeConnectionClosed);
  73. if (!Header->transfer_chunked && !Header->connection_closed && Header->content_length < 0 && !HeadRequest) {
  74. Header->error = HTTP_LENGTH_UNKNOWN;
  75. return 1;
  76. }
  77. if (Header->http_time < 0)
  78. Header->http_time = 0;
  79. if (Header->mime_type < 0)
  80. Header->mime_type = MIME_UNKNOWN;
  81. return 0;
  82. }
  83. THttpHeaderParser HeaderParser;
  84. THttpChunkParser ChunkParser;
  85. States State;
  86. long ChunkSize;
  87. THttpHeader* Header;
  88. int AssumeConnectionClosed;
  89. bool HeadRequest;
  90. };
  91. template <int isReader, typename TCheck = TFakeCheck<>>
  92. class THttpParserGeneric: public THttpParserBase, public TCheck {
  93. protected:
  94. long ParseGeneric(void*& buf, long& size) {
  95. if (!size) {
  96. switch (State) {
  97. case hp_error:
  98. case hp_eof:
  99. break;
  100. case hp_read_closed:
  101. State = hp_eof;
  102. break;
  103. case hp_in_header:
  104. Header->error = HTTP_HEADER_EOF;
  105. State = hp_error;
  106. break;
  107. case hp_read_alive:
  108. case hp_read_chunk:
  109. if (HeadRequest)
  110. State = hp_eof;
  111. else {
  112. Header->error = HTTP_MESSAGE_EOF;
  113. State = hp_error;
  114. }
  115. break;
  116. case hp_begin_chunk_header:
  117. case hp_chunk_header:
  118. if (HeadRequest)
  119. State = hp_eof;
  120. else {
  121. Header->error = HTTP_CHUNK_EOF;
  122. State = hp_error;
  123. }
  124. break;
  125. }
  126. return 0;
  127. }
  128. while (size) {
  129. int ret;
  130. switch (State) {
  131. case hp_error:
  132. return 0;
  133. case hp_eof:
  134. return 0;
  135. case hp_in_header:
  136. if ((ret = HeaderParser.Execute(buf, size)) < 0) {
  137. Header->error = HTTP_BAD_HEADER_STRING;
  138. State = hp_error;
  139. return 0;
  140. } else if (ret == 2) {
  141. Header->header_size += i32(HeaderParser.lastchar - (char*)buf + 1);
  142. size -= long(HeaderParser.lastchar - (char*)buf + 1);
  143. buf = HeaderParser.lastchar + 1;
  144. State = CheckHeaders() ? hp_error
  145. : Header->transfer_chunked ? hp_begin_chunk_header
  146. : Header->content_length == 0 ? hp_eof
  147. : Header->content_length > 0 ? hp_read_alive
  148. : hp_read_closed;
  149. if (State == hp_begin_chunk_header) {
  150. // unget \n for chunk reader
  151. buf = (char*)buf - 1;
  152. size++;
  153. }
  154. if (isReader)
  155. return size;
  156. } else {
  157. Header->header_size += size;
  158. size = 0;
  159. }
  160. break;
  161. case hp_read_alive:
  162. Header->entity_size += size;
  163. if (Header->entity_size >= Header->content_length) {
  164. State = hp_eof;
  165. }
  166. TCheck::CheckDocPart(buf, size, Header);
  167. if (isReader)
  168. return size;
  169. size = 0;
  170. break;
  171. case hp_read_closed:
  172. Header->entity_size += size;
  173. TCheck::CheckDocPart(buf, size, Header);
  174. if (isReader)
  175. return size;
  176. size = 0;
  177. break;
  178. case hp_begin_chunk_header:
  179. ChunkParser.Init();
  180. State = hp_chunk_header;
  181. [[fallthrough]];
  182. case hp_chunk_header:
  183. if ((ret = ChunkParser.Execute(buf, size)) < 0) {
  184. Header->error = i16(ret == -2 ? HTTP_CHUNK_TOO_LARGE : HTTP_BAD_CHUNK);
  185. State = hp_error;
  186. return 0;
  187. } else if (ret == 2) {
  188. Header->entity_size += i32(ChunkParser.lastchar - (char*)buf + 1);
  189. size -= long(ChunkParser.lastchar - (char*)buf + 1);
  190. buf = ChunkParser.lastchar + 1;
  191. ChunkSize = ChunkParser.chunk_length;
  192. Y_ASSERT(ChunkSize >= 0);
  193. State = ChunkSize ? hp_read_chunk : hp_eof;
  194. } else {
  195. Header->entity_size += size;
  196. size = 0;
  197. }
  198. break;
  199. case hp_read_chunk:
  200. if (size >= ChunkSize) {
  201. Header->entity_size += ChunkSize;
  202. State = hp_begin_chunk_header;
  203. TCheck::CheckDocPart(buf, ChunkSize, Header);
  204. if (isReader)
  205. return ChunkSize;
  206. size -= ChunkSize;
  207. buf = (char*)buf + ChunkSize;
  208. } else {
  209. Header->entity_size += size;
  210. ChunkSize -= size;
  211. TCheck::CheckDocPart(buf, size, Header);
  212. if (isReader)
  213. return size;
  214. size = 0;
  215. }
  216. break;
  217. }
  218. }
  219. return size;
  220. }
  221. };
  222. template <class TCheck = TFakeCheck<>>
  223. class THttpParser: public THttpParserGeneric<0, TCheck> {
  224. typedef THttpParserGeneric<0, TCheck> TBaseT; //sorry avoiding gcc 3.4.6 BUG!
  225. public:
  226. void Init(THttpHeader* H, bool head_request = false) {
  227. TBaseT::Header = H;
  228. TBaseT::HeaderParser.Init(TBaseT::Header);
  229. TBaseT::State = TBaseT::hp_in_header;
  230. TBaseT::AssumeConnectionClosed = 0;
  231. TBaseT::HeadRequest = head_request;
  232. }
  233. void Parse(void* buf, long size) {
  234. TBaseT::ParseGeneric(buf, size);
  235. }
  236. };
  237. class TMemoReader {
  238. public:
  239. int Init(void* buf, long bufsize) {
  240. Buf = buf;
  241. Bufsize = bufsize;
  242. return 0;
  243. }
  244. long Read(void*& buf) {
  245. Y_ASSERT(Bufsize >= 0);
  246. if (!Bufsize) {
  247. Bufsize = -1;
  248. return 0;
  249. }
  250. buf = Buf;
  251. long ret = Bufsize;
  252. Bufsize = 0;
  253. return ret;
  254. }
  255. protected:
  256. long Bufsize;
  257. void* Buf;
  258. };
  259. template <class Reader>
  260. class THttpReader: public THttpParserGeneric<1>, public Reader {
  261. typedef THttpParserGeneric<1> TBaseT;
  262. public:
  263. using TBaseT::AssumeConnectionClosed;
  264. using TBaseT::Header;
  265. using TBaseT::ParseGeneric;
  266. using TBaseT::State;
  267. int Init(THttpHeader* H, int parsHeader, int assumeConnectionClosed = 0, bool headRequest = false) {
  268. Header = H;
  269. Eoferr = 1;
  270. Size = 0;
  271. AssumeConnectionClosed = assumeConnectionClosed;
  272. HeadRequest = headRequest;
  273. return parsHeader ? ParseHeader() : SkipHeader();
  274. }
  275. long Read(void*& buf) {
  276. long Chunk;
  277. do {
  278. if (!Size) {
  279. if (Eoferr != 1)
  280. return Eoferr;
  281. else if ((Size = (long)Reader::Read(Ptr)) < 0) {
  282. Header->error = HTTP_CONNECTION_LOST;
  283. return Eoferr = -1;
  284. }
  285. }
  286. Chunk = ParseGeneric(Ptr, Size);
  287. buf = Ptr;
  288. Ptr = (char*)Ptr + Chunk;
  289. Size -= Chunk;
  290. if (State == hp_eof) {
  291. Size = 0;
  292. Eoferr = 0;
  293. } else if (State == hp_error)
  294. return Eoferr = -1;
  295. } while (!Chunk);
  296. return Chunk;
  297. }
  298. protected:
  299. int ParseHeader() {
  300. HeaderParser.Init(Header);
  301. State = hp_in_header;
  302. while (State == hp_in_header) {
  303. if ((Size = (long)Reader::Read(Ptr)) < 0)
  304. return Eoferr = -1;
  305. ParseGeneric(Ptr, Size);
  306. }
  307. if (State == hp_error)
  308. return Eoferr = -1;
  309. if (State == hp_eof)
  310. Eoferr = 0;
  311. return 0;
  312. }
  313. int SkipHeader() {
  314. long hdrsize = Header->header_size;
  315. while (hdrsize) {
  316. if ((Size = (long)Reader::Read(Ptr)) <= 0)
  317. return Eoferr = -1;
  318. if (Size >= hdrsize) {
  319. Size -= hdrsize;
  320. Ptr = (char*)Ptr + hdrsize;
  321. break;
  322. }
  323. hdrsize -= Size;
  324. }
  325. State = Header->transfer_chunked ? hp_begin_chunk_header
  326. : Header->content_length == 0 ? hp_eof
  327. : Header->content_length > 0 ? hp_read_alive
  328. : hp_read_closed;
  329. Header->entity_size = 0;
  330. if (State == hp_eof)
  331. Eoferr = 0;
  332. else if (State == hp_begin_chunk_header) {
  333. // unget \n for chunk reader
  334. Ptr = (char*)Ptr - 1;
  335. ++Size;
  336. }
  337. return 0;
  338. }
  339. void* Ptr;
  340. long Size;
  341. int Eoferr;
  342. };