h1_decoder.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. /**
  2. * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3. * SPDX-License-Identifier: Apache-2.0.
  4. */
  5. #include <aws/common/string.h>
  6. #include <aws/http/private/h1_decoder.h>
  7. #include <aws/http/private/strutil.h>
  8. #include <aws/http/status_code.h>
  9. #include <aws/io/logging.h>
  /* Names of the transfer codings recognized in a Transfer-Encoding header.
   * s_linestate_header() compares each comma-separated coding against these, ignoring case.
   * "x-compress" and "x-gzip" are treated as aliases of "compress" and "gzip". */
  10. AWS_STATIC_STRING_FROM_LITERAL(s_transfer_coding_chunked, "chunked");
  11. AWS_STATIC_STRING_FROM_LITERAL(s_transfer_coding_compress, "compress");
  12. AWS_STATIC_STRING_FROM_LITERAL(s_transfer_coding_x_compress, "x-compress");
  13. AWS_STATIC_STRING_FROM_LITERAL(s_transfer_coding_deflate, "deflate");
  14. AWS_STATIC_STRING_FROM_LITERAL(s_transfer_coding_gzip, "gzip");
  15. AWS_STATIC_STRING_FROM_LITERAL(s_transfer_coding_x_gzip, "x-gzip");
  16. /* Decoder runs a state machine.
  17. * Each state consumes data until it sets the next state.
  18. * A common state is the "line state", which handles consuming one line ending in CRLF
  19. * and feeding the line to a linestate_fn, which should process data and set the next state.
  20. */
  /* A state function. It receives the remaining input by pointer and advances the cursor past
   * whatever it consumed. Returns AWS_OP_SUCCESS or AWS_OP_ERR. */
  21. typedef int(state_fn)(struct aws_h1_decoder *decoder, struct aws_byte_cursor *input);
  /* A line processor. It receives one complete line by value, with the trailing CRLF already
   * removed by s_state_getline(). Returns AWS_OP_SUCCESS or AWS_OP_ERR. */
  22. typedef int(linestate_fn)(struct aws_h1_decoder *decoder, struct aws_byte_cursor input);
  /* State of one HTTP/1 message decoder. All per-message fields are reset by s_reset_state(). */
  23. struct aws_h1_decoder {
  24. /* Implementation data. */
  /* Allocator used for the decoder itself and for scratch_space. */
  25. struct aws_allocator *alloc;
  /* Holds the bytes of a partially received line until its CRLF arrives. Cleared on every state change. */
  26. struct aws_byte_buf scratch_space;
  /* Current state function, invoked by aws_h1_decode() while input remains. */
  27. state_fn *run_state;
  /* Line processor invoked by s_state_getline() for each complete line; NULL outside line states. */
  28. linestate_fn *process_line;
  /* Bit flags (AWS_HTTP_TRANSFER_ENCODING_*) collected from Transfer-Encoding headers. */
  29. int transfer_encoding;
  /* Number of unchunked body bytes delivered so far. */
  30. uint64_t content_processed;
  /* Value parsed from the Content-Length header; 0 if none was seen. */
  31. uint64_t content_length;
  /* Number of bytes of the current chunk delivered so far. */
  32. uint64_t chunk_processed;
  /* Size of the current chunk, parsed from the chunk-size line. */
  33. uint64_t chunk_size;
  /* True once the zero-size chunk was read; header lines that follow are trailers. */
  34. bool doing_trailers;
  /* Set by s_mark_done(); makes aws_h1_decode() stop and reset for the next message. */
  35. bool is_done;
  /* When true, the message is treated as complete at the end of its headers (set for 304 responses
   * or via aws_h1_decoder_set_body_headers_ignored()). */
  36. bool body_headers_ignored;
  /* When true (204 and 1xx responses), a non-zero Content-Length or any Transfer-Encoding is an error. */
  37. bool body_headers_forbidden;
  /* Which header block is being decoded: MAIN by default, INFORMATIONAL for 1xx responses. */
  38. enum aws_http_header_block header_block;
  /* Opaque pointer printed as "id=%p" in log messages. */
  39. const void *logging_id;
  40. /* User callbacks and settings. */
  /* Callbacks invoked as parts of the message are decoded. */
  41. struct aws_h1_decoder_vtable vtable;
  /* True: the first line is parsed as a request line. False: it is parsed as a status line. */
  42. bool is_decoding_requests;
  /* Passed back unchanged to every vtable callback. */
  43. void *user_data;
  44. };
  /* Forward declarations of the line processors, which are referenced before they are defined
   * (for example by s_reset_state() and s_linestate_chunk_terminator()). */
  45. static int s_linestate_request(struct aws_h1_decoder *decoder, struct aws_byte_cursor input);
  46. static int s_linestate_response(struct aws_h1_decoder *decoder, struct aws_byte_cursor input);
  47. static int s_linestate_header(struct aws_h1_decoder *decoder, struct aws_byte_cursor input);
  48. static int s_linestate_chunk_size(struct aws_h1_decoder *decoder, struct aws_byte_cursor input);
  49. static bool s_scan_for_crlf(struct aws_h1_decoder *decoder, struct aws_byte_cursor input, size_t *bytes_processed) {
  50. AWS_ASSERT(input.len > 0);
  51. /* In a loop, scan for "\n", then look one char back for "\r" */
  52. uint8_t *ptr = input.ptr;
  53. uint8_t *end = input.ptr + input.len;
  54. while (ptr != end) {
  55. uint8_t *newline = (uint8_t *)memchr(ptr, '\n', end - ptr);
  56. if (!newline) {
  57. break;
  58. }
  59. uint8_t prev_char;
  60. if (newline == input.ptr) {
  61. /* If "\n" is first character check scratch_space for previous character */
  62. if (decoder->scratch_space.len > 0) {
  63. prev_char = decoder->scratch_space.buffer[decoder->scratch_space.len - 1];
  64. } else {
  65. prev_char = 0;
  66. }
  67. } else {
  68. prev_char = *(newline - 1);
  69. }
  70. if (prev_char == '\r') {
  71. *bytes_processed = 1 + (newline - input.ptr);
  72. return true;
  73. }
  74. ptr = newline + 1;
  75. }
  76. *bytes_processed = input.len;
  77. return false;
  78. }
  79. /* This state consumes an entire line, then calls a linestate_fn to process the line. */
  80. static int s_state_getline(struct aws_h1_decoder *decoder, struct aws_byte_cursor *input) {
  81. /* If preceding runs of this state failed to find CRLF, their data is stored in the scratch_space
  82. * and new data needs to be combined with the old data for processing. */
  83. bool has_prev_data = decoder->scratch_space.len;
  84. size_t line_length = 0;
  85. bool found_crlf = s_scan_for_crlf(decoder, *input, &line_length);
  86. /* Found end of line! Run the line processor on it */
  87. struct aws_byte_cursor line = aws_byte_cursor_advance(input, line_length);
  88. bool use_scratch = !found_crlf | has_prev_data;
  89. if (AWS_UNLIKELY(use_scratch)) {
  90. if (aws_byte_buf_append_dynamic(&decoder->scratch_space, &line)) {
  91. AWS_LOGF_ERROR(
  92. AWS_LS_HTTP_STREAM,
  93. "id=%p: Internal buffer write failed with error code %d (%s)",
  94. decoder->logging_id,
  95. aws_last_error(),
  96. aws_error_name(aws_last_error()));
  97. return AWS_OP_ERR;
  98. }
  99. /* Line is actually the entire scratch buffer now */
  100. line = aws_byte_cursor_from_buf(&decoder->scratch_space);
  101. }
  102. if (AWS_LIKELY(found_crlf)) {
  103. /* Backup so "\r\n" is not included. */
  104. /* RFC-7230 section 3 Message Format */
  105. AWS_ASSERT(line.len >= 2);
  106. line.len -= 2;
  107. return decoder->process_line(decoder, line);
  108. }
  109. /* Didn't find crlf, we'll continue scanning when more data comes in */
  110. return AWS_OP_SUCCESS;
  111. }
  112. static int s_cursor_split_impl(
  113. struct aws_byte_cursor input,
  114. char split_on,
  115. struct aws_byte_cursor *cursor_array,
  116. size_t num_cursors,
  117. bool error_if_more_splits_possible) {
  118. struct aws_byte_cursor split;
  119. AWS_ZERO_STRUCT(split);
  120. for (size_t i = 0; i < num_cursors; ++i) {
  121. if (!aws_byte_cursor_next_split(&input, split_on, &split)) {
  122. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  123. }
  124. cursor_array[i] = split;
  125. }
  126. if (error_if_more_splits_possible) {
  127. if (aws_byte_cursor_next_split(&input, split_on, &split)) {
  128. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  129. }
  130. } else {
  131. /* Otherwise, the last cursor will contain the remainder of the string */
  132. struct aws_byte_cursor *last_cursor = &cursor_array[num_cursors - 1];
  133. last_cursor->len = (input.ptr + input.len) - last_cursor->ptr;
  134. }
  135. return AWS_OP_SUCCESS;
  136. }
  137. /* Final cursor contains remainder of input. */
  138. static int s_cursor_split_first_n_times(
  139. struct aws_byte_cursor input,
  140. char split_on,
  141. struct aws_byte_cursor *cursor_array,
  142. size_t num_cursors) {
  143. return s_cursor_split_impl(input, split_on, cursor_array, num_cursors, false);
  144. }
  145. /* Error if input could have been split more times */
  146. static int s_cursor_split_exactly_n_times(
  147. struct aws_byte_cursor input,
  148. char split_on,
  149. struct aws_byte_cursor *cursor_array,
  150. size_t num_cursors) {
  151. return s_cursor_split_impl(input, split_on, cursor_array, num_cursors, true);
  152. }
  153. static void s_set_state(struct aws_h1_decoder *decoder, state_fn *state) {
  154. decoder->scratch_space.len = 0;
  155. decoder->run_state = state;
  156. decoder->process_line = NULL;
  157. }
  158. /* Set next state to capture a full line, then call the specified linestate_fn on it */
  159. static void s_set_line_state(struct aws_h1_decoder *decoder, linestate_fn *line_processor) {
  160. s_set_state(decoder, s_state_getline);
  161. decoder->process_line = line_processor;
  162. }
  163. static int s_mark_done(struct aws_h1_decoder *decoder) {
  164. decoder->is_done = true;
  165. return decoder->vtable.on_done(decoder->user_data);
  166. }
  167. /* Reset state, in preparation for processing a new message */
  168. static void s_reset_state(struct aws_h1_decoder *decoder) {
  169. if (decoder->is_decoding_requests) {
  170. s_set_line_state(decoder, s_linestate_request);
  171. } else {
  172. s_set_line_state(decoder, s_linestate_response);
  173. }
  174. decoder->transfer_encoding = 0;
  175. decoder->content_processed = 0;
  176. decoder->content_length = 0;
  177. decoder->chunk_processed = 0;
  178. decoder->chunk_size = 0;
  179. decoder->doing_trailers = false;
  180. decoder->is_done = false;
  181. decoder->body_headers_ignored = false;
  182. decoder->body_headers_forbidden = false;
  183. /* set to normal by default */
  184. decoder->header_block = AWS_HTTP_HEADER_BLOCK_MAIN;
  185. }
  186. static int s_state_unchunked_body(struct aws_h1_decoder *decoder, struct aws_byte_cursor *input) {
  187. size_t processed_bytes = 0;
  188. AWS_FATAL_ASSERT(decoder->content_processed < decoder->content_length); /* shouldn't be possible */
  189. if (input->len > (decoder->content_length - decoder->content_processed)) {
  190. processed_bytes = (size_t)(decoder->content_length - decoder->content_processed);
  191. } else {
  192. processed_bytes = input->len;
  193. }
  194. decoder->content_processed += processed_bytes;
  195. bool finished = decoder->content_processed == decoder->content_length;
  196. struct aws_byte_cursor body = aws_byte_cursor_advance(input, processed_bytes);
  197. int err = decoder->vtable.on_body(&body, finished, decoder->user_data);
  198. if (err) {
  199. return AWS_OP_ERR;
  200. }
  201. if (AWS_LIKELY(finished)) {
  202. err = s_mark_done(decoder);
  203. if (err) {
  204. return AWS_OP_ERR;
  205. }
  206. }
  207. return AWS_OP_SUCCESS;
  208. }
  209. static int s_linestate_chunk_terminator(struct aws_h1_decoder *decoder, struct aws_byte_cursor input) {
  210. /* Expecting CRLF at end of each chunk */
  211. /* RFC-7230 section 4.1 Chunked Transfer Encoding */
  212. if (AWS_UNLIKELY(input.len != 0)) {
  213. AWS_LOGF_ERROR(
  214. AWS_LS_HTTP_STREAM, "id=%p: Incoming chunk is invalid, does not end with CRLF.", decoder->logging_id);
  215. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  216. }
  217. s_set_line_state(decoder, s_linestate_chunk_size);
  218. return AWS_OP_SUCCESS;
  219. }
  220. static int s_state_chunk(struct aws_h1_decoder *decoder, struct aws_byte_cursor *input) {
  221. size_t processed_bytes = 0;
  222. AWS_ASSERT(decoder->chunk_processed < decoder->chunk_size);
  223. if (input->len > (decoder->chunk_size - decoder->chunk_processed)) {
  224. processed_bytes = (size_t)(decoder->chunk_size - decoder->chunk_processed);
  225. } else {
  226. processed_bytes = input->len;
  227. }
  228. decoder->chunk_processed += processed_bytes;
  229. bool finished = decoder->chunk_processed == decoder->chunk_size;
  230. struct aws_byte_cursor body = aws_byte_cursor_advance(input, processed_bytes);
  231. int err = decoder->vtable.on_body(&body, false, decoder->user_data);
  232. if (err) {
  233. return AWS_OP_ERR;
  234. }
  235. if (AWS_LIKELY(finished)) {
  236. s_set_line_state(decoder, s_linestate_chunk_terminator);
  237. }
  238. return AWS_OP_SUCCESS;
  239. }
  240. static int s_linestate_chunk_size(struct aws_h1_decoder *decoder, struct aws_byte_cursor input) {
  241. struct aws_byte_cursor size;
  242. AWS_ZERO_STRUCT(size);
  243. if (!aws_byte_cursor_next_split(&input, ';', &size)) {
  244. AWS_LOGF_ERROR(
  245. AWS_LS_HTTP_STREAM, "id=%p: Incoming chunk is invalid, first line is malformed.", decoder->logging_id);
  246. AWS_LOGF_DEBUG(
  247. AWS_LS_HTTP_STREAM,
  248. "id=%p: Bad chunk line is: '" PRInSTR "'",
  249. decoder->logging_id,
  250. AWS_BYTE_CURSOR_PRI(input));
  251. return AWS_OP_ERR;
  252. }
  253. int err = aws_byte_cursor_utf8_parse_u64_hex(size, &decoder->chunk_size);
  254. if (err) {
  255. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Failed to parse size of incoming chunk.", decoder->logging_id);
  256. AWS_LOGF_DEBUG(
  257. AWS_LS_HTTP_STREAM,
  258. "id=%p: Bad chunk size is: '" PRInSTR "'",
  259. decoder->logging_id,
  260. AWS_BYTE_CURSOR_PRI(size));
  261. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  262. }
  263. decoder->chunk_processed = 0;
  264. /* Empty chunk signifies all chunks have been read. */
  265. if (AWS_UNLIKELY(decoder->chunk_size == 0)) {
  266. struct aws_byte_cursor cursor;
  267. cursor.ptr = NULL;
  268. cursor.len = 0;
  269. err = decoder->vtable.on_body(&cursor, true, decoder->user_data);
  270. if (err) {
  271. return AWS_OP_ERR;
  272. }
  273. /* Expected empty newline and end of message. */
  274. decoder->doing_trailers = true;
  275. s_set_line_state(decoder, s_linestate_header);
  276. return AWS_OP_SUCCESS;
  277. }
  278. /* Skip all chunk extensions, as they are optional. */
  279. /* RFC-7230 section 4.1.1 Chunk Extensions */
  280. s_set_state(decoder, s_state_chunk);
  281. return AWS_OP_SUCCESS;
  282. }
  283. static int s_linestate_header(struct aws_h1_decoder *decoder, struct aws_byte_cursor input) {
  284. int err;
  285. /* The \r\n was just processed by `s_state_getline`. */
  286. /* Empty line signifies end of headers, and beginning of body or end of trailers. */
  287. /* RFC-7230 section 3 Message Format */
  288. if (input.len == 0) {
  289. if (AWS_LIKELY(!decoder->doing_trailers)) {
  290. if (decoder->body_headers_ignored) {
  291. err = s_mark_done(decoder);
  292. if (err) {
  293. return AWS_OP_ERR;
  294. }
  295. } else if (decoder->transfer_encoding & AWS_HTTP_TRANSFER_ENCODING_CHUNKED) {
  296. s_set_line_state(decoder, s_linestate_chunk_size);
  297. } else if (decoder->content_length > 0) {
  298. s_set_state(decoder, s_state_unchunked_body);
  299. } else {
  300. err = s_mark_done(decoder);
  301. if (err) {
  302. return AWS_OP_ERR;
  303. }
  304. }
  305. } else {
  306. /* Empty line means end of message. */
  307. err = s_mark_done(decoder);
  308. if (err) {
  309. return AWS_OP_ERR;
  310. }
  311. }
  312. return AWS_OP_SUCCESS;
  313. }
  314. /* Each header field consists of a case-insensitive field name followed by a colon (":"),
  315. * optional leading whitespace, the field value, and optional trailing whitespace.
  316. * RFC-7230 3.2 */
  317. struct aws_byte_cursor splits[2];
  318. err = s_cursor_split_first_n_times(input, ':', splits, 2); /* value may contain more colons */
  319. if (err) {
  320. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Invalid incoming header, missing colon.", decoder->logging_id);
  321. AWS_LOGF_DEBUG(
  322. AWS_LS_HTTP_STREAM, "id=%p: Bad header is: '" PRInSTR "'", decoder->logging_id, AWS_BYTE_CURSOR_PRI(input));
  323. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  324. }
  325. struct aws_byte_cursor name = splits[0];
  326. if (!aws_strutil_is_http_token(name)) {
  327. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Invalid incoming header, bad name.", decoder->logging_id);
  328. AWS_LOGF_DEBUG(
  329. AWS_LS_HTTP_STREAM, "id=%p: Bad header is: '" PRInSTR "'", decoder->logging_id, AWS_BYTE_CURSOR_PRI(input));
  330. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  331. }
  332. struct aws_byte_cursor value = aws_strutil_trim_http_whitespace(splits[1]);
  333. if (!aws_strutil_is_http_field_value(value)) {
  334. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Invalid incoming header, bad value.", decoder->logging_id);
  335. AWS_LOGF_DEBUG(
  336. AWS_LS_HTTP_STREAM, "id=%p: Bad header is: '" PRInSTR "'", decoder->logging_id, AWS_BYTE_CURSOR_PRI(input));
  337. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  338. }
  339. struct aws_h1_decoded_header header;
  340. header.name = aws_http_str_to_header_name(name);
  341. header.name_data = name;
  342. header.value_data = value;
  343. header.data = input;
  344. switch (header.name) {
  345. case AWS_HTTP_HEADER_CONTENT_LENGTH:
  346. if (decoder->transfer_encoding) {
  347. AWS_LOGF_ERROR(
  348. AWS_LS_HTTP_STREAM,
  349. "id=%p: Incoming headers for both content-length and transfer-encoding received. This is illegal.",
  350. decoder->logging_id);
  351. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  352. }
  353. if (aws_byte_cursor_utf8_parse_u64(header.value_data, &decoder->content_length)) {
  354. AWS_LOGF_ERROR(
  355. AWS_LS_HTTP_STREAM,
  356. "id=%p: Incoming content-length header has invalid value.",
  357. decoder->logging_id);
  358. AWS_LOGF_DEBUG(
  359. AWS_LS_HTTP_STREAM,
  360. "id=%p: Bad content-length value is: '" PRInSTR "'",
  361. decoder->logging_id,
  362. AWS_BYTE_CURSOR_PRI(header.value_data));
  363. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  364. }
  365. if (decoder->body_headers_forbidden && decoder->content_length != 0) {
  366. AWS_LOGF_ERROR(
  367. AWS_LS_HTTP_STREAM,
  368. "id=%p: Incoming headers for content-length received, but it is illegal for this message to have a "
  369. "body",
  370. decoder->logging_id);
  371. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  372. }
  373. break;
  374. case AWS_HTTP_HEADER_TRANSFER_ENCODING: {
  375. if (decoder->content_length) {
  376. AWS_LOGF_ERROR(
  377. AWS_LS_HTTP_STREAM,
  378. "id=%p: Incoming headers for both content-length and transfer-encoding received. This is illegal.",
  379. decoder->logging_id);
  380. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  381. }
  382. if (decoder->body_headers_forbidden) {
  383. AWS_LOGF_ERROR(
  384. AWS_LS_HTTP_STREAM,
  385. "id=%p: Incoming headers for transfer-encoding received, but it is illegal for this message to "
  386. "have a body",
  387. decoder->logging_id);
  388. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  389. }
  390. /* RFC-7230 section 3.3.1 Transfer-Encoding */
  391. /* RFC-7230 section 4.2 Compression Codings */
  392. /* Note that it's possible for multiple Transfer-Encoding headers to exist, in which case the values
  393. * should be appended with those from any previously encountered Transfer-Encoding headers. */
  394. struct aws_byte_cursor split;
  395. AWS_ZERO_STRUCT(split);
  396. while (aws_byte_cursor_next_split(&header.value_data, ',', &split)) {
  397. struct aws_byte_cursor coding = aws_strutil_trim_http_whitespace(split);
  398. int prev_flags = decoder->transfer_encoding;
  399. if (aws_string_eq_byte_cursor_ignore_case(s_transfer_coding_chunked, &coding)) {
  400. decoder->transfer_encoding |= AWS_HTTP_TRANSFER_ENCODING_CHUNKED;
  401. } else if (
  402. aws_string_eq_byte_cursor_ignore_case(s_transfer_coding_compress, &coding) ||
  403. aws_string_eq_byte_cursor_ignore_case(s_transfer_coding_x_compress, &coding)) {
  404. /* A recipient SHOULD consider "x-compress" to be equivalent to "compress". RFC-7230 4.2.1 */
  405. decoder->transfer_encoding |= AWS_HTTP_TRANSFER_ENCODING_DEPRECATED_COMPRESS;
  406. } else if (aws_string_eq_byte_cursor_ignore_case(s_transfer_coding_deflate, &coding)) {
  407. decoder->transfer_encoding |= AWS_HTTP_TRANSFER_ENCODING_DEFLATE;
  408. } else if (
  409. aws_string_eq_byte_cursor_ignore_case(s_transfer_coding_gzip, &coding) ||
  410. aws_string_eq_byte_cursor_ignore_case(s_transfer_coding_x_gzip, &coding)) {
  411. /* A recipient SHOULD consider "x-gzip" to be equivalent to "gzip". RFC-7230 4.2.3 */
  412. decoder->transfer_encoding |= AWS_HTTP_TRANSFER_ENCODING_GZIP;
  413. } else if (coding.len > 0) {
  414. AWS_LOGF_ERROR(
  415. AWS_LS_HTTP_STREAM,
  416. "id=%p: Incoming transfer-encoding header lists unrecognized coding.",
  417. decoder->logging_id);
  418. AWS_LOGF_DEBUG(
  419. AWS_LS_HTTP_STREAM,
  420. "id=%p: Unrecognized coding is: '" PRInSTR "'",
  421. decoder->logging_id,
  422. AWS_BYTE_CURSOR_PRI(coding));
  423. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  424. }
  425. /* If any transfer coding other than chunked is applied to a request payload body, the sender MUST
  426. * apply chunked as the final transfer coding to ensure that the message is properly framed.
  427. * RFC-7230 3.3.1 */
  428. if ((prev_flags & AWS_HTTP_TRANSFER_ENCODING_CHUNKED) && (decoder->transfer_encoding != prev_flags)) {
  429. AWS_LOGF_ERROR(
  430. AWS_LS_HTTP_STREAM,
  431. "id=%p: Incoming transfer-encoding header lists a coding after 'chunked', this is illegal.",
  432. decoder->logging_id);
  433. AWS_LOGF_DEBUG(
  434. AWS_LS_HTTP_STREAM,
  435. "id=%p: Misplaced coding is '" PRInSTR "'",
  436. decoder->logging_id,
  437. AWS_BYTE_CURSOR_PRI(coding));
  438. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  439. }
  440. }
  441. /* TODO: deal with body of indeterminate length, marking it as successful when connection is closed:
  442. *
  443. * A response that has neither chunked transfer coding nor Content-Length is terminated by closure of
  444. * the connection and, thus, is considered complete regardless of the number of message body octets
  445. * received, provided that the header section was received intact.
  446. * RFC-7230 3.4 */
  447. } break;
  448. default:
  449. break;
  450. }
  451. err = decoder->vtable.on_header(&header, decoder->user_data);
  452. if (err) {
  453. return AWS_OP_ERR;
  454. }
  455. s_set_line_state(decoder, s_linestate_header);
  456. return AWS_OP_SUCCESS;
  457. }
  458. static int s_linestate_request(struct aws_h1_decoder *decoder, struct aws_byte_cursor input) {
  459. struct aws_byte_cursor cursors[3];
  460. int err = s_cursor_split_exactly_n_times(input, ' ', cursors, 3); /* extra spaces not allowed */
  461. if (err) {
  462. AWS_LOGF_ERROR(
  463. AWS_LS_HTTP_STREAM, "id=%p: Incoming request line has wrong number of spaces.", decoder->logging_id);
  464. AWS_LOGF_DEBUG(
  465. AWS_LS_HTTP_STREAM,
  466. "id=%p: Bad request line is: '" PRInSTR "'",
  467. decoder->logging_id,
  468. AWS_BYTE_CURSOR_PRI(input));
  469. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  470. }
  471. for (size_t i = 0; i < AWS_ARRAY_SIZE(cursors); ++i) {
  472. if (cursors[i].len == 0) {
  473. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Incoming request line has empty values.", decoder->logging_id);
  474. AWS_LOGF_DEBUG(
  475. AWS_LS_HTTP_STREAM,
  476. "id=%p: Bad request line is: '" PRInSTR "'",
  477. decoder->logging_id,
  478. AWS_BYTE_CURSOR_PRI(input));
  479. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  480. }
  481. }
  482. struct aws_byte_cursor method = cursors[0];
  483. struct aws_byte_cursor uri = cursors[1];
  484. struct aws_byte_cursor version = cursors[2];
  485. if (!aws_strutil_is_http_token(method)) {
  486. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Incoming request has invalid method.", decoder->logging_id);
  487. AWS_LOGF_DEBUG(
  488. AWS_LS_HTTP_STREAM,
  489. "id=%p: Bad request line is: '" PRInSTR "'",
  490. decoder->logging_id,
  491. AWS_BYTE_CURSOR_PRI(input));
  492. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  493. }
  494. if (!aws_strutil_is_http_request_target(uri)) {
  495. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Incoming request has invalid path.", decoder->logging_id);
  496. AWS_LOGF_DEBUG(
  497. AWS_LS_HTTP_STREAM,
  498. "id=%p: Bad request line is: '" PRInSTR "'",
  499. decoder->logging_id,
  500. AWS_BYTE_CURSOR_PRI(input));
  501. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  502. }
  503. struct aws_byte_cursor version_expected = aws_http_version_to_str(AWS_HTTP_VERSION_1_1);
  504. if (!aws_byte_cursor_eq(&version, &version_expected)) {
  505. AWS_LOGF_ERROR(
  506. AWS_LS_HTTP_STREAM, "id=%p: Incoming request uses unsupported HTTP version.", decoder->logging_id);
  507. AWS_LOGF_DEBUG(
  508. AWS_LS_HTTP_STREAM,
  509. "id=%p: Unsupported version is: '" PRInSTR "'",
  510. decoder->logging_id,
  511. AWS_BYTE_CURSOR_PRI(version));
  512. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  513. }
  514. err = decoder->vtable.on_request(aws_http_str_to_method(method), &method, &uri, decoder->user_data);
  515. if (err) {
  516. return AWS_OP_ERR;
  517. }
  518. s_set_line_state(decoder, s_linestate_header);
  519. return AWS_OP_SUCCESS;
  520. }
  /* True for informational (1xx) status codes, i.e. 100 through 199 inclusive. */
  static bool s_check_info_response_status_code(int code_val) {
      const bool outside_1xx = (code_val < 100) || (code_val >= 200);
      return !outside_1xx;
  }
  524. static int s_linestate_response(struct aws_h1_decoder *decoder, struct aws_byte_cursor input) {
  525. struct aws_byte_cursor cursors[3];
  526. int err = s_cursor_split_first_n_times(input, ' ', cursors, 3); /* phrase may contain spaces */
  527. if (err) {
  528. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Incoming response status line is invalid.", decoder->logging_id);
  529. AWS_LOGF_DEBUG(
  530. AWS_LS_HTTP_STREAM,
  531. "id=%p: Bad status line is: '" PRInSTR "'",
  532. decoder->logging_id,
  533. AWS_BYTE_CURSOR_PRI(input));
  534. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  535. }
  536. struct aws_byte_cursor version = cursors[0];
  537. struct aws_byte_cursor code = cursors[1];
  538. struct aws_byte_cursor phrase = cursors[2];
  539. struct aws_byte_cursor version_1_1_expected = aws_http_version_to_str(AWS_HTTP_VERSION_1_1);
  540. struct aws_byte_cursor version_1_0_expected = aws_http_version_to_str(AWS_HTTP_VERSION_1_0);
  541. if (!aws_byte_cursor_eq(&version, &version_1_1_expected) && !aws_byte_cursor_eq(&version, &version_1_0_expected)) {
  542. AWS_LOGF_ERROR(
  543. AWS_LS_HTTP_STREAM, "id=%p: Incoming response uses unsupported HTTP version.", decoder->logging_id);
  544. AWS_LOGF_DEBUG(
  545. AWS_LS_HTTP_STREAM,
  546. "id=%p: Unsupported version is: '" PRInSTR "'",
  547. decoder->logging_id,
  548. AWS_BYTE_CURSOR_PRI(version));
  549. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  550. }
  551. /* Validate phrase */
  552. if (!aws_strutil_is_http_reason_phrase(phrase)) {
  553. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Incoming response has invalid reason phrase.", decoder->logging_id);
  554. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  555. }
  556. /* Status-code is a 3-digit integer. RFC7230 section 3.1.2 */
  557. uint64_t code_val_u64;
  558. err = aws_byte_cursor_utf8_parse_u64(code, &code_val_u64);
  559. if (err || code.len != 3 || code_val_u64 > 999) {
  560. AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Incoming response has invalid status code.", decoder->logging_id);
  561. AWS_LOGF_DEBUG(
  562. AWS_LS_HTTP_STREAM,
  563. "id=%p: Bad status code is: '" PRInSTR "'",
  564. decoder->logging_id,
  565. AWS_BYTE_CURSOR_PRI(code));
  566. return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
  567. }
  568. int code_val = (int)code_val_u64;
  569. /* RFC-7230 section 3.3 Message Body */
  570. decoder->body_headers_ignored |= code_val == AWS_HTTP_STATUS_CODE_304_NOT_MODIFIED;
  571. decoder->body_headers_forbidden = code_val == AWS_HTTP_STATUS_CODE_204_NO_CONTENT || code_val / 100 == 1;
  572. if (s_check_info_response_status_code(code_val)) {
  573. decoder->header_block = AWS_HTTP_HEADER_BLOCK_INFORMATIONAL;
  574. }
  575. err = decoder->vtable.on_response(code_val, decoder->user_data);
  576. if (err) {
  577. return AWS_OP_ERR;
  578. }
  579. s_set_line_state(decoder, s_linestate_header);
  580. return AWS_OP_SUCCESS;
  581. }
  582. struct aws_h1_decoder *aws_h1_decoder_new(struct aws_h1_decoder_params *params) {
  583. AWS_ASSERT(params);
  584. struct aws_h1_decoder *decoder = aws_mem_acquire(params->alloc, sizeof(struct aws_h1_decoder));
  585. if (!decoder) {
  586. return NULL;
  587. }
  588. AWS_ZERO_STRUCT(*decoder);
  589. decoder->alloc = params->alloc;
  590. decoder->user_data = params->user_data;
  591. decoder->vtable = params->vtable;
  592. decoder->is_decoding_requests = params->is_decoding_requests;
  593. aws_byte_buf_init(&decoder->scratch_space, params->alloc, params->scratch_space_initial_size);
  594. s_reset_state(decoder);
  595. return decoder;
  596. }
  597. void aws_h1_decoder_destroy(struct aws_h1_decoder *decoder) {
  598. if (!decoder) {
  599. return;
  600. }
  601. aws_byte_buf_clean_up(&decoder->scratch_space);
  602. aws_mem_release(decoder->alloc, decoder);
  603. }
  604. int aws_h1_decode(struct aws_h1_decoder *decoder, struct aws_byte_cursor *data) {
  605. AWS_ASSERT(decoder);
  606. AWS_ASSERT(data);
  607. struct aws_byte_cursor backup = *data;
  608. while (data->len && !decoder->is_done) {
  609. int err = decoder->run_state(decoder, data);
  610. if (err) {
  611. /* Reset the data param to how we found it */
  612. *data = backup;
  613. return AWS_OP_ERR;
  614. }
  615. }
  616. if (decoder->is_done) {
  617. s_reset_state(decoder);
  618. }
  619. return AWS_OP_SUCCESS;
  620. }
  621. int aws_h1_decoder_get_encoding_flags(const struct aws_h1_decoder *decoder) {
  622. return decoder->transfer_encoding;
  623. }
  624. uint64_t aws_h1_decoder_get_content_length(const struct aws_h1_decoder *decoder) {
  625. return decoder->content_length;
  626. }
  627. bool aws_h1_decoder_get_body_headers_ignored(const struct aws_h1_decoder *decoder) {
  628. return decoder->body_headers_ignored;
  629. }
  630. enum aws_http_header_block aws_h1_decoder_get_header_block(const struct aws_h1_decoder *decoder) {
  631. return decoder->header_block;
  632. }
  633. void aws_h1_decoder_set_logging_id(struct aws_h1_decoder *decoder, const void *id) {
  634. decoder->logging_id = id;
  635. }
  636. void aws_h1_decoder_set_body_headers_ignored(struct aws_h1_decoder *decoder, bool body_headers_ignored) {
  637. decoder->body_headers_ignored = body_headers_ignored;
  638. }