websocket_decoder.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. /**
  2. * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3. * SPDX-License-Identifier: Apache-2.0.
  4. */
  5. #include <aws/http/private/websocket_decoder.h>
  6. #include <aws/common/encoding.h>
  7. #include <inttypes.h>
  /* Signature shared by every state handler in this file. A handler may consume bytes from
   * `data` and may change decoder->state. A non-zero return value reports failure. */
  typedef int state_fn(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data);
  9. /* STATE_INIT: Resets things, consumes no data */
  10. static int s_state_init(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
  11. (void)data;
  12. AWS_ZERO_STRUCT(decoder->current_frame);
  13. decoder->state = AWS_WEBSOCKET_DECODER_STATE_OPCODE_BYTE;
  14. return AWS_OP_SUCCESS;
  15. }
  16. /* STATE_OPCODE_BYTE: Decode first byte of frame, which has all kinds of goodies in it. */
  17. static int s_state_opcode_byte(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
  18. if (data->len == 0) {
  19. return AWS_OP_SUCCESS;
  20. }
  21. uint8_t byte = data->ptr[0];
  22. aws_byte_cursor_advance(data, 1);
  23. /* first 4 bits are all bools */
  24. decoder->current_frame.fin = byte & 0x80;
  25. decoder->current_frame.rsv[0] = byte & 0x40;
  26. decoder->current_frame.rsv[1] = byte & 0x20;
  27. decoder->current_frame.rsv[2] = byte & 0x10;
  28. /* next 4 bits are opcode */
  29. decoder->current_frame.opcode = byte & 0x0F;
  30. /* RFC-6455 Section 5.2 - Opcode
  31. * If an unknown opcode is received, the receiving endpoint MUST _Fail the WebSocket Connection_. */
  32. switch (decoder->current_frame.opcode) {
  33. case AWS_WEBSOCKET_OPCODE_CONTINUATION:
  34. case AWS_WEBSOCKET_OPCODE_TEXT:
  35. case AWS_WEBSOCKET_OPCODE_BINARY:
  36. case AWS_WEBSOCKET_OPCODE_CLOSE:
  37. case AWS_WEBSOCKET_OPCODE_PING:
  38. case AWS_WEBSOCKET_OPCODE_PONG:
  39. break;
  40. default:
  41. AWS_LOGF_ERROR(
  42. AWS_LS_HTTP_WEBSOCKET,
  43. "id=%p: Received frame with unknown opcode 0x%" PRIx8,
  44. (void *)decoder->user_data,
  45. decoder->current_frame.opcode);
  46. return aws_raise_error(AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR);
  47. }
  48. /* RFC-6455 Section 5.2 Fragmentation
  49. *
  50. * Data frames with the FIN bit clear are considered fragmented and must be followed by
  51. * 1+ CONTINUATION frames, where only the final CONTINUATION frame's FIN bit is set.
  52. *
  53. * Control frames may be injected in the middle of a fragmented message,
  54. * but control frames may not be fragmented themselves.
  55. */
  56. if (aws_websocket_is_data_frame(decoder->current_frame.opcode)) {
  57. bool is_continuation_frame = AWS_WEBSOCKET_OPCODE_CONTINUATION == decoder->current_frame.opcode;
  58. if (decoder->expecting_continuation_data_frame != is_continuation_frame) {
  59. AWS_LOGF_ERROR(
  60. AWS_LS_HTTP_WEBSOCKET,
  61. "id=%p: Fragmentation error. Received start of new message before end of previous message",
  62. (void *)decoder->user_data);
  63. return aws_raise_error(AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR);
  64. }
  65. decoder->expecting_continuation_data_frame = !decoder->current_frame.fin;
  66. } else {
  67. /* Control frames themselves MUST NOT be fragmented. */
  68. if (!decoder->current_frame.fin) {
  69. AWS_LOGF_ERROR(
  70. AWS_LS_HTTP_WEBSOCKET,
  71. "id=%p: Received fragmented control frame. This is illegal",
  72. (void *)decoder->user_data);
  73. return aws_raise_error(AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR);
  74. }
  75. }
  76. if (decoder->current_frame.opcode == AWS_WEBSOCKET_OPCODE_TEXT) {
  77. decoder->processing_text_message = true;
  78. }
  79. decoder->state = AWS_WEBSOCKET_DECODER_STATE_LENGTH_BYTE;
  80. return AWS_OP_SUCCESS;
  81. }
  82. /* STATE_LENGTH_BYTE: Decode byte containing length, determine if we need to decode extended length. */
  83. static int s_state_length_byte(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
  84. if (data->len == 0) {
  85. return AWS_OP_SUCCESS;
  86. }
  87. uint8_t byte = data->ptr[0];
  88. aws_byte_cursor_advance(data, 1);
  89. /* first bit is a bool */
  90. decoder->current_frame.masked = byte & 0x80;
  91. /* remaining 7 bits are payload length */
  92. decoder->current_frame.payload_length = byte & 0x7F;
  93. if (decoder->current_frame.payload_length >= AWS_WEBSOCKET_7BIT_VALUE_FOR_2BYTE_EXTENDED_LENGTH) {
  94. /* If 7bit payload length has a high value, then the next few bytes contain the real payload length */
  95. decoder->state_bytes_processed = 0;
  96. decoder->state = AWS_WEBSOCKET_DECODER_STATE_EXTENDED_LENGTH;
  97. } else {
  98. /* If 7bit payload length has low value, that's the actual payload size, jump past EXTENDED_LENGTH state */
  99. decoder->state = AWS_WEBSOCKET_DECODER_STATE_MASKING_KEY_CHECK;
  100. }
  101. return AWS_OP_SUCCESS;
  102. }
  103. /* STATE_EXTENDED_LENGTH: Decode extended length (state skipped if no extended length). */
  104. static int s_state_extended_length(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
  105. if (data->len == 0) {
  106. return AWS_OP_SUCCESS;
  107. }
  108. /* The 7bit payload value loaded during the previous state indicated that
  109. * actual payload length is encoded across the next 2 or 8 bytes. */
  110. uint8_t total_bytes_extended_length;
  111. uint64_t min_acceptable_value;
  112. uint64_t max_acceptable_value;
  113. if (decoder->current_frame.payload_length == AWS_WEBSOCKET_7BIT_VALUE_FOR_2BYTE_EXTENDED_LENGTH) {
  114. total_bytes_extended_length = 2;
  115. min_acceptable_value = AWS_WEBSOCKET_2BYTE_EXTENDED_LENGTH_MIN_VALUE;
  116. max_acceptable_value = AWS_WEBSOCKET_2BYTE_EXTENDED_LENGTH_MAX_VALUE;
  117. } else {
  118. AWS_ASSERT(decoder->current_frame.payload_length == AWS_WEBSOCKET_7BIT_VALUE_FOR_8BYTE_EXTENDED_LENGTH);
  119. total_bytes_extended_length = 8;
  120. min_acceptable_value = AWS_WEBSOCKET_8BYTE_EXTENDED_LENGTH_MIN_VALUE;
  121. max_acceptable_value = AWS_WEBSOCKET_8BYTE_EXTENDED_LENGTH_MAX_VALUE;
  122. }
  123. /* Copy bytes of extended-length to state_cache, we'll process them later.*/
  124. AWS_ASSERT(total_bytes_extended_length > decoder->state_bytes_processed);
  125. size_t remaining_bytes = (size_t)(total_bytes_extended_length - decoder->state_bytes_processed);
  126. size_t bytes_to_consume = remaining_bytes <= data->len ? remaining_bytes : data->len;
  127. AWS_ASSERT(bytes_to_consume + decoder->state_bytes_processed <= sizeof(decoder->state_cache));
  128. memcpy(decoder->state_cache + decoder->state_bytes_processed, data->ptr, bytes_to_consume);
  129. aws_byte_cursor_advance(data, bytes_to_consume);
  130. decoder->state_bytes_processed += bytes_to_consume;
  131. /* Return, still waiting on more bytes */
  132. if (decoder->state_bytes_processed < total_bytes_extended_length) {
  133. return AWS_OP_SUCCESS;
  134. }
  135. /* All bytes have been copied into state_cache, now read them together as one number,
  136. * transforming from network byte order (big endian) to native endianness. */
  137. struct aws_byte_cursor cache_cursor = aws_byte_cursor_from_array(decoder->state_cache, total_bytes_extended_length);
  138. if (total_bytes_extended_length == 2) {
  139. uint16_t val;
  140. aws_byte_cursor_read_be16(&cache_cursor, &val);
  141. decoder->current_frame.payload_length = val;
  142. } else {
  143. aws_byte_cursor_read_be64(&cache_cursor, &decoder->current_frame.payload_length);
  144. }
  145. if (decoder->current_frame.payload_length < min_acceptable_value ||
  146. decoder->current_frame.payload_length > max_acceptable_value) {
  147. AWS_LOGF_ERROR(AWS_LS_HTTP_WEBSOCKET, "id=%p: Failed to decode payload length", (void *)decoder->user_data);
  148. return aws_raise_error(AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR);
  149. }
  150. decoder->state = AWS_WEBSOCKET_DECODER_STATE_MASKING_KEY_CHECK;
  151. return AWS_OP_SUCCESS;
  152. }
  153. /* MASKING_KEY_CHECK: Determine if we need to decode masking-key. Consumes no data. */
  154. static int s_state_masking_key_check(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
  155. (void)data;
  156. /* If mask bit was set, move to next state to process 4 bytes of masking key.
  157. * Otherwise skip next step, there is no masking key. */
  158. if (decoder->current_frame.masked) {
  159. decoder->state = AWS_WEBSOCKET_DECODER_STATE_MASKING_KEY;
  160. decoder->state_bytes_processed = 0;
  161. } else {
  162. decoder->state = AWS_WEBSOCKET_DECODER_STATE_PAYLOAD_CHECK;
  163. }
  164. return AWS_OP_SUCCESS;
  165. }
  166. /* MASKING_KEY: Decode masking-key (state skipped if no masking key). */
  167. static int s_state_masking_key(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
  168. if (data->len == 0) {
  169. return AWS_OP_SUCCESS;
  170. }
  171. AWS_ASSERT(4 > decoder->state_bytes_processed);
  172. size_t bytes_remaining = 4 - (size_t)decoder->state_bytes_processed;
  173. size_t bytes_to_consume = bytes_remaining < data->len ? bytes_remaining : data->len;
  174. memcpy(decoder->current_frame.masking_key + decoder->state_bytes_processed, data->ptr, bytes_to_consume);
  175. aws_byte_cursor_advance(data, bytes_to_consume);
  176. decoder->state_bytes_processed += bytes_to_consume;
  177. /* If all bytes consumed, proceed to next state */
  178. if (decoder->state_bytes_processed == 4) {
  179. decoder->state = AWS_WEBSOCKET_DECODER_STATE_PAYLOAD_CHECK;
  180. }
  181. return AWS_OP_SUCCESS;
  182. }
  183. /* PAYLOAD_CHECK: Determine if we need to decode a payload. Consumes no data. */
  184. static int s_state_payload_check(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
  185. (void)data;
  186. /* Invoke on_frame() callback to inform user of non-payload data. */
  187. int err = decoder->on_frame(&decoder->current_frame, decoder->user_data);
  188. if (err) {
  189. return AWS_OP_ERR;
  190. }
  191. /* Choose next state: either we have payload to process or we don't. */
  192. if (decoder->current_frame.payload_length > 0) {
  193. decoder->state_bytes_processed = 0;
  194. decoder->state = AWS_WEBSOCKET_DECODER_STATE_PAYLOAD;
  195. } else {
  196. decoder->state = AWS_WEBSOCKET_DECODER_STATE_FRAME_END;
  197. }
  198. return AWS_OP_SUCCESS;
  199. }
  200. /* PAYLOAD: Decode payload until we're done (state skipped if no payload). */
  201. static int s_state_payload(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
  202. if (data->len == 0) {
  203. return AWS_OP_SUCCESS;
  204. }
  205. AWS_ASSERT(decoder->current_frame.payload_length > decoder->state_bytes_processed);
  206. uint64_t bytes_remaining = decoder->current_frame.payload_length - decoder->state_bytes_processed;
  207. size_t bytes_to_consume = bytes_remaining < data->len ? (size_t)bytes_remaining : data->len;
  208. struct aws_byte_cursor payload = aws_byte_cursor_advance(data, bytes_to_consume);
  209. /* Unmask data, if necessary.
  210. * RFC-6455 Section 5.3 Client-to-Server Masking
  211. * Each byte of payload is XOR against a byte of the masking-key */
  212. if (decoder->current_frame.masked) {
  213. uint64_t mask_index = decoder->state_bytes_processed;
  214. /* Optimization idea: don't do this 1 byte at a time */
  215. uint8_t *current_byte = payload.ptr;
  216. uint8_t *end_byte = payload.ptr + payload.len;
  217. while (current_byte != end_byte) {
  218. *current_byte++ ^= decoder->current_frame.masking_key[mask_index++ % 4];
  219. }
  220. }
  221. /* TODO: validate payload of CLOSE frame */
  222. /* Validate the UTF-8 for TEXT messages (a TEXT frame and any subsequent CONTINUATION frames) */
  223. if (decoder->processing_text_message && aws_websocket_is_data_frame(decoder->current_frame.opcode)) {
  224. if (aws_utf8_decoder_update(decoder->text_message_validator, payload)) {
  225. AWS_LOGF_ERROR(AWS_LS_HTTP_WEBSOCKET, "id=%p: Received invalid UTF-8", (void *)decoder->user_data);
  226. return aws_raise_error(AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR);
  227. }
  228. }
  229. /* Invoke on_payload() callback to inform user of payload data */
  230. int err = decoder->on_payload(payload, decoder->user_data);
  231. if (err) {
  232. return AWS_OP_ERR;
  233. }
  234. decoder->state_bytes_processed += payload.len;
  235. AWS_ASSERT(decoder->state_bytes_processed <= decoder->current_frame.payload_length);
  236. /* If all data consumed, proceed to next state. */
  237. if (decoder->state_bytes_processed == decoder->current_frame.payload_length) {
  238. decoder->state = AWS_WEBSOCKET_DECODER_STATE_FRAME_END;
  239. }
  240. return AWS_OP_SUCCESS;
  241. }
  242. /* FRAME_END: Perform checks once we reach the end of the frame. */
  243. static int s_state_frame_end(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
  244. (void)data;
  245. /* If we're done processing a text message (a TEXT frame and any subsequent CONTINUATION frames),
  246. * complete the UTF-8 validation */
  247. if (decoder->processing_text_message && aws_websocket_is_data_frame(decoder->current_frame.opcode) &&
  248. decoder->current_frame.fin) {
  249. if (aws_utf8_decoder_finalize(decoder->text_message_validator)) {
  250. AWS_LOGF_ERROR(
  251. AWS_LS_HTTP_WEBSOCKET,
  252. "id=%p: Received invalid UTF-8 (incomplete encoding)",
  253. (void *)decoder->user_data);
  254. return aws_raise_error(AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR);
  255. }
  256. decoder->processing_text_message = false;
  257. }
  258. /* Done! */
  259. decoder->state = AWS_WEBSOCKET_DECODER_STATE_DONE;
  260. return AWS_OP_SUCCESS;
  261. }
  262. static state_fn *s_state_functions[AWS_WEBSOCKET_DECODER_STATE_DONE] = {
  263. s_state_init,
  264. s_state_opcode_byte,
  265. s_state_length_byte,
  266. s_state_extended_length,
  267. s_state_masking_key_check,
  268. s_state_masking_key,
  269. s_state_payload_check,
  270. s_state_payload,
  271. s_state_frame_end,
  272. };
  273. int aws_websocket_decoder_process(
  274. struct aws_websocket_decoder *decoder,
  275. struct aws_byte_cursor *data,
  276. bool *frame_complete) {
  277. /* Run state machine until frame is completely decoded, or the state stops changing.
  278. * Note that we don't stop looping when data->len reaches zero, because some states consume no data. */
  279. while (decoder->state != AWS_WEBSOCKET_DECODER_STATE_DONE) {
  280. enum aws_websocket_decoder_state prev_state = decoder->state;
  281. int err = s_state_functions[decoder->state](decoder, data);
  282. if (err) {
  283. return AWS_OP_ERR;
  284. }
  285. if (decoder->state == prev_state) {
  286. AWS_ASSERT(data->len == 0); /* If no more work to do, all possible data should have been consumed */
  287. break;
  288. }
  289. }
  290. if (decoder->state == AWS_WEBSOCKET_DECODER_STATE_DONE) {
  291. decoder->state = AWS_WEBSOCKET_DECODER_STATE_INIT;
  292. *frame_complete = true;
  293. return AWS_OP_SUCCESS;
  294. }
  295. *frame_complete = false;
  296. return AWS_OP_SUCCESS;
  297. }
  298. void aws_websocket_decoder_init(
  299. struct aws_websocket_decoder *decoder,
  300. struct aws_allocator *alloc,
  301. aws_websocket_decoder_frame_fn *on_frame,
  302. aws_websocket_decoder_payload_fn *on_payload,
  303. void *user_data) {
  304. AWS_ZERO_STRUCT(*decoder);
  305. decoder->user_data = user_data;
  306. decoder->on_frame = on_frame;
  307. decoder->on_payload = on_payload;
  308. decoder->text_message_validator = aws_utf8_decoder_new(alloc, NULL /*options*/);
  309. }
  310. void aws_websocket_decoder_clean_up(struct aws_websocket_decoder *decoder) {
  311. aws_utf8_decoder_destroy(decoder->text_message_validator);
  312. AWS_ZERO_STRUCT(*decoder);
  313. }