s2n_ktls_io.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. /*
  2. * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License").
  5. * You may not use this file except in compliance with the License.
  6. * A copy of the License is located at
  7. *
  8. * http://aws.amazon.com/apache2.0
  9. *
  10. * or in the "license" file accompanying this file. This file is distributed
  11. * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
  12. * express or implied. See the License for the specific language governing
  13. * permissions and limitations under the License.
  14. */
  15. #if defined(__FreeBSD__) || defined(__APPLE__)
  16. /* https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_socket.h.html
  17. * The POSIX standard does not define the CMSG_LEN and CMSG_SPACE macros. FreeBSD
  18. * and APPLE check and disable these macros if the _POSIX_C_SOURCE flag is set.
  19. *
  20. * Since s2n-tls already unsets the _POSIX_C_SOURCE in other files and is not
  21. * POSIX compliant, we continue the pattern here.
  22. */
  23. #undef _POSIX_C_SOURCE
  24. #endif
  25. #include <sys/socket.h>
  26. #ifdef S2N_LINUX_SENDFILE
  27. #include <sys/sendfile.h>
  28. #endif
  29. #include "error/s2n_errno.h"
  30. #include "tls/s2n_ktls.h"
  31. #include "tls/s2n_tls.h"
  32. #include "utils/s2n_io.h"
  33. #include "utils/s2n_result.h"
  34. #include "utils/s2n_safety.h"
  35. #include "utils/s2n_socket.h"
  36. /* record_type is of type uint8_t */
  37. #define S2N_KTLS_RECORD_TYPE_SIZE (sizeof(uint8_t))
  38. #define S2N_KTLS_CONTROL_BUFFER_SIZE (CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE))
  39. #define S2N_MAX_STACK_IOVECS 16
  40. #define S2N_MAX_STACK_IOVECS_MEM (S2N_MAX_STACK_IOVECS * sizeof(struct iovec))
  41. /* Used to override sendmsg and recvmsg for testing. */
  42. static ssize_t s2n_ktls_default_sendmsg(void *io_context, const struct msghdr *msg);
  43. static ssize_t s2n_ktls_default_recvmsg(void *io_context, struct msghdr *msg);
  44. s2n_ktls_sendmsg_fn s2n_sendmsg_fn = s2n_ktls_default_sendmsg;
  45. s2n_ktls_recvmsg_fn s2n_recvmsg_fn = s2n_ktls_default_recvmsg;
  46. S2N_RESULT s2n_ktls_set_sendmsg_cb(struct s2n_connection *conn, s2n_ktls_sendmsg_fn send_cb,
  47. void *send_ctx)
  48. {
  49. RESULT_ENSURE_REF(conn);
  50. RESULT_ENSURE_REF(send_ctx);
  51. RESULT_ENSURE(s2n_in_test(), S2N_ERR_NOT_IN_TEST);
  52. conn->send_io_context = send_ctx;
  53. s2n_sendmsg_fn = send_cb;
  54. return S2N_RESULT_OK;
  55. }
  56. S2N_RESULT s2n_ktls_set_recvmsg_cb(struct s2n_connection *conn, s2n_ktls_recvmsg_fn recv_cb,
  57. void *recv_ctx)
  58. {
  59. RESULT_ENSURE_REF(conn);
  60. RESULT_ENSURE_REF(recv_ctx);
  61. RESULT_ENSURE(s2n_in_test(), S2N_ERR_NOT_IN_TEST);
  62. conn->recv_io_context = recv_ctx;
  63. s2n_recvmsg_fn = recv_cb;
  64. return S2N_RESULT_OK;
  65. }
  66. static ssize_t s2n_ktls_default_recvmsg(void *io_context, struct msghdr *msg)
  67. {
  68. POSIX_ENSURE_REF(io_context);
  69. POSIX_ENSURE_REF(msg);
  70. const struct s2n_socket_read_io_context *peer_socket_ctx = io_context;
  71. POSIX_ENSURE_REF(peer_socket_ctx);
  72. int fd = peer_socket_ctx->fd;
  73. return recvmsg(fd, msg, 0);
  74. }
  75. static ssize_t s2n_ktls_default_sendmsg(void *io_context, const struct msghdr *msg)
  76. {
  77. POSIX_ENSURE_REF(io_context);
  78. POSIX_ENSURE_REF(msg);
  79. const struct s2n_socket_write_io_context *peer_socket_ctx = io_context;
  80. POSIX_ENSURE_REF(peer_socket_ctx);
  81. int fd = peer_socket_ctx->fd;
  82. return sendmsg(fd, msg, 0);
  83. }
  84. S2N_RESULT s2n_ktls_set_control_data(struct msghdr *msg, char *buf, size_t buf_size,
  85. int cmsg_type, uint8_t record_type)
  86. {
  87. RESULT_ENSURE_REF(msg);
  88. RESULT_ENSURE_REF(buf);
  89. /*
  90. * https://man7.org/linux/man-pages/man3/cmsg.3.html
  91. * To create ancillary data, first initialize the msg_controllen
  92. * member of the msghdr with the length of the control message
  93. * buffer.
  94. */
  95. msg->msg_control = buf;
  96. msg->msg_controllen = buf_size;
  97. /*
  98. * https://man7.org/linux/man-pages/man3/cmsg.3.html
  99. * Use CMSG_FIRSTHDR() on the msghdr to get the first
  100. * control message and CMSG_NXTHDR() to get all subsequent ones.
  101. */
  102. struct cmsghdr *hdr = CMSG_FIRSTHDR(msg);
  103. RESULT_ENSURE_REF(hdr);
  104. /*
  105. * https://man7.org/linux/man-pages/man3/cmsg.3.html
  106. * In each control message, initialize cmsg_len (with CMSG_LEN()), the
  107. * other cmsghdr header fields, and the data portion using
  108. * CMSG_DATA().
  109. */
  110. hdr->cmsg_len = CMSG_LEN(S2N_KTLS_RECORD_TYPE_SIZE);
  111. hdr->cmsg_level = S2N_SOL_TLS;
  112. hdr->cmsg_type = cmsg_type;
  113. *CMSG_DATA(hdr) = record_type;
  114. /*
  115. * https://man7.org/linux/man-pages/man3/cmsg.3.html
  116. * Finally, the msg_controllen field of the msghdr
  117. * should be set to the sum of the CMSG_SPACE() of the length of all
  118. * control messages in the buffer
  119. */
  120. RESULT_ENSURE_GTE(msg->msg_controllen, CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE));
  121. msg->msg_controllen = CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE);
  122. return S2N_RESULT_OK;
  123. }
  124. /* Expect to receive a single cmsghdr containing the TLS record_type.
  125. *
  126. * s2n-tls allocates enough space to receive a single cmsghdr. Since this is
  127. * used to get the record_type when receiving over kTLS (enabled via
  128. * `s2n_connection_ktls_enable_recv`), the application should not configure
  129. * the socket to receive additional control messages. In the event s2n-tls
  130. * can not retrieve the record_type, it is safer to drop the record.
  131. */
  132. S2N_RESULT s2n_ktls_get_control_data(struct msghdr *msg, int cmsg_type, uint8_t *record_type)
  133. {
  134. RESULT_ENSURE_REF(msg);
  135. RESULT_ENSURE_REF(record_type);
  136. /* https://man7.org/linux/man-pages/man3/recvmsg.3p.html
  137. * MSG_CTRUNC Control data was truncated.
  138. */
  139. if (msg->msg_flags & MSG_CTRUNC) {
  140. RESULT_BAIL(S2N_ERR_KTLS_BAD_CMSG);
  141. }
  142. /*
  143. * https://man7.org/linux/man-pages/man3/cmsg.3.html
  144. * To create ancillary data, first initialize the msg_controllen
  145. * member of the msghdr with the length of the control message
  146. * buffer.
  147. */
  148. RESULT_ENSURE(msg->msg_control, S2N_ERR_SAFETY);
  149. RESULT_ENSURE(msg->msg_controllen >= CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE), S2N_ERR_SAFETY);
  150. /* https://man7.org/linux/man-pages/man3/cmsg.3.html
  151. * Use CMSG_FIRSTHDR() on the msghdr to get the first
  152. * control message and CMSG_NXTHDR() to get all subsequent ones.
  153. */
  154. struct cmsghdr *hdr = CMSG_FIRSTHDR(msg);
  155. RESULT_ENSURE(hdr, S2N_ERR_KTLS_BAD_CMSG);
  156. /*
  157. * https://man7.org/linux/man-pages/man3/cmsg.3.html
  158. * In each control message, initialize cmsg_len (with CMSG_LEN()), the
  159. * other cmsghdr header fields, and the data portion using
  160. * CMSG_DATA().
  161. */
  162. RESULT_ENSURE(hdr->cmsg_level == S2N_SOL_TLS, S2N_ERR_KTLS_BAD_CMSG);
  163. RESULT_ENSURE(hdr->cmsg_type == cmsg_type, S2N_ERR_KTLS_BAD_CMSG);
  164. RESULT_ENSURE(hdr->cmsg_len == CMSG_LEN(S2N_KTLS_RECORD_TYPE_SIZE), S2N_ERR_KTLS_BAD_CMSG);
  165. *record_type = *CMSG_DATA(hdr);
  166. return S2N_RESULT_OK;
  167. }
  168. S2N_RESULT s2n_ktls_sendmsg(void *io_context, uint8_t record_type, const struct iovec *msg_iov,
  169. size_t msg_iovlen, s2n_blocked_status *blocked, size_t *bytes_written)
  170. {
  171. RESULT_ENSURE_REF(bytes_written);
  172. RESULT_ENSURE_REF(blocked);
  173. RESULT_ENSURE(msg_iov != NULL || msg_iovlen == 0, S2N_ERR_NULL);
  174. *blocked = S2N_BLOCKED_ON_WRITE;
  175. *bytes_written = 0;
  176. struct msghdr msg = {
  177. /* msghdr requires a non-const iovec. This is safe because s2n-tls does
  178. * not modify msg_iov after this point.
  179. */
  180. .msg_iov = (struct iovec *) (uintptr_t) msg_iov,
  181. .msg_iovlen = msg_iovlen,
  182. };
  183. char control_data[S2N_KTLS_CONTROL_BUFFER_SIZE] = { 0 };
  184. RESULT_GUARD(s2n_ktls_set_control_data(&msg, control_data, sizeof(control_data),
  185. S2N_TLS_SET_RECORD_TYPE, record_type));
  186. ssize_t result = 0;
  187. S2N_IO_RETRY_EINTR(result, s2n_sendmsg_fn(io_context, &msg));
  188. RESULT_GUARD(s2n_io_check_write_result(result));
  189. *blocked = S2N_NOT_BLOCKED;
  190. *bytes_written = result;
  191. return S2N_RESULT_OK;
  192. }
  193. S2N_RESULT s2n_ktls_recvmsg(void *io_context, uint8_t *record_type, uint8_t *buf,
  194. size_t buf_len, s2n_blocked_status *blocked, size_t *bytes_read)
  195. {
  196. RESULT_ENSURE_REF(record_type);
  197. RESULT_ENSURE_REF(bytes_read);
  198. RESULT_ENSURE_REF(blocked);
  199. RESULT_ENSURE_REF(buf);
  200. /* Ensure that buf_len is > 0 since trying to receive 0 bytes does not
  201. * make sense and a return value of `0` from recvmsg is treated as EOF.
  202. */
  203. RESULT_ENSURE_GT(buf_len, 0);
  204. *blocked = S2N_BLOCKED_ON_READ;
  205. *record_type = 0;
  206. *bytes_read = 0;
  207. struct iovec msg_iov = {
  208. .iov_base = buf,
  209. .iov_len = buf_len
  210. };
  211. struct msghdr msg = {
  212. .msg_iov = &msg_iov,
  213. .msg_iovlen = 1,
  214. };
  215. /*
  216. * https://man7.org/linux/man-pages/man3/cmsg.3.html
  217. * To create ancillary data, first initialize the msg_controllen
  218. * member of the msghdr with the length of the control message
  219. * buffer.
  220. */
  221. char control_data[S2N_KTLS_CONTROL_BUFFER_SIZE] = { 0 };
  222. msg.msg_controllen = sizeof(control_data);
  223. msg.msg_control = control_data;
  224. ssize_t result = 0;
  225. S2N_IO_RETRY_EINTR(result, s2n_recvmsg_fn(io_context, &msg));
  226. RESULT_GUARD(s2n_io_check_read_result(result));
  227. RESULT_GUARD(s2n_ktls_get_control_data(&msg, S2N_TLS_GET_RECORD_TYPE, record_type));
  228. *blocked = S2N_NOT_BLOCKED;
  229. *bytes_read = result;
  230. return S2N_RESULT_OK;
  231. }
  232. /* The iovec array `bufs` is constant and owned by the application.
  233. *
  234. * However, we need to apply the given offset to `bufs`. That may involve
  235. * updating the iov_base and iov_len of entries in `bufs` to reflect the bytes
  236. * already sent. Because `bufs` is constant, we need to instead copy `bufs` and
  237. * modify the copy.
  238. *
  239. * Since one of the primary benefits of kTLS is that we avoid buffering application
  240. * data and can pass application data as-is to the kernel, we try to limit the
  241. * situations where we need to copy `bufs` and use stack memory where possible.
  242. *
  243. * Note: We are copying an array of iovecs here, NOT the scattered application
  244. * data the iovecs reference. On Linux, the maximum data copied would be
  245. * 1024 (IOV_MAX on Linux) * 16 (sizeof(struct iovec)) = ~16KB.
  246. *
  247. * To avoid any copies when using a large number of iovecs, applications should
  248. * call s2n_sendv instead of s2n_sendv_with_offset.
  249. */
  250. static S2N_RESULT s2n_ktls_update_bufs_with_offset(const struct iovec **bufs, size_t *count,
  251. size_t offs, struct s2n_blob *mem)
  252. {
  253. RESULT_ENSURE_REF(bufs);
  254. RESULT_ENSURE_REF(count);
  255. RESULT_ENSURE(*bufs != NULL || *count == 0, S2N_ERR_NULL);
  256. RESULT_ENSURE_REF(mem);
  257. size_t skipped = 0;
  258. while (offs > 0) {
  259. /* If we need to skip more iovecs than actually exist,
  260. * then the offset is too large and therefore invalid.
  261. */
  262. RESULT_ENSURE(skipped < *count, S2N_ERR_INVALID_ARGUMENT);
  263. size_t iov_len = (*bufs)[skipped].iov_len;
  264. /* This is the last iovec affected by the offset. */
  265. if (offs < iov_len) {
  266. break;
  267. }
  268. offs -= iov_len;
  269. skipped++;
  270. }
  271. *count = (*count) - skipped;
  272. if (*count == 0) {
  273. return S2N_RESULT_OK;
  274. }
  275. *bufs = &(*bufs)[skipped];
  276. if (offs == 0) {
  277. return S2N_RESULT_OK;
  278. }
  279. size_t size = (*count) * (sizeof(struct iovec));
  280. /* If possible, use the existing stack memory in `mem` for the copy.
  281. * Otherwise, we need to allocate sufficient new heap memory. */
  282. if (size > mem->size) {
  283. RESULT_GUARD_POSIX(s2n_alloc(mem, size));
  284. }
  285. struct iovec *new_bufs = (struct iovec *) (void *) mem->data;
  286. RESULT_CHECKED_MEMCPY(new_bufs, *bufs, size);
  287. new_bufs[0].iov_base = (uint8_t *) new_bufs[0].iov_base + offs;
  288. new_bufs[0].iov_len = new_bufs[0].iov_len - offs;
  289. *bufs = new_bufs;
  290. return S2N_RESULT_OK;
  291. }
  292. ssize_t s2n_ktls_sendv_with_offset(struct s2n_connection *conn, const struct iovec *bufs,
  293. ssize_t count_in, ssize_t offs_in, s2n_blocked_status *blocked)
  294. {
  295. POSIX_ENSURE_REF(conn);
  296. POSIX_ENSURE(count_in >= 0, S2N_ERR_INVALID_ARGUMENT);
  297. size_t count = count_in;
  298. POSIX_ENSURE(offs_in >= 0, S2N_ERR_INVALID_ARGUMENT);
  299. size_t offs = offs_in;
  300. DEFER_CLEANUP(struct s2n_blob new_bufs = { 0 }, s2n_free_or_wipe);
  301. uint8_t new_bufs_mem[S2N_MAX_STACK_IOVECS_MEM] = { 0 };
  302. POSIX_GUARD(s2n_blob_init(&new_bufs, new_bufs_mem, sizeof(new_bufs_mem)));
  303. if (offs > 0) {
  304. POSIX_GUARD_RESULT(s2n_ktls_update_bufs_with_offset(&bufs, &count, offs, &new_bufs));
  305. }
  306. size_t bytes_written = 0;
  307. POSIX_GUARD_RESULT(s2n_ktls_sendmsg(conn->send_io_context, TLS_APPLICATION_DATA,
  308. bufs, count, blocked, &bytes_written));
  309. return bytes_written;
  310. }
  311. int s2n_ktls_send_cb(void *io_context, const uint8_t *buf, uint32_t len)
  312. {
  313. POSIX_ENSURE_REF(io_context);
  314. POSIX_ENSURE_REF(buf);
  315. /* For now, all control records are assumed to be alerts.
  316. * We can set the record_type on the io_context in the future.
  317. */
  318. const uint8_t record_type = TLS_ALERT;
  319. const struct iovec iov = {
  320. .iov_base = (void *) (uintptr_t) buf,
  321. .iov_len = len,
  322. };
  323. s2n_blocked_status blocked = S2N_NOT_BLOCKED;
  324. size_t bytes_written = 0;
  325. POSIX_GUARD_RESULT(s2n_ktls_sendmsg(io_context, record_type, &iov, 1,
  326. &blocked, &bytes_written));
  327. POSIX_ENSURE_LTE(bytes_written, len);
  328. return bytes_written;
  329. }
  330. int s2n_ktls_record_writev(struct s2n_connection *conn, uint8_t content_type,
  331. const struct iovec *in, int in_count, size_t offs, size_t to_write)
  332. {
  333. POSIX_ENSURE_REF(conn);
  334. POSIX_ENSURE(in_count > 0, S2N_ERR_INVALID_ARGUMENT);
  335. size_t count = in_count;
  336. POSIX_ENSURE_REF(in);
  337. /* Currently, ktls only supports sending alerts.
  338. * To also support handshake messages, we would need a way to track record_type.
  339. * We could add a field to the send io context.
  340. */
  341. POSIX_ENSURE(content_type == TLS_ALERT, S2N_ERR_UNIMPLEMENTED);
  342. /* When stuffers automatically resize, they allocate a potentially large
  343. * chunk of memory to avoid repeated resizes.
  344. * Since ktls only uses conn->out for control messages (alerts and eventually
  345. * handshake messages), we expect infrequent small writes with conn->out
  346. * freed in between. Since we're therefore more concerned with the size of
  347. * the allocation than the frequency, use a more accurate size for each write.
  348. */
  349. POSIX_GUARD(s2n_stuffer_resize_if_empty(&conn->out, to_write));
  350. POSIX_GUARD(s2n_stuffer_writev_bytes(&conn->out, in, count, offs, to_write));
  351. return to_write;
  352. }
  353. int s2n_sendfile(struct s2n_connection *conn, int in_fd, off_t offset, size_t count,
  354. size_t *bytes_written, s2n_blocked_status *blocked)
  355. {
  356. POSIX_ENSURE_REF(blocked);
  357. *blocked = S2N_BLOCKED_ON_WRITE;
  358. POSIX_ENSURE_REF(bytes_written);
  359. *bytes_written = 0;
  360. POSIX_ENSURE_REF(conn);
  361. POSIX_ENSURE(conn->ktls_send_enabled, S2N_ERR_KTLS_UNSUPPORTED_CONN);
  362. int out_fd = 0;
  363. POSIX_GUARD_RESULT(s2n_ktls_get_file_descriptor(conn, S2N_KTLS_MODE_SEND, &out_fd));
  364. #ifdef S2N_LINUX_SENDFILE
  365. /* https://man7.org/linux/man-pages/man2/sendfile.2.html */
  366. ssize_t result = 0;
  367. S2N_IO_RETRY_EINTR(result, sendfile(out_fd, in_fd, &offset, count));
  368. POSIX_GUARD_RESULT(s2n_io_check_write_result(result));
  369. *bytes_written = result;
  370. #else
  371. POSIX_BAIL(S2N_ERR_UNIMPLEMENTED);
  372. #endif
  373. *blocked = S2N_NOT_BLOCKED;
  374. return S2N_SUCCESS;
  375. }
  376. int s2n_ktls_read_full_record(struct s2n_connection *conn, uint8_t *record_type)
  377. {
  378. POSIX_ENSURE_REF(conn);
  379. POSIX_ENSURE_REF(record_type);
  380. /* If any unread data remains in conn->in, it must be application data that
  381. * couldn't be returned due to the size of the application's provided buffer.
  382. */
  383. if (s2n_stuffer_data_available(&conn->in)) {
  384. *record_type = TLS_APPLICATION_DATA;
  385. return S2N_SUCCESS;
  386. }
  387. POSIX_GUARD(s2n_stuffer_resize_if_empty(&conn->in, S2N_DEFAULT_FRAGMENT_LENGTH));
  388. struct s2n_stuffer record_stuffer = conn->in;
  389. size_t len = s2n_stuffer_space_remaining(&record_stuffer);
  390. uint8_t *buf = s2n_stuffer_raw_write(&record_stuffer, len);
  391. POSIX_ENSURE_REF(buf);
  392. s2n_blocked_status blocked = S2N_NOT_BLOCKED;
  393. size_t bytes_read = 0;
  394. /* Since recvmsg is responsible for decrypting the record in ktls,
  395. * we apply blinding to the recvmsg call.
  396. */
  397. s2n_result result = s2n_ktls_recvmsg(conn->recv_io_context, record_type,
  398. buf, len, &blocked, &bytes_read);
  399. WITH_ERROR_BLINDING(conn, POSIX_GUARD_RESULT(result));
  400. POSIX_GUARD(s2n_stuffer_skip_write(&conn->in, bytes_read));
  401. return S2N_SUCCESS;
  402. }