send-zerocopy.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855
  1. #include "../config-host.h"
  2. /* SPDX-License-Identifier: MIT */
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <stdint.h>
  6. #include <assert.h>
  7. #include <errno.h>
  8. #include <limits.h>
  9. #include <fcntl.h>
  10. #include <unistd.h>
  11. #include <stdbool.h>
  12. #include <string.h>
  13. #include <arpa/inet.h>
  14. #include <linux/if_packet.h>
  15. #include <linux/ipv6.h>
  16. #include <linux/socket.h>
  17. #include <linux/sockios.h>
  18. #include <net/ethernet.h>
  19. #include <net/if.h>
  20. #include <netinet/ip.h>
  21. #include <netinet/in.h>
  22. #include <netinet/ip6.h>
  23. #include <netinet/tcp.h>
  24. #include <netinet/udp.h>
  25. #include <sys/socket.h>
  26. #include <sys/time.h>
  27. #include <sys/resource.h>
  28. #include <sys/un.h>
  29. #include <sys/ioctl.h>
  30. #include <sys/socket.h>
  31. #include <sys/stat.h>
  32. #include <sys/time.h>
  33. #include <sys/types.h>
  34. #include <sys/wait.h>
  35. #include <sys/mman.h>
  36. #include <linux/mman.h>
  37. #include "liburing.h"
  38. #include "helpers.h"
  39. #define MAX_MSG 128
  40. #define HOST "127.0.0.1"
  41. #define HOSTV6 "::1"
  42. #define MAX_IOV 32
  43. #define CORK_REQS 5
  44. #define RX_TAG 10000
  45. #define BUFFER_OFFSET 41
  46. #ifndef ARRAY_SIZE
  47. #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
  48. #endif
/* Buffer flavours exercised by the send tests; __BUF_NR is the count. */
enum {
	BUF_T_NORMAL,		/* page-sized, page-aligned */
	BUF_T_SMALL,		/* tiny (137 byte) buffer */
	BUF_T_NONALIGNED,	/* offset from page boundary on both ends */
	BUF_T_LARGE,		/* 32MB, used to force short sends */
	BUF_T_HUGETLB,		/* backed by a 2MB huge page, if available */
	__BUF_NR,
};
/* 32MB, should be enough to trigger a short send */
#define LARGE_BUF_SIZE (1U << 25)

static size_t page_sz;
/* shared transmit/receive payload buffers, allocated in main() */
static char *tx_buffer, *rx_buffer;
/* one entry per BUF_T_* flavour; a NULL iov_base means "skip this flavour" */
static struct iovec buffers_iov[__BUF_NR];
/* set if the kernel supports IORING_OP_SENDMSG_ZC (probed in main) */
static bool has_sendmsg;
  63. static bool check_cq_empty(struct io_uring *ring)
  64. {
  65. struct io_uring_cqe *cqe = NULL;
  66. int ret;
  67. ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */
  68. return ret == -EAGAIN;
  69. }
  70. static int test_basic_send(struct io_uring *ring, int sock_tx, int sock_rx)
  71. {
  72. struct io_uring_sqe *sqe;
  73. struct io_uring_cqe *cqe;
  74. int msg_flags = 0;
  75. unsigned zc_flags = 0;
  76. int payload_size = 100;
  77. int ret;
  78. sqe = io_uring_get_sqe(ring);
  79. io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
  80. msg_flags, zc_flags);
  81. sqe->user_data = 1;
  82. ret = io_uring_submit(ring);
  83. assert(ret == 1);
  84. ret = io_uring_wait_cqe(ring, &cqe);
  85. assert(!ret && cqe->user_data == 1);
  86. if (cqe->res == -EINVAL) {
  87. assert(!(cqe->flags & IORING_CQE_F_MORE));
  88. return T_EXIT_SKIP;
  89. } else if (cqe->res != payload_size) {
  90. fprintf(stderr, "send failed %i\n", cqe->res);
  91. return T_EXIT_FAIL;
  92. }
  93. assert(cqe->flags & IORING_CQE_F_MORE);
  94. io_uring_cqe_seen(ring, cqe);
  95. ret = io_uring_wait_cqe(ring, &cqe);
  96. assert(!ret);
  97. assert(cqe->user_data == 1);
  98. assert(cqe->flags & IORING_CQE_F_NOTIF);
  99. assert(!(cqe->flags & IORING_CQE_F_MORE));
  100. io_uring_cqe_seen(ring, cqe);
  101. assert(check_cq_empty(ring));
  102. ret = recv(sock_rx, rx_buffer, payload_size, MSG_TRUNC);
  103. assert(ret == payload_size);
  104. return T_EXIT_PASS;
  105. }
  106. static int test_send_faults(int sock_tx, int sock_rx)
  107. {
  108. struct io_uring_sqe *sqe;
  109. struct io_uring_cqe *cqe;
  110. int msg_flags = 0;
  111. unsigned zc_flags = 0;
  112. int payload_size = 100;
  113. int ret, i, nr_cqes, nr_reqs = 3;
  114. struct io_uring ring;
  115. ret = io_uring_queue_init(32, &ring, IORING_SETUP_SUBMIT_ALL);
  116. if (ret) {
  117. fprintf(stderr, "queue init failed: %d\n", ret);
  118. return -1;
  119. }
  120. /* invalid buffer */
  121. sqe = io_uring_get_sqe(&ring);
  122. io_uring_prep_send_zc(sqe, sock_tx, (void *)1UL, payload_size,
  123. msg_flags, zc_flags);
  124. sqe->user_data = 1;
  125. /* invalid address */
  126. sqe = io_uring_get_sqe(&ring);
  127. io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
  128. msg_flags, zc_flags);
  129. io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)1UL,
  130. sizeof(struct sockaddr_in6));
  131. sqe->user_data = 2;
  132. /* invalid send/recv flags */
  133. sqe = io_uring_get_sqe(&ring);
  134. io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
  135. msg_flags, ~0U);
  136. sqe->user_data = 3;
  137. ret = io_uring_submit(&ring);
  138. assert(ret == nr_reqs);
  139. nr_cqes = nr_reqs;
  140. for (i = 0; i < nr_cqes; i++) {
  141. ret = io_uring_wait_cqe(&ring, &cqe);
  142. assert(!ret);
  143. assert(cqe->user_data <= nr_reqs);
  144. if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
  145. int expected = (cqe->user_data == 3) ? -EINVAL : -EFAULT;
  146. if (cqe->res != expected) {
  147. fprintf(stderr, "invalid cqe res %i vs expected %i, "
  148. "user_data %i\n",
  149. cqe->res, expected, (int)cqe->user_data);
  150. return -1;
  151. }
  152. if (cqe->flags & IORING_CQE_F_MORE)
  153. nr_cqes++;
  154. } else {
  155. if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) {
  156. fprintf(stderr, "invalid notif cqe %i %i\n",
  157. cqe->res, cqe->flags);
  158. return -1;
  159. }
  160. }
  161. io_uring_cqe_seen(&ring, cqe);
  162. }
  163. assert(check_cq_empty(&ring));
  164. return T_EXIT_PASS;
  165. }
  166. static int create_socketpair_ip(struct sockaddr_storage *addr,
  167. int *sock_client, int *sock_server,
  168. bool ipv6, bool client_connect,
  169. bool msg_zc, bool tcp)
  170. {
  171. socklen_t addr_size;
  172. int family, sock, listen_sock = -1;
  173. int ret;
  174. memset(addr, 0, sizeof(*addr));
  175. if (ipv6) {
  176. struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
  177. family = AF_INET6;
  178. saddr->sin6_family = family;
  179. saddr->sin6_port = htons(0);
  180. addr_size = sizeof(*saddr);
  181. } else {
  182. struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
  183. family = AF_INET;
  184. saddr->sin_family = family;
  185. saddr->sin_port = htons(0);
  186. saddr->sin_addr.s_addr = htonl(INADDR_ANY);
  187. addr_size = sizeof(*saddr);
  188. }
  189. /* server sock setup */
  190. if (tcp) {
  191. sock = listen_sock = socket(family, SOCK_STREAM, IPPROTO_TCP);
  192. } else {
  193. sock = *sock_server = socket(family, SOCK_DGRAM, 0);
  194. }
  195. if (sock < 0) {
  196. perror("socket");
  197. return 1;
  198. }
  199. ret = bind(sock, (struct sockaddr *)addr, addr_size);
  200. if (ret < 0) {
  201. perror("bind");
  202. return 1;
  203. }
  204. ret = getsockname(sock, (struct sockaddr *)addr, &addr_size);
  205. if (ret < 0) {
  206. fprintf(stderr, "getsockname failed %i\n", errno);
  207. return 1;
  208. }
  209. if (tcp) {
  210. ret = listen(sock, 128);
  211. assert(ret != -1);
  212. }
  213. if (ipv6) {
  214. struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
  215. inet_pton(AF_INET6, HOSTV6, &(saddr->sin6_addr));
  216. } else {
  217. struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
  218. inet_pton(AF_INET, HOST, &saddr->sin_addr);
  219. }
  220. /* client sock setup */
  221. if (tcp) {
  222. *sock_client = socket(family, SOCK_STREAM, IPPROTO_TCP);
  223. assert(client_connect);
  224. } else {
  225. *sock_client = socket(family, SOCK_DGRAM, 0);
  226. }
  227. if (*sock_client < 0) {
  228. perror("socket");
  229. return 1;
  230. }
  231. if (client_connect) {
  232. ret = connect(*sock_client, (struct sockaddr *)addr, addr_size);
  233. if (ret < 0) {
  234. perror("connect");
  235. return 1;
  236. }
  237. }
  238. if (msg_zc) {
  239. #ifdef SO_ZEROCOPY
  240. int val = 1;
  241. if (setsockopt(*sock_client, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
  242. perror("setsockopt zc");
  243. return 1;
  244. }
  245. #else
  246. fprintf(stderr, "no SO_ZEROCOPY\n");
  247. return 1;
  248. #endif
  249. }
  250. if (tcp) {
  251. *sock_server = accept(listen_sock, NULL, NULL);
  252. if (!*sock_server) {
  253. fprintf(stderr, "can't accept\n");
  254. return 1;
  255. }
  256. close(listen_sock);
  257. }
  258. return 0;
  259. }
/* One configuration of do_test_inet_send(); test_inet_send() sweeps these. */
struct send_conf {
	bool fixed_buf;		/* use a registered (fixed) buffer */
	bool mix_register;	/* randomly mix fixed and normal buffers */
	bool cork;		/* split the send into CORK_REQS linked chunks */
	bool force_async;	/* set IOSQE_ASYNC on the send SQEs */
	bool use_sendmsg;	/* sendmsg(_zc) variant instead of send(_zc) */
	bool tcp;		/* stream socket (vs UDP) */
	bool zc;		/* zerocopy opcode */
	bool iovec;		/* multi-entry iovec (sendmsg only) */
	bool long_iovec;	/* use MAX_IOV entries instead of 4 */
	bool poll_first;	/* set IORING_RECVSEND_POLL_FIRST */
	int buf_index;		/* which buffers_iov[] flavour to send */
	struct sockaddr_storage *addr;	/* destination, NULL if connected */
};
/*
 * Run one send configuration: queue nr_reqs linked sends (CORK_REQS chunks
 * when corking, otherwise one) from sock_client plus a single MSG_WAITALL
 * recv on sock_server, submit, account for all completions (zerocopy
 * notifications included), then verify the received bytes match the chosen
 * transmit buffer. Returns 0 on success, 1 on failure.
 */
static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_server,
			     struct send_conf *conf)
{
	struct iovec iov[MAX_IOV];
	struct msghdr msghdr[CORK_REQS];
	const unsigned zc_flags = 0;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int nr_reqs = conf->cork ? CORK_REQS : 1;
	int i, ret, nr_cqes, addr_len = 0;
	size_t send_size = buffers_iov[conf->buf_index].iov_len;
	size_t chunk_size = send_size / nr_reqs;
	/* last chunk absorbs the division remainder */
	size_t chunk_size_last = send_size - chunk_size * (nr_reqs - 1);
	char *buf = buffers_iov[conf->buf_index].iov_base;

	assert(MAX_IOV >= CORK_REQS);

	if (conf->addr) {
		sa_family_t fam = ((struct sockaddr_in *)conf->addr)->sin_family;

		addr_len = (fam == AF_INET) ? sizeof(struct sockaddr_in) :
					      sizeof(struct sockaddr_in6);
	}

	memset(rx_buffer, 0, send_size);

	for (i = 0; i < nr_reqs; i++) {
		bool real_fixed_buf = conf->fixed_buf;
		size_t cur_size = chunk_size;
		int msg_flags = MSG_WAITALL;

		/* when mixing, pick fixed vs normal buffer at random */
		if (conf->mix_register)
			real_fixed_buf = rand() & 1;
		/* all but the last chunk get MSG_MORE */
		if (i != nr_reqs - 1)
			msg_flags |= MSG_MORE;
		else
			cur_size = chunk_size_last;

		sqe = io_uring_get_sqe(ring);

		if (!conf->use_sendmsg) {
			if (conf->zc) {
				io_uring_prep_send_zc(sqe, sock_client, buf + i * chunk_size,
						      cur_size, msg_flags, zc_flags);
			} else {
				io_uring_prep_send(sqe, sock_client, buf + i * chunk_size,
						   cur_size, msg_flags);
			}

			if (real_fixed_buf) {
				sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
				sqe->buf_index = conf->buf_index;
			}
			if (conf->addr)
				io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)conf->addr,
							    addr_len);
		} else {
			struct iovec *io;
			int iov_len;

			if (conf->zc)
				io_uring_prep_sendmsg_zc(sqe, sock_client, &msghdr[i], msg_flags);
			else
				io_uring_prep_sendmsg(sqe, sock_client, &msghdr[i], msg_flags);

			if (!conf->iovec) {
				/* single iovec covering this chunk */
				io = &iov[i];
				iov_len = 1;
				iov[i].iov_len = cur_size;
				iov[i].iov_base = buf + i * chunk_size;
			} else {
				char *it = buf;
				int j;

				/* multi-iovec mode is incompatible with corking */
				assert(nr_reqs == 1);
				iov_len = conf->long_iovec ? MAX_IOV : 4;
				io = iov;
				for (j = 0; j < iov_len; j++)
					io[j].iov_len = 1;
				/* first want to be easily advanced */
				io[0].iov_base = it;
				it += io[0].iov_len;
				/* this should cause retry */
				io[1].iov_len = chunk_size - iov_len + 1;
				io[1].iov_base = it;
				it += io[1].iov_len;
				/* fill the rest */
				for (j = 2; j < iov_len; j++) {
					io[j].iov_base = it;
					it += io[j].iov_len;
				}
			}

			memset(&msghdr[i], 0, sizeof(msghdr[i]));
			msghdr[i].msg_iov = io;
			msghdr[i].msg_iovlen = iov_len;
			if (conf->addr) {
				msghdr[i].msg_name = conf->addr;
				msghdr[i].msg_namelen = addr_len;
			}
		}
		sqe->user_data = i;
		if (conf->force_async)
			sqe->flags |= IOSQE_ASYNC;
		if (conf->poll_first)
			sqe->ioprio |= IORING_RECVSEND_POLL_FIRST;
		/* link chunks so they are sent in order */
		if (i != nr_reqs - 1)
			sqe->flags |= IOSQE_IO_LINK;
	}

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_recv(sqe, sock_server, rx_buffer, send_size, MSG_WAITALL);
	sqe->user_data = RX_TAG;

	ret = io_uring_submit(ring);
	if (ret != nr_reqs + 1) {
		fprintf(stderr, "submit failed, got %i expected %i\n", ret, nr_reqs);
		return 1;
	}

	/* one CQE per send plus the recv; zc notifications add more below */
	nr_cqes = nr_reqs + 1;
	for (i = 0; i < nr_cqes; i++) {
		int expected = chunk_size;

		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret) {
			fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
			return 1;
		}
		if (cqe->user_data == RX_TAG) {
			/* receive side must see the full payload */
			if (cqe->res != send_size) {
				fprintf(stderr, "rx failed res: %i, expected %i\n",
					cqe->res, (int)send_size);
				return 1;
			}
			io_uring_cqe_seen(ring, cqe);
			continue;
		}

		/* F_MORE (notif pending) and F_NOTIF are mutually exclusive */
		if ((cqe->flags & IORING_CQE_F_MORE) && (cqe->flags & IORING_CQE_F_NOTIF)) {
			fprintf(stderr, "unexpected cflags %i res %i\n",
				cqe->flags, cqe->res);
			return 1;
		}
		if (cqe->user_data >= nr_reqs) {
			fprintf(stderr, "invalid user_data %lu\n",
				(unsigned long)cqe->user_data);
			return 1;
		}
		if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
			/* a notification CQE will follow for this send */
			if (cqe->flags & IORING_CQE_F_MORE)
				nr_cqes++;
			if (cqe->user_data == nr_reqs - 1)
				expected = chunk_size_last;
			if (cqe->res != expected) {
				fprintf(stderr, "invalid cqe->res %d expected %d\n",
					cqe->res, expected);
				return 1;
			}
		}
		io_uring_cqe_seen(ring, cqe);
	}

	/* byte-for-byte payload verification */
	for (i = 0; i < send_size; i++) {
		if (buf[i] != rx_buffer[i]) {
			fprintf(stderr, "botched data, first mismated byte %i, "
				"%u vs %u\n", i, buf[i], rx_buffer[i]);
			return 1;
		}
	}
	return 0;
}
/*
 * Sweep the full configuration matrix: socket-level options are decoded
 * from the bits of j (address family, connectedness, SO_ZEROCOPY, TCP,
 * swapped roles), per-request options from the bits of i (see send_conf),
 * and each valid combination is run against every usable buffer flavour.
 * Invalid/unsupported combinations are filtered out before running.
 * Returns 0 on success, 1 on first failure.
 */
static int test_inet_send(struct io_uring *ring)
{
	struct send_conf conf;
	struct sockaddr_storage addr;
	int sock_client = -1, sock_server = -1;
	int ret, j, i;
	int buf_index;

	for (j = 0; j < 32; j++) {
		/* decode socket setup from bits of j */
		bool ipv6 = j & 1;
		bool client_connect = j & 2;
		bool msg_zc_set = j & 4;
		bool tcp = j & 8;
		bool swap_sockets = j & 16;

		/* TCP requires a connected client */
		if (tcp && !client_connect)
			continue;
		if (swap_sockets && !tcp)
			continue;
#ifndef SO_ZEROCOPY
		if (msg_zc_set)
			continue;
#endif
		ret = create_socketpair_ip(&addr, &sock_client, &sock_server, ipv6,
					   client_connect, msg_zc_set, tcp);
		if (ret) {
			fprintf(stderr, "sock prep failed %d\n", ret);
			return 1;
		}
		/* optionally send from the accepted side instead */
		if (swap_sockets) {
			int tmp_sock = sock_client;

			sock_client = sock_server;
			sock_server = tmp_sock;
		}

		for (i = 0; i < 1024; i++) {
			bool regbuf;

			/* decode per-request options from bits of i */
			conf.use_sendmsg = i & 1;
			conf.poll_first = i & 2;
			conf.fixed_buf = i & 4;
			conf.addr = (i & 8) ? &addr : NULL;
			conf.cork = i & 16;
			conf.mix_register = i & 32;
			conf.force_async = i & 64;
			conf.zc = i & 128;
			conf.iovec = i & 256;
			conf.long_iovec = i & 512;
			conf.tcp = tcp;
			regbuf = conf.mix_register || conf.fixed_buf;

			/* iovec mode needs sendmsg, no regbufs, no corking */
			if (conf.iovec && (!conf.use_sendmsg || regbuf || conf.cork))
				continue;
			if (!conf.zc) {
				/* registered buffers are zerocopy-only */
				if (regbuf)
					continue;
				/*
				 * Non zerocopy send w/ addr was added together with sendmsg_zc,
				 * skip if the kernel doesn't support it.
				 */
				if (conf.addr && !has_sendmsg)
					continue;
			}
			/* connected streams take no destination; no corking over TCP */
			if (tcp && (conf.cork || conf.addr))
				continue;
			if (conf.mix_register && (!conf.cork || conf.fixed_buf))
				continue;
			/* unconnected sockets must be given an address */
			if (!client_connect && conf.addr == NULL)
				continue;
			if (conf.use_sendmsg && (regbuf || !has_sendmsg))
				continue;
			if (msg_zc_set && !conf.zc)
				continue;

			for (buf_index = 0; buf_index < ARRAY_SIZE(buffers_iov); buf_index++) {
				size_t len = buffers_iov[buf_index].iov_len;

				/* flavour unavailable (allocation failed) */
				if (!buffers_iov[buf_index].iov_base)
					continue;
				/* UDP datagrams can't carry huge payloads */
				if (!tcp && len > 4 * page_sz)
					continue;

				conf.buf_index = buf_index;
				ret = do_test_inet_send(ring, sock_client, sock_server, &conf);
				if (ret) {
					fprintf(stderr, "send failed fixed buf %i, "
						"conn %i, addr %i, cork %i\n",
						conf.fixed_buf, client_connect,
						!!conf.addr, conf.cork);
					return 1;
				}
			}
		}

		close(sock_client);
		close(sock_server);
	}
	return 0;
}
/*
 * Verify that send-zc keeps its own stable copy of the destination address:
 * link the send behind a 1s timeout, then clobber the caller's sockaddr
 * before the send can run. The send must still reach sock_rx. Expects the
 * timeout CQE (-ETIME), the send CQE (res == 1), then the notification.
 * Returns 0 on success, 1 on failure.
 */
static int test_async_addr(struct io_uring *ring)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct sockaddr_storage addr;
	int sock_tx = -1, sock_rx = -1;
	struct __kernel_timespec ts;
	int ret;

	ts.tv_sec = 1;
	ts.tv_nsec = 0;
	/* unconnected IPv6 UDP pair; the send supplies the address */
	ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, false, false, false);
	if (ret) {
		fprintf(stderr, "sock prep failed %d\n", ret);
		return 1;
	}

	/* timeout linked ahead of the send delays its issue */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ETIME_SUCCESS);
	sqe->user_data = 1;
	sqe->flags |= IOSQE_IO_LINK;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0, 0);
	sqe->user_data = 2;
	io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)&addr,
				    sizeof(struct sockaddr_in6));

	ret = io_uring_submit(ring);
	assert(ret == 2);
	/* wipe the address while the send is still queued behind the timeout */
	memset(&addr, 0, sizeof(addr));

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	if (cqe->user_data != 1 || cqe->res != -ETIME) {
		fprintf(stderr, "invalid timeout res %i %i\n",
			(int)cqe->user_data, cqe->res);
		return 1;
	}
	io_uring_cqe_seen(ring, cqe);

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	if (cqe->user_data != 2 || cqe->res != 1) {
		fprintf(stderr, "invalid send %i %i\n",
			(int)cqe->user_data, cqe->res);
		return 1;
	}
	io_uring_cqe_seen(ring, cqe);

	/* the datagram must have reached the receiver despite the memset */
	ret = recv(sock_rx, rx_buffer, 1, MSG_TRUNC);
	assert(ret == 1);

	/* finally the zerocopy notification */
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	assert(cqe->flags & IORING_CQE_F_NOTIF);
	io_uring_cqe_seen(ring, cqe);

	close(sock_tx);
	close(sock_rx);
	return 0;
}
  579. static bool io_check_zc_sendmsg(struct io_uring *ring)
  580. {
  581. struct io_uring_probe *p;
  582. int ret;
  583. p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
  584. if (!p) {
  585. fprintf(stderr, "probe allocation failed\n");
  586. return false;
  587. }
  588. ret = io_uring_register_probe(ring, p, 256);
  589. if (ret)
  590. return false;
  591. return p->ops_len > IORING_OP_SENDMSG_ZC;
  592. }
  593. /* see also send_recv.c:test_invalid */
  594. static int test_invalid_zc(int fds[2])
  595. {
  596. struct io_uring ring;
  597. int ret;
  598. struct io_uring_cqe *cqe;
  599. struct io_uring_sqe *sqe;
  600. bool notif = false;
  601. if (!has_sendmsg)
  602. return 0;
  603. ret = t_create_ring(8, &ring, 0);
  604. if (ret)
  605. return ret;
  606. sqe = io_uring_get_sqe(&ring);
  607. io_uring_prep_sendmsg(sqe, fds[0], NULL, MSG_WAITALL);
  608. sqe->opcode = IORING_OP_SENDMSG_ZC;
  609. sqe->flags |= IOSQE_ASYNC;
  610. ret = io_uring_submit(&ring);
  611. if (ret != 1) {
  612. fprintf(stderr, "submit failed %i\n", ret);
  613. return ret;
  614. }
  615. ret = io_uring_wait_cqe(&ring, &cqe);
  616. if (ret)
  617. return 1;
  618. if (cqe->flags & IORING_CQE_F_MORE)
  619. notif = true;
  620. io_uring_cqe_seen(&ring, cqe);
  621. if (notif) {
  622. ret = io_uring_wait_cqe(&ring, &cqe);
  623. if (ret)
  624. return 1;
  625. io_uring_cqe_seen(&ring, cqe);
  626. }
  627. io_uring_queue_exit(&ring);
  628. return 0;
  629. }
/*
 * Test driver: sets up a TCP IPv6 socket pair and the tx/rx buffers
 * (falling back to small buffers when 32MB allocations fail), then runs
 * the basic, fault, invalid-zc, async-addr and full inet-send matrices.
 */
int main(int argc, char *argv[])
{
	struct sockaddr_storage addr;
	struct io_uring ring;
	int i, ret, sp[2];
	size_t len;

	if (argc > 1)
		return T_EXIT_SKIP;

	page_sz = sysconf(_SC_PAGESIZE);

	/* create TCP IPv6 pair */
	ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true);
	if (ret) {
		fprintf(stderr, "sock prep failed %d\n", ret);
		return T_EXIT_FAIL;
	}

	/* try 32MB buffers first; fall back to 2 pages if that fails */
	len = LARGE_BUF_SIZE;
	tx_buffer = aligned_alloc(page_sz, len);
	rx_buffer = aligned_alloc(page_sz, len);
	if (tx_buffer && rx_buffer) {
		buffers_iov[BUF_T_LARGE].iov_base = tx_buffer;
		buffers_iov[BUF_T_LARGE].iov_len = len;
	} else {
		if (tx_buffer)
			free(tx_buffer);
		if (rx_buffer)
			free(rx_buffer);

		printf("skip large buffer tests, can't alloc\n");

		len = 2 * page_sz;
		tx_buffer = aligned_alloc(page_sz, len);
		rx_buffer = aligned_alloc(page_sz, len);
	}
	if (!tx_buffer || !rx_buffer) {
		fprintf(stderr, "can't allocate buffers\n");
		return T_EXIT_FAIL;
	}

	srand((unsigned)time(NULL));
	/* fill tx with a recognizable byte pattern for later comparison */
	for (i = 0; i < len; i++)
		tx_buffer[i] = i;
	memset(rx_buffer, 0, len);

	buffers_iov[BUF_T_NORMAL].iov_base = tx_buffer + page_sz;
	buffers_iov[BUF_T_NORMAL].iov_len = page_sz;
	buffers_iov[BUF_T_SMALL].iov_base = tx_buffer;
	buffers_iov[BUF_T_SMALL].iov_len = 137;
	buffers_iov[BUF_T_NONALIGNED].iov_base = tx_buffer + BUFFER_OFFSET;
	buffers_iov[BUF_T_NONALIGNED].iov_len = 2 * page_sz - BUFFER_OFFSET - 13;

	/* optional 4MB huge-page buffer, only if large allocs worked */
	if (len == LARGE_BUF_SIZE) {
		void *huge_page;
		int off = page_sz + 27;

		len = 1U << 22;
		huge_page = mmap(NULL, len, PROT_READ|PROT_WRITE,
				 MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
				 -1, 0);
		if (huge_page != MAP_FAILED) {
			buffers_iov[BUF_T_HUGETLB].iov_base = huge_page + off;
			buffers_iov[BUF_T_HUGETLB].iov_len = len - off;
		}
	}

	ret = io_uring_queue_init(32, &ring, 0);
	if (ret) {
		fprintf(stderr, "queue init failed: %d\n", ret);
		return T_EXIT_FAIL;
	}

	ret = test_basic_send(&ring, sp[0], sp[1]);
	/* SKIP here means the kernel has no send-zc at all */
	if (ret == T_EXIT_SKIP)
		return ret;
	if (ret) {
		fprintf(stderr, "test_basic_send() failed\n");
		return T_EXIT_FAIL;
	}

	has_sendmsg = io_check_zc_sendmsg(&ring);

	ret = test_send_faults(sp[0], sp[1]);
	if (ret) {
		fprintf(stderr, "test_send_faults() failed\n");
		return T_EXIT_FAIL;
	}
	ret = test_invalid_zc(sp);
	if (ret) {
		fprintf(stderr, "test_invalid_zc() failed\n");
		return T_EXIT_FAIL;
	}

	close(sp[0]);
	close(sp[1]);

	ret = test_async_addr(&ring);
	if (ret) {
		fprintf(stderr, "test_async_addr() failed\n");
		return T_EXIT_FAIL;
	}

	ret = t_register_buffers(&ring, buffers_iov, ARRAY_SIZE(buffers_iov));
	if (ret == T_SETUP_SKIP) {
		fprintf(stderr, "can't register bufs, skip\n");
		goto out;
	} else if (ret != T_SETUP_OK) {
		fprintf(stderr, "buffer registration failed %i\n", ret);
		return T_EXIT_FAIL;
	}

	/* shrink registered buffers so sends are misaligned within them */
	if (buffers_iov[BUF_T_HUGETLB].iov_base) {
		buffers_iov[BUF_T_HUGETLB].iov_base += 13;
		buffers_iov[BUF_T_HUGETLB].iov_len -= 26;
	}
	if (buffers_iov[BUF_T_LARGE].iov_base) {
		buffers_iov[BUF_T_LARGE].iov_base += 13;
		buffers_iov[BUF_T_LARGE].iov_len -= 26;
	}

	ret = test_inet_send(&ring);
	if (ret) {
		fprintf(stderr, "test_inet_send() failed\n");
		return T_EXIT_FAIL;
	}
out:
	io_uring_queue_exit(&ring);
	close(sp[0]);
	close(sp[1]);
	return T_EXIT_PASS;
}