recvsend_bundle-inc.c

#include "../config-host.h"
/* SPDX-License-Identifier: MIT */
/*
 * Simple test case showing how to use send and recv bundles with
 * incremental buffer ring usage
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <pthread.h>

#include <liburing.h>

#define MSG_SIZE	128
#define NR_MIN_MSGS	4
#define NR_MAX_MSGS	32
#define SEQ_SIZE	(MSG_SIZE / sizeof(unsigned long))

#define RECV_BIDS	8192
#define RECV_BID_MASK	(RECV_BIDS - 1)

#define PORT	10202
#define HOST	"127.0.0.1"

#define SEND_BGID	7
#define RECV_BGID	8

enum t_test_result {
	T_EXIT_PASS = 0,
	T_EXIT_FAIL = 1,
	T_EXIT_SKIP = 77,
};

static int nr_msgs;
static int use_port = PORT;
static int no_send_mshot;

struct recv_data {
	pthread_barrier_t connect;
	pthread_barrier_t startup;
	pthread_barrier_t barrier;
	pthread_barrier_t finish;
	unsigned long seq;
	int recv_bytes;
	int accept_fd;
	int abort;
	unsigned int max_sends;
	int to_eagain;
	void *recv_buf;
	int send_bundle;
	int recv_bundle;
};
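
/*
 * Arm a multishot recv on the accepted socket, picking buffers from the
 * RECV_BGID group. When rd->recv_bundle is set, IORING_RECVSEND_BUNDLE
 * lets a single completion span multiple messages.
 */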
static int arm_recv(struct io_uring *ring, struct recv_data *rd)
{
	struct io_uring_sqe *sqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_recv_multishot(sqe, rd->accept_fd, NULL, 0, 0);
	if (rd->recv_bundle)
		sqe->ioprio |= IORING_RECVSEND_BUNDLE;
	sqe->buf_group = RECV_BGID;
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->user_data = 2;

	ret = io_uring_submit(ring);
	if (ret != 1) {
		fprintf(stderr, "submit failed: %d\n", ret);
		return 1;
	}
	return 0;
}
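
/*
 * Set up the listening side: bind/listen on use_port, sync with the sender
 * over the connect barrier, accept the connection, then arm the first recv.
 */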
static int recv_prep(struct io_uring *ring, struct recv_data *rd, int *sock)
{
	struct sockaddr_in saddr;
	int sockfd, ret, val, use_fd;
	socklen_t socklen;

	memset(&saddr, 0, sizeof(saddr));
	saddr.sin_family = AF_INET;
	saddr.sin_addr.s_addr = htonl(INADDR_ANY);
	saddr.sin_port = htons(use_port);

	sockfd = socket(AF_INET, SOCK_STREAM, 0);
	if (sockfd < 0) {
		perror("socket");
		return 1;
	}

	val = 1;
	setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));

	ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
	if (ret < 0) {
		perror("bind");
		goto err;
	}

	ret = listen(sockfd, 1);
	if (ret < 0) {
		perror("listen");
		goto err;
	}

	pthread_barrier_wait(&rd->connect);

	socklen = sizeof(saddr);
	use_fd = accept(sockfd, (struct sockaddr *)&saddr, &socklen);
	if (use_fd < 0) {
		perror("accept");
		goto err;
	}

	rd->accept_fd = use_fd;
	pthread_barrier_wait(&rd->startup);
	pthread_barrier_wait(&rd->barrier);

	if (arm_recv(ring, rd))
		goto err;

	*sock = sockfd;
	return 0;
err:
	close(sockfd);
	return 1;
}
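
/*
 * The sender writes an incrementing sequence of unsigned longs; check that
 * the received bytes continue the expected sequence.
 */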
static int verify_seq(struct recv_data *rd, void *verify_ptr, int verify_sz,
		      int start_bid)
{
	unsigned long *seqp;
	int seq_size = verify_sz / sizeof(unsigned long);
	int i;

	seqp = verify_ptr;
	for (i = 0; i < seq_size; i++) {
		if (rd->seq != *seqp) {
			fprintf(stderr, "bid=%d, got seq %lu, wanted %lu, offset %d\n",
				start_bid, *seqp, rd->seq, i);
			return 0;
		}
		seqp++;
		rd->seq++;
	}
	return 1;
}
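
/*
 * Wait for a receive completion, polling with a 100 msec timeout so that an
 * abort requested by the sender side is noticed.
 */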
static int recv_get_cqe(struct io_uring *ring, struct recv_data *rd,
			struct io_uring_cqe **cqe)
{
	struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 100000000LL };
	int ret;

	do {
		ret = io_uring_wait_cqe_timeout(ring, cqe, &ts);
		if (!ret)
			return 0;
		if (ret == -ETIME) {
			if (rd->abort)
				break;
			continue;
		}
		fprintf(stderr, "wait recv: %d\n", ret);
		break;
	} while (1);

	return 1;
}
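
/*
 * Reap receive completions until all expected bytes have arrived. With an
 * incremental buffer ring, the same buffer ID (0 here) is reported on every
 * completion, and IORING_CQE_F_BUF_MORE indicates the buffer is only
 * partially consumed. Data is verified one full message at a time.
 */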
static int do_recv(struct io_uring *ring, struct recv_data *rd)
{
	struct io_uring_cqe *cqe;
	void *verify_ptr;
	int verify_sz = 0;
	int verify_bid = 0;
	int bid;

	verify_ptr = malloc(rd->recv_bytes);

	do {
		if (recv_get_cqe(ring, rd, &cqe))
			break;
		if (cqe->res == -EINVAL) {
			fprintf(stdout, "recv not supported, skipping\n");
			free(verify_ptr);
			return 0;
		}
		if (cqe->res < 0) {
			fprintf(stderr, "failed recv cqe: %d\n", cqe->res);
			goto err;
		}
		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
			fprintf(stderr, "no buffer set in recv\n");
			goto err;
		}
		if (!(cqe->flags & IORING_CQE_F_BUF_MORE)) {
			fprintf(stderr, "CQE_F_BUF_MORE not set\n");
			goto err;
		}
		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
		if (bid != 0) {
			fprintf(stderr, "got bid %d\n", bid);
			goto err;
		}

		/*
		 * Accumulate the received data first, then verify whenever a
		 * whole number of messages is available.
		 */
		memcpy(verify_ptr + verify_sz, rd->recv_buf + (bid * MSG_SIZE),
		       cqe->res);
		verify_sz += cqe->res;
		if (!(verify_sz % MSG_SIZE)) {
			if (!verify_seq(rd, verify_ptr, verify_sz, verify_bid))
				goto err;
			verify_bid += verify_sz / MSG_SIZE;
			verify_bid &= RECV_BID_MASK;
			verify_sz = 0;
		}

		rd->recv_bytes -= cqe->res;
		io_uring_cqe_seen(ring, cqe);
		if (!(cqe->flags & IORING_CQE_F_MORE) && rd->recv_bytes) {
			if (arm_recv(ring, rd))
				goto err;
		}
	} while (rd->recv_bytes);

	if (verify_sz && !(verify_sz % MSG_SIZE) &&
	    !verify_seq(rd, verify_ptr, verify_sz, verify_bid))
		goto err;

	free(verify_ptr);
	pthread_barrier_wait(&rd->finish);
	return 0;
err:
	free(verify_ptr);
	pthread_barrier_wait(&rd->finish);
	return 1;
}
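
/*
 * Receive thread: create a ring with a larger CQ, register one large buffer
 * in an incremental (IOU_PBUF_RING_INC) provided buffer ring, then accept
 * the connection and receive.
 */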
static void *recv_fn(void *data)
{
	struct recv_data *rd = data;
	struct io_uring_params p = { };
	struct io_uring ring;
	struct io_uring_buf_ring *br;
	void *buf, *ptr;
	int ret, sock;

	p.cq_entries = 4096;
	p.flags = IORING_SETUP_CQSIZE;
	io_uring_queue_init_params(16, &ring, &p);

	ret = 1;
	if (posix_memalign(&buf, 4096, MSG_SIZE * RECV_BIDS))
		goto err;

	/*
	 * Register one large buffer under a single buffer ID; with
	 * IOU_PBUF_RING_INC, the kernel consumes it incrementally rather
	 * than one whole buffer per completion.
	 */
	br = io_uring_setup_buf_ring(&ring, RECV_BIDS, RECV_BGID,
				     IOU_PBUF_RING_INC, &ret);
	if (!br) {
		fprintf(stderr, "failed setting up recv ring %d\n", ret);
		goto err;
	}
	ptr = buf;
	io_uring_buf_ring_add(br, ptr, MSG_SIZE * RECV_BIDS, 0, RECV_BID_MASK, 0);
	io_uring_buf_ring_advance(br, 1);
	rd->recv_buf = buf;

	ret = recv_prep(&ring, rd, &sock);
	if (ret) {
		fprintf(stderr, "recv_prep failed: %d\n", ret);
		goto err;
	}
	ret = do_recv(&ring, rd);

	close(sock);
	close(rd->accept_fd);
	free(buf);
	io_uring_queue_exit(&ring);
err:
	return (void *)(intptr_t)ret;
}
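
/*
 * Send all nr_msgs messages with a single bundle SQE; the kernel picks as
 * many buffers from SEND_BGID as it can per operation, so fewer CQEs than
 * messages may be posted. -EINVAL on the first completion means this kernel
 * does not support bundles.
 */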
static int __do_send_bundle(struct recv_data *rd, struct io_uring *ring,
			    int sockfd)
{
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	int bytes_needed = MSG_SIZE * nr_msgs;
	int i, ret;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_send_bundle(sqe, sockfd, 0, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->buf_group = SEND_BGID;
	sqe->user_data = 1;

	ret = io_uring_submit(ring);
	if (ret != 1)
		return 1;

	pthread_barrier_wait(&rd->barrier);

	for (i = 0; i < nr_msgs; i++) {
		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret) {
			fprintf(stderr, "wait send: %d\n", ret);
			return 1;
		}
		if (!i && cqe->res == -EINVAL) {
			rd->abort = 1;
			no_send_mshot = 1;
			break;
		}
		if (cqe->res < 0) {
			fprintf(stderr, "bad send cqe res: %d\n", cqe->res);
			return 1;
		}
		bytes_needed -= cqe->res;
		if (!bytes_needed) {
			io_uring_cqe_seen(ring, cqe);
			break;
		}
		if (!(cqe->flags & IORING_CQE_F_MORE)) {
			fprintf(stderr, "expected more, but MORE not set\n");
			return 1;
		}
		io_uring_cqe_seen(ring, cqe);
	}
	return 0;
}
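
/*
 * Non-bundle variant: one send SQE per message, each picking a single
 * buffer from the SEND_BGID group.
 */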
static int __do_send(struct recv_data *rd, struct io_uring *ring, int sockfd)
{
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	int bytes_needed = MSG_SIZE * nr_msgs;
	int i, ret;

	for (i = 0; i < nr_msgs; i++) {
		sqe = io_uring_get_sqe(ring);
		io_uring_prep_send(sqe, sockfd, NULL, 0, 0);
		sqe->user_data = 10 + i;
		sqe->flags |= IOSQE_BUFFER_SELECT;
		sqe->buf_group = SEND_BGID;

		ret = io_uring_submit(ring);
		if (ret != 1)
			return 1;

		if (!i)
			pthread_barrier_wait(&rd->barrier);

		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret) {
			fprintf(stderr, "send wait cqe %d\n", ret);
			return 1;
		}
		if (!i && cqe->res == -EINVAL) {
			rd->abort = 1;
			no_send_mshot = 1;
			break;
		}
		/* negative res is an error; check before the short-send test */
		if (cqe->res < 0) {
			fprintf(stderr, "bad send cqe res: %d\n", cqe->res);
			return 1;
		}
		if (cqe->res != MSG_SIZE) {
			fprintf(stderr, "send failed cqe: %d\n", cqe->res);
			return 1;
		}
		bytes_needed -= cqe->res;
		io_uring_cqe_seen(ring, cqe);
		if (!bytes_needed)
			break;
	}
	return 0;
}
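
/*
 * Sender side: connect, optionally pre-fill the socket buffer with plain
 * send() calls until -EAGAIN (counted in rd->to_eagain), then send nr_msgs
 * sequence-stamped messages through the provided buffer ring.
 */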
static int do_send(struct recv_data *rd)
{
	struct sockaddr_in saddr;
	struct io_uring ring;
	unsigned long seq_buf[SEQ_SIZE], send_seq;
	struct io_uring_params p = { };
	struct io_uring_buf_ring *br;
	int sockfd, ret, len, i;
	socklen_t optlen;
	void *buf, *ptr;

	ret = io_uring_queue_init_params(16, &ring, &p);
	if (ret) {
		fprintf(stderr, "queue init failed: %d\n", ret);
		return 1;
	}
	if (!(p.features & IORING_FEAT_RECVSEND_BUNDLE)) {
		no_send_mshot = 1;
		io_uring_queue_exit(&ring);
		return 0;
	}

	if (posix_memalign(&buf, 4096, MSG_SIZE * nr_msgs)) {
		io_uring_queue_exit(&ring);
		return 1;
	}

	br = io_uring_setup_buf_ring(&ring, nr_msgs, SEND_BGID, 0, &ret);
	if (!br) {
		if (ret == -EINVAL) {
			fprintf(stderr, "einval on br setup\n");
			free(buf);
			io_uring_queue_exit(&ring);
			return 0;
		}
		fprintf(stderr, "failed setting up send ring %d\n", ret);
		free(buf);
		io_uring_queue_exit(&ring);
		return 1;
	}
	ptr = buf;
	for (i = 0; i < nr_msgs; i++) {
		io_uring_buf_ring_add(br, ptr, MSG_SIZE, i, nr_msgs - 1, i);
		ptr += MSG_SIZE;
	}
	io_uring_buf_ring_advance(br, nr_msgs);

	memset(&saddr, 0, sizeof(saddr));
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(use_port);
	inet_pton(AF_INET, HOST, &saddr.sin_addr);

	sockfd = socket(AF_INET, SOCK_STREAM, 0);
	if (sockfd < 0) {
		perror("socket");
		goto err2;
	}

	pthread_barrier_wait(&rd->connect);

	ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
	if (ret < 0) {
		perror("connect");
		goto err;
	}

	pthread_barrier_wait(&rd->startup);

	optlen = sizeof(len);
	len = 1024 * MSG_SIZE;
	setsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &len, optlen);

	/* almost fill queue, leave room for one message */
	send_seq = 0;
	rd->to_eagain = 0;
	while (rd->max_sends && rd->max_sends--) {
		for (i = 0; i < SEQ_SIZE; i++)
			seq_buf[i] = send_seq++;

		ret = send(sockfd, seq_buf, sizeof(seq_buf), MSG_DONTWAIT);
		if (ret < 0) {
			if (errno == EAGAIN) {
				send_seq -= SEQ_SIZE;
				break;
			}
			perror("send");
			return 1;
		} else if (ret != sizeof(seq_buf)) {
			fprintf(stderr, "short %d send\n", ret);
			return 1;
		}

		rd->to_eagain++;
		rd->recv_bytes += sizeof(seq_buf);
	}

	ptr = buf;
	for (i = 0; i < nr_msgs; i++) {
		unsigned long *pseq = ptr;
		int j;

		for (j = 0; j < SEQ_SIZE; j++)
			pseq[j] = send_seq++;
		ptr += MSG_SIZE;
	}

	/* prepare more messages, sending with bundle */
	rd->recv_bytes += (nr_msgs * MSG_SIZE);
	if (rd->send_bundle)
		ret = __do_send_bundle(rd, &ring, sockfd);
	else
		ret = __do_send(rd, &ring, sockfd);
	if (ret)
		goto err;

	pthread_barrier_wait(&rd->finish);

	close(sockfd);
	free(buf);
	io_uring_queue_exit(&ring);
	return 0;
err:
	close(sockfd);
err2:
	io_uring_queue_exit(&ring);
	pthread_barrier_wait(&rd->finish);
	return 1;
}
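
/*
 * Run one send/recv pairing. max_sends pre-fills the socket before the
 * bundle sends; to_eagain, if non-NULL, reports how many sends fit before
 * the socket backed up.
 */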
static int test(int backlog, unsigned int max_sends, int *to_eagain,
		int send_bundle, int recv_bundle)
{
	pthread_t recv_thread;
	struct recv_data rd;
	int ret;
	void *retval;

	memset(&rd, 0, sizeof(rd));
	pthread_barrier_init(&rd.connect, NULL, 2);
	pthread_barrier_init(&rd.startup, NULL, 2);
	pthread_barrier_init(&rd.barrier, NULL, 2);
	pthread_barrier_init(&rd.finish, NULL, 2);
	rd.max_sends = max_sends;
	if (to_eagain)
		*to_eagain = 0;
	rd.send_bundle = send_bundle;
	rd.recv_bundle = recv_bundle;

	ret = pthread_create(&recv_thread, NULL, recv_fn, &rd);
	if (ret) {
		fprintf(stderr, "Thread create failed: %d\n", ret);
		return 1;
	}

	ret = do_send(&rd);
	if (no_send_mshot)
		return 0;
	if (ret)
		return ret;

	pthread_join(recv_thread, &retval);
	if (to_eagain)
		*to_eagain = rd.to_eagain;
	return (intptr_t)retval;
}
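
/*
 * Exercise the send/recv bundle combinations, first with a small message
 * count and then with enough messages to exceed the fast iovec path.
 */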
static int run_tests(void)
{
	int ret, eagain_hit;

	nr_msgs = NR_MIN_MSGS;

	/* test basic send bundle first */
	ret = test(0, 0, NULL, 0, 0);
	if (ret) {
		fprintf(stderr, "test a failed\n");
		return T_EXIT_FAIL;
	}
	if (no_send_mshot)
		return T_EXIT_SKIP;

	/* test recv bundle */
	ret = test(0, 0, NULL, 0, 1);
	if (ret) {
		fprintf(stderr, "test b failed\n");
		return T_EXIT_FAIL;
	}

	/* test bundling recv and send */
	ret = test(0, 0, NULL, 1, 1);
	if (ret) {
		fprintf(stderr, "test c failed\n");
		return T_EXIT_FAIL;
	}

	/* test bundling with full socket */
	ret = test(1, 1000000, &eagain_hit, 1, 1);
	if (ret) {
		fprintf(stderr, "test d failed\n");
		return T_EXIT_FAIL;
	}

	/* test bundling with almost full socket */
	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 1, 1);
	if (ret) {
		fprintf(stderr, "test e failed\n");
		return T_EXIT_FAIL;
	}

	/* test recv bundle with almost full socket */
	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 0, 1);
	if (ret) {
		fprintf(stderr, "test f failed\n");
		return T_EXIT_FAIL;
	}

	/* test send bundle with almost full socket */
	ret = test(1, eagain_hit - (nr_msgs / 2), &eagain_hit, 1, 0);
	if (ret) {
		fprintf(stderr, "test g failed\n");
		return T_EXIT_FAIL;
	}

	/* now repeat the last three tests, but with > UIO_FASTIOV segments */
	nr_msgs = NR_MAX_MSGS;

	/* test bundling with almost full socket */
	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 1, 1);
	if (ret) {
		fprintf(stderr, "test h failed\n");
		return T_EXIT_FAIL;
	}

	/* test recv bundle with almost full socket */
	ret = test(1, eagain_hit - (nr_msgs / 2), NULL, 0, 1);
	if (ret) {
		fprintf(stderr, "test i failed\n");
		return T_EXIT_FAIL;
	}

	/* test send bundle with almost full socket */
	ret = test(1, eagain_hit - (nr_msgs / 2), &eagain_hit, 1, 0);
	if (ret) {
		fprintf(stderr, "test j failed\n");
		return T_EXIT_FAIL;
	}

	return T_EXIT_PASS;
}

static int test_tcp(void)
{
	int ret;

	ret = run_tests();
	if (ret == T_EXIT_FAIL)
		fprintf(stderr, "TCP test case failed\n");
	return ret;
}
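
/*
 * Probe for incremental buffer ring support by attempting to register one;
 * kernels without IOU_PBUF_RING_INC fail the registration and the test is
 * skipped.
 */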
static bool has_pbuf_ring_inc(void)
{
	struct io_uring_buf_ring *br;
	bool has_pbuf_inc = false;
	struct io_uring ring;
	void *buf;
	int ret;

	ret = io_uring_queue_init(1, &ring, 0);
	if (ret)
		return false;

	if (posix_memalign(&buf, 4096, MSG_SIZE * RECV_BIDS)) {
		io_uring_queue_exit(&ring);
		return false;
	}

	br = io_uring_setup_buf_ring(&ring, RECV_BIDS, RECV_BGID,
				     IOU_PBUF_RING_INC, &ret);
	if (br) {
		has_pbuf_inc = true;
		io_uring_unregister_buf_ring(&ring, RECV_BGID);
	}
	io_uring_queue_exit(&ring);
	free(buf);
	return has_pbuf_inc;
}

int main(int argc, char *argv[])
{
	int ret;

	if (argc > 1)
		return T_EXIT_SKIP;

	if (!has_pbuf_ring_inc())
		return T_EXIT_SKIP;

	ret = test_tcp();
	if (ret != T_EXIT_PASS)
		return ret;

	return T_EXIT_PASS;
}