/* send-zerocopy.c — liburing regression test for io_uring zero-copy send */
#include "../config-host.h"
/* SPDX-License-Identifier: MIT */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include "liburing.h"
#include "helpers.h"
#define MAX_MSG 128

#define HOST	"127.0.0.1"
#define HOSTV6	"::1"

#define MAX_IOV 32
#define CORK_REQS 5		/* number of linked sends in corked mode */
#define RX_TAG 10000		/* user_data tag for the receive request */
#define BUFFER_OFFSET 41	/* misalignment used by BUF_T_NONALIGNED */

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
#endif

/* Buffer variants exercised by the send tests. */
enum {
	BUF_T_NORMAL,
	BUF_T_SMALL,
	BUF_T_NONALIGNED,
	BUF_T_LARGE,
	BUF_T_HUGETLB,

	__BUF_NR,
};

/* 32MB, should be enough to trigger a short send */
#define LARGE_BUF_SIZE (1U << 25)

static size_t page_sz;				/* system page size */
static char *tx_buffer, *rx_buffer;		/* send / receive payload buffers */
static struct iovec buffers_iov[__BUF_NR];	/* per-variant slices of tx_buffer */
static bool has_sendzc;				/* kernel has IORING_OP_SEND_ZC */
static bool has_sendmsg;			/* kernel has IORING_OP_SENDMSG_ZC */
static bool hit_enomem;				/* warn only once about ulimit -l */
  65. static int probe_zc_support(void)
  66. {
  67. struct io_uring ring;
  68. struct io_uring_probe *p;
  69. int ret;
  70. has_sendzc = has_sendmsg = false;
  71. ret = io_uring_queue_init(1, &ring, 0);
  72. if (ret)
  73. return -1;
  74. p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
  75. if (!p)
  76. return -1;
  77. ret = io_uring_register_probe(&ring, p, 256);
  78. if (ret)
  79. return -1;
  80. has_sendzc = p->ops_len > IORING_OP_SEND_ZC;
  81. has_sendmsg = p->ops_len > IORING_OP_SENDMSG_ZC;
  82. io_uring_queue_exit(&ring);
  83. free(p);
  84. return 0;
  85. }
  86. static bool check_cq_empty(struct io_uring *ring)
  87. {
  88. struct io_uring_cqe *cqe = NULL;
  89. int ret;
  90. ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */
  91. return ret == -EAGAIN;
  92. }
  93. static int test_basic_send(struct io_uring *ring, int sock_tx, int sock_rx)
  94. {
  95. struct io_uring_sqe *sqe;
  96. struct io_uring_cqe *cqe;
  97. int msg_flags = 0;
  98. unsigned zc_flags = 0;
  99. int payload_size = 100;
  100. int ret;
  101. sqe = io_uring_get_sqe(ring);
  102. io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
  103. msg_flags, zc_flags);
  104. sqe->user_data = 1;
  105. ret = io_uring_submit(ring);
  106. assert(ret == 1);
  107. ret = io_uring_wait_cqe(ring, &cqe);
  108. assert(!ret && cqe->user_data == 1);
  109. if (cqe->res != payload_size) {
  110. fprintf(stderr, "send failed %i\n", cqe->res);
  111. return T_EXIT_FAIL;
  112. }
  113. assert(cqe->flags & IORING_CQE_F_MORE);
  114. io_uring_cqe_seen(ring, cqe);
  115. ret = io_uring_wait_cqe(ring, &cqe);
  116. assert(!ret);
  117. assert(cqe->user_data == 1);
  118. assert(cqe->flags & IORING_CQE_F_NOTIF);
  119. assert(!(cqe->flags & IORING_CQE_F_MORE));
  120. io_uring_cqe_seen(ring, cqe);
  121. assert(check_cq_empty(ring));
  122. ret = recv(sock_rx, rx_buffer, payload_size, MSG_TRUNC);
  123. assert(ret == payload_size);
  124. return T_EXIT_PASS;
  125. }
  126. static int test_send_faults_check(struct io_uring *ring, int expected)
  127. {
  128. struct io_uring_cqe *cqe;
  129. int ret, nr_cqes = 0;
  130. bool more = true;
  131. while (more) {
  132. nr_cqes++;
  133. ret = io_uring_wait_cqe(ring, &cqe);
  134. assert(!ret);
  135. assert(cqe->user_data == 1);
  136. if (nr_cqes == 1 && (cqe->flags & IORING_CQE_F_NOTIF)) {
  137. fprintf(stderr, "test_send_faults_check notif came first\n");
  138. return -1;
  139. }
  140. if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
  141. if (cqe->res != expected) {
  142. fprintf(stderr, "invalid cqe res %i vs expected %i, "
  143. "user_data %i\n",
  144. cqe->res, expected, (int)cqe->user_data);
  145. return -1;
  146. }
  147. } else {
  148. if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) {
  149. fprintf(stderr, "invalid notif cqe %i %i\n",
  150. cqe->res, cqe->flags);
  151. return -1;
  152. }
  153. }
  154. more = cqe->flags & IORING_CQE_F_MORE;
  155. io_uring_cqe_seen(ring, cqe);
  156. }
  157. if (nr_cqes > 2) {
  158. fprintf(stderr, "test_send_faults_check() too many CQEs %i\n",
  159. nr_cqes);
  160. return -1;
  161. }
  162. assert(check_cq_empty(ring));
  163. return 0;
  164. }
  165. static int test_send_faults(int sock_tx, int sock_rx)
  166. {
  167. struct io_uring_sqe *sqe;
  168. int msg_flags = 0;
  169. unsigned zc_flags = 0;
  170. int ret, payload_size = 100;
  171. struct io_uring ring;
  172. ret = io_uring_queue_init(32, &ring, 0);
  173. if (ret) {
  174. fprintf(stderr, "queue init failed: %d\n", ret);
  175. return -1;
  176. }
  177. /* invalid buffer */
  178. sqe = io_uring_get_sqe(&ring);
  179. io_uring_prep_send_zc(sqe, sock_tx, (void *)1UL, payload_size,
  180. msg_flags, zc_flags);
  181. sqe->user_data = 1;
  182. ret = io_uring_submit(&ring);
  183. assert(ret == 1);
  184. ret = test_send_faults_check(&ring, -EFAULT);
  185. if (ret) {
  186. fprintf(stderr, "test_send_faults with invalid buf failed\n");
  187. return -1;
  188. }
  189. /* invalid address */
  190. sqe = io_uring_get_sqe(&ring);
  191. io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
  192. msg_flags, zc_flags);
  193. io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)1UL,
  194. sizeof(struct sockaddr_in6));
  195. sqe->user_data = 1;
  196. ret = io_uring_submit(&ring);
  197. assert(ret == 1);
  198. ret = test_send_faults_check(&ring, -EFAULT);
  199. if (ret) {
  200. fprintf(stderr, "test_send_faults with invalid addr failed\n");
  201. return -1;
  202. }
  203. /* invalid send/recv flags */
  204. sqe = io_uring_get_sqe(&ring);
  205. io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
  206. msg_flags, ~0U);
  207. sqe->user_data = 1;
  208. ret = io_uring_submit(&ring);
  209. assert(ret == 1);
  210. ret = test_send_faults_check(&ring, -EINVAL);
  211. if (ret) {
  212. fprintf(stderr, "test_send_faults with invalid flags failed\n");
  213. return -1;
  214. }
  215. return T_EXIT_PASS;
  216. }
  217. static int create_socketpair_ip(struct sockaddr_storage *addr,
  218. int *sock_client, int *sock_server,
  219. bool ipv6, bool client_connect,
  220. bool msg_zc, bool tcp)
  221. {
  222. socklen_t addr_size;
  223. int family, sock, listen_sock = -1;
  224. int ret;
  225. memset(addr, 0, sizeof(*addr));
  226. if (ipv6) {
  227. struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
  228. family = AF_INET6;
  229. saddr->sin6_family = family;
  230. saddr->sin6_port = htons(0);
  231. addr_size = sizeof(*saddr);
  232. } else {
  233. struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
  234. family = AF_INET;
  235. saddr->sin_family = family;
  236. saddr->sin_port = htons(0);
  237. saddr->sin_addr.s_addr = htonl(INADDR_ANY);
  238. addr_size = sizeof(*saddr);
  239. }
  240. /* server sock setup */
  241. if (tcp) {
  242. sock = listen_sock = socket(family, SOCK_STREAM, IPPROTO_TCP);
  243. } else {
  244. sock = *sock_server = socket(family, SOCK_DGRAM, 0);
  245. }
  246. if (sock < 0) {
  247. perror("socket");
  248. return 1;
  249. }
  250. ret = bind(sock, (struct sockaddr *)addr, addr_size);
  251. if (ret < 0) {
  252. perror("bind");
  253. return 1;
  254. }
  255. ret = getsockname(sock, (struct sockaddr *)addr, &addr_size);
  256. if (ret < 0) {
  257. fprintf(stderr, "getsockname failed %i\n", errno);
  258. return 1;
  259. }
  260. if (tcp) {
  261. ret = listen(sock, 128);
  262. assert(ret != -1);
  263. }
  264. if (ipv6) {
  265. struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
  266. inet_pton(AF_INET6, HOSTV6, &(saddr->sin6_addr));
  267. } else {
  268. struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
  269. inet_pton(AF_INET, HOST, &saddr->sin_addr);
  270. }
  271. /* client sock setup */
  272. if (tcp) {
  273. *sock_client = socket(family, SOCK_STREAM, IPPROTO_TCP);
  274. assert(client_connect);
  275. } else {
  276. *sock_client = socket(family, SOCK_DGRAM, 0);
  277. }
  278. if (*sock_client < 0) {
  279. perror("socket");
  280. return 1;
  281. }
  282. if (client_connect) {
  283. ret = connect(*sock_client, (struct sockaddr *)addr, addr_size);
  284. if (ret < 0) {
  285. perror("connect");
  286. return 1;
  287. }
  288. }
  289. if (msg_zc) {
  290. #ifdef SO_ZEROCOPY
  291. int val = 1;
  292. /*
  293. * NOTE: apps must not set SO_ZEROCOPY when using io_uring zc.
  294. * It's only here to test interactions with MSG_ZEROCOPY.
  295. */
  296. if (setsockopt(*sock_client, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
  297. perror("setsockopt zc");
  298. return 1;
  299. }
  300. #else
  301. fprintf(stderr, "no SO_ZEROCOPY\n");
  302. return 1;
  303. #endif
  304. }
  305. if (tcp) {
  306. *sock_server = accept(listen_sock, NULL, NULL);
  307. if (!*sock_server) {
  308. fprintf(stderr, "can't accept\n");
  309. return 1;
  310. }
  311. close(listen_sock);
  312. }
  313. return 0;
  314. }
/* Configuration knobs for a single do_test_inet_send() run. */
struct send_conf {
	bool fixed_buf;		/* use a registered (fixed) buffer */
	bool mix_register;	/* randomly mix fixed and normal buffers */
	bool cork;		/* split the send into CORK_REQS linked requests */
	bool force_async;	/* set IOSQE_ASYNC on each request */
	bool use_sendmsg;	/* sendmsg(_zc) instead of send(_zc) */
	bool tcp;		/* TCP (vs UDP) sockets */
	bool zc;		/* use the zero-copy opcodes */
	bool iovec;		/* multi-entry iovec (sendmsg only) */
	bool long_iovec;	/* MAX_IOV iovec entries instead of 4 */
	bool poll_first;	/* set IORING_RECVSEND_POLL_FIRST */
	int buf_index;		/* which buffers_iov[] slot to send */
	struct sockaddr_storage *addr;	/* destination, or NULL if connected */
};
  329. static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_server,
  330. struct send_conf *conf)
  331. {
  332. struct iovec iov[MAX_IOV];
  333. struct msghdr msghdr[CORK_REQS];
  334. const unsigned zc_flags = 0;
  335. struct io_uring_sqe *sqe;
  336. struct io_uring_cqe *cqe;
  337. int nr_reqs = conf->cork ? CORK_REQS : 1;
  338. int i, ret, nr_cqes, addr_len = 0;
  339. size_t send_size = buffers_iov[conf->buf_index].iov_len;
  340. size_t chunk_size = send_size / nr_reqs;
  341. size_t chunk_size_last = send_size - chunk_size * (nr_reqs - 1);
  342. char *buf = buffers_iov[conf->buf_index].iov_base;
  343. assert(MAX_IOV >= CORK_REQS);
  344. if (conf->addr) {
  345. sa_family_t fam = ((struct sockaddr_in *)conf->addr)->sin_family;
  346. addr_len = (fam == AF_INET) ? sizeof(struct sockaddr_in) :
  347. sizeof(struct sockaddr_in6);
  348. }
  349. memset(rx_buffer, 0, send_size);
  350. for (i = 0; i < nr_reqs; i++) {
  351. bool real_fixed_buf = conf->fixed_buf;
  352. size_t cur_size = chunk_size;
  353. int msg_flags = MSG_WAITALL;
  354. if (conf->mix_register)
  355. real_fixed_buf = rand() & 1;
  356. if (i != nr_reqs - 1)
  357. msg_flags |= MSG_MORE;
  358. else
  359. cur_size = chunk_size_last;
  360. sqe = io_uring_get_sqe(ring);
  361. if (!conf->use_sendmsg) {
  362. if (conf->zc) {
  363. io_uring_prep_send_zc(sqe, sock_client, buf + i * chunk_size,
  364. cur_size, msg_flags, zc_flags);
  365. } else {
  366. io_uring_prep_send(sqe, sock_client, buf + i * chunk_size,
  367. cur_size, msg_flags);
  368. }
  369. if (real_fixed_buf) {
  370. sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
  371. sqe->buf_index = conf->buf_index;
  372. }
  373. if (conf->addr)
  374. io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)conf->addr,
  375. addr_len);
  376. } else {
  377. struct iovec *io;
  378. int iov_len;
  379. if (conf->zc)
  380. io_uring_prep_sendmsg_zc(sqe, sock_client, &msghdr[i], msg_flags);
  381. else
  382. io_uring_prep_sendmsg(sqe, sock_client, &msghdr[i], msg_flags);
  383. if (!conf->iovec) {
  384. io = &iov[i];
  385. iov_len = 1;
  386. iov[i].iov_len = cur_size;
  387. iov[i].iov_base = buf + i * chunk_size;
  388. } else {
  389. char *it = buf;
  390. int j;
  391. assert(nr_reqs == 1);
  392. iov_len = conf->long_iovec ? MAX_IOV : 4;
  393. io = iov;
  394. for (j = 0; j < iov_len; j++)
  395. io[j].iov_len = 1;
  396. /* first want to be easily advanced */
  397. io[0].iov_base = it;
  398. it += io[0].iov_len;
  399. /* this should cause retry */
  400. io[1].iov_len = chunk_size - iov_len + 1;
  401. io[1].iov_base = it;
  402. it += io[1].iov_len;
  403. /* fill the rest */
  404. for (j = 2; j < iov_len; j++) {
  405. io[j].iov_base = it;
  406. it += io[j].iov_len;
  407. }
  408. }
  409. memset(&msghdr[i], 0, sizeof(msghdr[i]));
  410. msghdr[i].msg_iov = io;
  411. msghdr[i].msg_iovlen = iov_len;
  412. if (conf->addr) {
  413. msghdr[i].msg_name = conf->addr;
  414. msghdr[i].msg_namelen = addr_len;
  415. }
  416. }
  417. sqe->user_data = i;
  418. if (conf->force_async)
  419. sqe->flags |= IOSQE_ASYNC;
  420. if (conf->poll_first)
  421. sqe->ioprio |= IORING_RECVSEND_POLL_FIRST;
  422. if (i != nr_reqs - 1)
  423. sqe->flags |= IOSQE_IO_LINK;
  424. }
  425. sqe = io_uring_get_sqe(ring);
  426. io_uring_prep_recv(sqe, sock_server, rx_buffer, send_size, MSG_WAITALL);
  427. sqe->user_data = RX_TAG;
  428. ret = io_uring_submit(ring);
  429. if (ret != nr_reqs + 1) {
  430. fprintf(stderr, "submit failed, got %i expected %i\n", ret, nr_reqs);
  431. return 1;
  432. }
  433. nr_cqes = nr_reqs + 1;
  434. for (i = 0; i < nr_cqes; i++) {
  435. int expected = chunk_size;
  436. ret = io_uring_wait_cqe(ring, &cqe);
  437. if (ret) {
  438. fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
  439. return 1;
  440. }
  441. if (cqe->user_data == RX_TAG) {
  442. if (cqe->res != send_size) {
  443. fprintf(stderr, "rx failed res: %i, expected %i\n",
  444. cqe->res, (int)send_size);
  445. return 1;
  446. }
  447. io_uring_cqe_seen(ring, cqe);
  448. continue;
  449. }
  450. if ((cqe->flags & IORING_CQE_F_MORE) && (cqe->flags & IORING_CQE_F_NOTIF)) {
  451. fprintf(stderr, "unexpected cflags %i res %i\n",
  452. cqe->flags, cqe->res);
  453. return 1;
  454. }
  455. if (cqe->user_data >= nr_reqs) {
  456. fprintf(stderr, "invalid user_data %lu\n",
  457. (unsigned long)cqe->user_data);
  458. return 1;
  459. }
  460. if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
  461. if (cqe->flags & IORING_CQE_F_MORE)
  462. nr_cqes++;
  463. if (cqe->user_data == nr_reqs - 1)
  464. expected = chunk_size_last;
  465. if (cqe->res != expected) {
  466. if (cqe->res == -ENOMEM) {
  467. if (!hit_enomem) {
  468. fprintf(stderr, "Hit -ENOMEM. "
  469. "Increase ulimit -l "
  470. "limit for a complete "
  471. "test run. Skipping "
  472. "parts.\n");
  473. hit_enomem = 1;
  474. }
  475. return 0;
  476. }
  477. fprintf(stderr, "invalid cqe->res %d expected %d\n",
  478. cqe->res, expected);
  479. return 1;
  480. }
  481. }
  482. io_uring_cqe_seen(ring, cqe);
  483. }
  484. for (i = 0; i < send_size; i++) {
  485. if (buf[i] != rx_buffer[i]) {
  486. fprintf(stderr, "botched data, first mismated byte %i, "
  487. "%u vs %u\n", i, buf[i], rx_buffer[i]);
  488. return 1;
  489. }
  490. }
  491. return 0;
  492. }
  493. static int test_inet_send(struct io_uring *ring)
  494. {
  495. struct send_conf conf;
  496. struct sockaddr_storage addr;
  497. int sock_client = -1, sock_server = -1;
  498. int ret, j, i;
  499. int buf_index;
  500. for (j = 0; j < 32; j++) {
  501. bool ipv6 = j & 1;
  502. bool client_connect = j & 2;
  503. bool msg_zc_set = j & 4;
  504. bool tcp = j & 8;
  505. bool swap_sockets = j & 16;
  506. if (tcp && !client_connect)
  507. continue;
  508. if (swap_sockets && !tcp)
  509. continue;
  510. #ifndef SO_ZEROCOPY
  511. if (msg_zc_set)
  512. continue;
  513. #endif
  514. ret = create_socketpair_ip(&addr, &sock_client, &sock_server, ipv6,
  515. client_connect, msg_zc_set, tcp);
  516. if (ret) {
  517. fprintf(stderr, "sock prep failed %d\n", ret);
  518. return 1;
  519. }
  520. if (swap_sockets) {
  521. int tmp_sock = sock_client;
  522. sock_client = sock_server;
  523. sock_server = tmp_sock;
  524. }
  525. for (i = 0; i < 1024; i++) {
  526. bool regbuf;
  527. conf.use_sendmsg = i & 1;
  528. conf.poll_first = i & 2;
  529. conf.fixed_buf = i & 4;
  530. conf.addr = (i & 8) ? &addr : NULL;
  531. conf.cork = i & 16;
  532. conf.mix_register = i & 32;
  533. conf.force_async = i & 64;
  534. conf.zc = i & 128;
  535. conf.iovec = i & 256;
  536. conf.long_iovec = i & 512;
  537. conf.tcp = tcp;
  538. regbuf = conf.mix_register || conf.fixed_buf;
  539. if (conf.iovec && (!conf.use_sendmsg || regbuf || conf.cork))
  540. continue;
  541. if (!conf.zc) {
  542. if (regbuf)
  543. continue;
  544. /*
  545. * Non zerocopy send w/ addr was added together with sendmsg_zc,
  546. * skip if we the kernel doesn't support it.
  547. */
  548. if (conf.addr && !has_sendmsg)
  549. continue;
  550. }
  551. if (tcp && (conf.cork || conf.addr))
  552. continue;
  553. if (conf.mix_register && (!conf.cork || conf.fixed_buf))
  554. continue;
  555. if (!client_connect && conf.addr == NULL)
  556. continue;
  557. if (conf.use_sendmsg && (regbuf || !has_sendmsg))
  558. continue;
  559. if (msg_zc_set && !conf.zc)
  560. continue;
  561. for (buf_index = 0; buf_index < ARRAY_SIZE(buffers_iov); buf_index++) {
  562. size_t len = buffers_iov[buf_index].iov_len;
  563. if (!buffers_iov[buf_index].iov_base)
  564. continue;
  565. /* UDP IPv4 max datagram size is under 64K */
  566. if (!tcp && len > (1U << 15))
  567. continue;
  568. conf.buf_index = buf_index;
  569. ret = do_test_inet_send(ring, sock_client, sock_server, &conf);
  570. if (ret) {
  571. fprintf(stderr, "send failed fixed buf %i, "
  572. "conn %i, addr %i, cork %i\n",
  573. conf.fixed_buf, client_connect,
  574. !!conf.addr, conf.cork);
  575. return 1;
  576. }
  577. }
  578. }
  579. close(sock_client);
  580. close(sock_server);
  581. }
  582. return 0;
  583. }
  584. static int test_async_addr(struct io_uring *ring)
  585. {
  586. struct io_uring_sqe *sqe;
  587. struct io_uring_cqe *cqe;
  588. struct sockaddr_storage addr;
  589. int sock_tx = -1, sock_rx = -1;
  590. struct __kernel_timespec ts;
  591. int ret;
  592. ts.tv_sec = 1;
  593. ts.tv_nsec = 0;
  594. ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, false, false, false);
  595. if (ret) {
  596. fprintf(stderr, "sock prep failed %d\n", ret);
  597. return 1;
  598. }
  599. sqe = io_uring_get_sqe(ring);
  600. io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ETIME_SUCCESS);
  601. sqe->user_data = 1;
  602. sqe->flags |= IOSQE_IO_LINK;
  603. sqe = io_uring_get_sqe(ring);
  604. io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0, 0);
  605. sqe->user_data = 2;
  606. io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)&addr,
  607. sizeof(struct sockaddr_in6));
  608. ret = io_uring_submit(ring);
  609. assert(ret == 2);
  610. memset(&addr, 0, sizeof(addr));
  611. ret = io_uring_wait_cqe(ring, &cqe);
  612. if (ret) {
  613. fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
  614. return 1;
  615. }
  616. if (cqe->user_data != 1 || cqe->res != -ETIME) {
  617. fprintf(stderr, "invalid timeout res %i %i\n",
  618. (int)cqe->user_data, cqe->res);
  619. return 1;
  620. }
  621. io_uring_cqe_seen(ring, cqe);
  622. ret = io_uring_wait_cqe(ring, &cqe);
  623. if (ret) {
  624. fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
  625. return 1;
  626. }
  627. if (cqe->user_data != 2 || cqe->res != 1) {
  628. fprintf(stderr, "invalid send %i %i\n",
  629. (int)cqe->user_data, cqe->res);
  630. return 1;
  631. }
  632. io_uring_cqe_seen(ring, cqe);
  633. ret = recv(sock_rx, rx_buffer, 1, MSG_TRUNC);
  634. assert(ret == 1);
  635. ret = io_uring_wait_cqe(ring, &cqe);
  636. if (ret) {
  637. fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
  638. return 1;
  639. }
  640. assert(cqe->flags & IORING_CQE_F_NOTIF);
  641. io_uring_cqe_seen(ring, cqe);
  642. close(sock_tx);
  643. close(sock_rx);
  644. return 0;
  645. }
  646. static int test_sendzc_report(struct io_uring *ring)
  647. {
  648. struct io_uring_sqe *sqe;
  649. struct io_uring_cqe *cqe;
  650. struct sockaddr_storage addr;
  651. int sock_tx, sock_rx;
  652. int ret;
  653. ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, true, false, true);
  654. if (ret) {
  655. fprintf(stderr, "sock prep failed %d\n", ret);
  656. return 1;
  657. }
  658. sqe = io_uring_get_sqe(ring);
  659. io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0,
  660. IORING_SEND_ZC_REPORT_USAGE);
  661. ret = io_uring_submit(ring);
  662. if (ret != 1) {
  663. fprintf(stderr, "io_uring_submit failed %i\n", ret);
  664. return 1;
  665. }
  666. ret = io_uring_wait_cqe(ring, &cqe);
  667. if (ret) {
  668. fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
  669. return 1;
  670. }
  671. if (cqe->res != 1 && cqe->res != -EINVAL) {
  672. fprintf(stderr, "sendzc report failed %u\n", cqe->res);
  673. return 1;
  674. }
  675. if (!(cqe->flags & IORING_CQE_F_MORE)) {
  676. fprintf(stderr, "expected notification %i\n", cqe->res);
  677. return 1;
  678. }
  679. io_uring_cqe_seen(ring, cqe);
  680. ret = io_uring_wait_cqe(ring, &cqe);
  681. if (ret) {
  682. fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
  683. return 1;
  684. }
  685. if (cqe->flags & IORING_CQE_F_MORE) {
  686. fprintf(stderr, "F_MORE after notification\n");
  687. return 1;
  688. }
  689. io_uring_cqe_seen(ring, cqe);
  690. close(sock_tx);
  691. close(sock_rx);
  692. return 0;
  693. }
  694. /* see also send_recv.c:test_invalid */
  695. static int test_invalid_zc(int fds[2])
  696. {
  697. struct io_uring ring;
  698. int ret;
  699. struct io_uring_cqe *cqe;
  700. struct io_uring_sqe *sqe;
  701. bool notif = false;
  702. if (!has_sendmsg)
  703. return 0;
  704. ret = t_create_ring(8, &ring, 0);
  705. if (ret)
  706. return ret;
  707. sqe = io_uring_get_sqe(&ring);
  708. io_uring_prep_sendmsg(sqe, fds[0], NULL, MSG_WAITALL);
  709. sqe->opcode = IORING_OP_SENDMSG_ZC;
  710. sqe->flags |= IOSQE_ASYNC;
  711. ret = io_uring_submit(&ring);
  712. if (ret != 1) {
  713. fprintf(stderr, "submit failed %i\n", ret);
  714. return ret;
  715. }
  716. ret = io_uring_wait_cqe(&ring, &cqe);
  717. if (ret)
  718. return 1;
  719. if (cqe->flags & IORING_CQE_F_MORE)
  720. notif = true;
  721. io_uring_cqe_seen(&ring, cqe);
  722. if (notif) {
  723. ret = io_uring_wait_cqe(&ring, &cqe);
  724. if (ret)
  725. return 1;
  726. io_uring_cqe_seen(&ring, cqe);
  727. }
  728. io_uring_queue_exit(&ring);
  729. return 0;
  730. }
  731. static int run_basic_tests(void)
  732. {
  733. struct sockaddr_storage addr;
  734. int ret, i, sp[2];
  735. /* create TCP IPv6 pair */
  736. ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true);
  737. if (ret) {
  738. fprintf(stderr, "sock prep failed %d\n", ret);
  739. return -1;
  740. }
  741. for (i = 0; i < 2; i++) {
  742. struct io_uring ring;
  743. unsigned ring_flags = 0;
  744. if (i & 1)
  745. ring_flags |= IORING_SETUP_DEFER_TASKRUN;
  746. ret = io_uring_queue_init(32, &ring, ring_flags);
  747. if (ret) {
  748. if (ret == -EINVAL)
  749. continue;
  750. fprintf(stderr, "queue init failed: %d\n", ret);
  751. return -1;
  752. }
  753. ret = test_basic_send(&ring, sp[0], sp[1]);
  754. if (ret) {
  755. fprintf(stderr, "test_basic_send() failed\n");
  756. return -1;
  757. }
  758. ret = test_send_faults(sp[0], sp[1]);
  759. if (ret) {
  760. fprintf(stderr, "test_send_faults() failed\n");
  761. return -1;
  762. }
  763. ret = test_invalid_zc(sp);
  764. if (ret) {
  765. fprintf(stderr, "test_invalid_zc() failed\n");
  766. return -1;
  767. }
  768. ret = test_async_addr(&ring);
  769. if (ret) {
  770. fprintf(stderr, "test_async_addr() failed\n");
  771. return T_EXIT_FAIL;
  772. }
  773. ret = test_sendzc_report(&ring);
  774. if (ret) {
  775. fprintf(stderr, "test_sendzc_report() failed\n");
  776. return T_EXIT_FAIL;
  777. }
  778. io_uring_queue_exit(&ring);
  779. }
  780. close(sp[0]);
  781. close(sp[1]);
  782. return 0;
  783. }
  784. int main(int argc, char *argv[])
  785. {
  786. size_t len;
  787. int ret, i;
  788. if (argc > 1)
  789. return T_EXIT_SKIP;
  790. ret = probe_zc_support();
  791. if (ret) {
  792. printf("probe failed\n");
  793. return T_EXIT_FAIL;
  794. }
  795. if (!has_sendzc) {
  796. printf("no IORING_OP_SEND_ZC support, skip\n");
  797. return T_EXIT_SKIP;
  798. }
  799. page_sz = sysconf(_SC_PAGESIZE);
  800. len = LARGE_BUF_SIZE;
  801. tx_buffer = aligned_alloc(page_sz, len);
  802. rx_buffer = aligned_alloc(page_sz, len);
  803. if (tx_buffer && rx_buffer) {
  804. buffers_iov[BUF_T_LARGE].iov_base = tx_buffer;
  805. buffers_iov[BUF_T_LARGE].iov_len = len;
  806. } else {
  807. if (tx_buffer)
  808. free(tx_buffer);
  809. if (rx_buffer)
  810. free(rx_buffer);
  811. printf("skip large buffer tests, can't alloc\n");
  812. len = 2 * page_sz;
  813. tx_buffer = aligned_alloc(page_sz, len);
  814. rx_buffer = aligned_alloc(page_sz, len);
  815. }
  816. if (!tx_buffer || !rx_buffer) {
  817. fprintf(stderr, "can't allocate buffers\n");
  818. return T_EXIT_FAIL;
  819. }
  820. srand((unsigned)time(NULL));
  821. for (i = 0; i < len; i++)
  822. tx_buffer[i] = i;
  823. memset(rx_buffer, 0, len);
  824. buffers_iov[BUF_T_NORMAL].iov_base = tx_buffer + page_sz;
  825. buffers_iov[BUF_T_NORMAL].iov_len = page_sz;
  826. buffers_iov[BUF_T_SMALL].iov_base = tx_buffer;
  827. buffers_iov[BUF_T_SMALL].iov_len = 137;
  828. buffers_iov[BUF_T_NONALIGNED].iov_base = tx_buffer + BUFFER_OFFSET;
  829. buffers_iov[BUF_T_NONALIGNED].iov_len = 2 * page_sz - BUFFER_OFFSET - 13;
  830. if (len == LARGE_BUF_SIZE) {
  831. void *huge_page;
  832. int off = page_sz + 27;
  833. len = 1U << 22;
  834. huge_page = mmap(NULL, len, PROT_READ|PROT_WRITE,
  835. MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
  836. -1, 0);
  837. if (huge_page != MAP_FAILED) {
  838. buffers_iov[BUF_T_HUGETLB].iov_base = huge_page + off;
  839. buffers_iov[BUF_T_HUGETLB].iov_len = len - off;
  840. }
  841. }
  842. ret = run_basic_tests();
  843. if (ret)
  844. return T_EXIT_FAIL;
  845. for (i = 0; i < 2; i++) {
  846. struct io_uring ring;
  847. unsigned ring_flags = 0;
  848. if (i & 1)
  849. ring_flags |= IORING_SETUP_SINGLE_ISSUER |
  850. IORING_SETUP_DEFER_TASKRUN;
  851. ret = io_uring_queue_init(32, &ring, ring_flags);
  852. if (ret) {
  853. if (ret == -EINVAL)
  854. continue;
  855. fprintf(stderr, "queue init failed: %d\n", ret);
  856. return -1;
  857. }
  858. ret = t_register_buffers(&ring, buffers_iov, ARRAY_SIZE(buffers_iov));
  859. if (ret == T_SETUP_SKIP) {
  860. fprintf(stderr, "can't register bufs, skip\n");
  861. goto out;
  862. } else if (ret != T_SETUP_OK) {
  863. fprintf(stderr, "buffer registration failed %i\n", ret);
  864. return T_EXIT_FAIL;
  865. }
  866. if (buffers_iov[BUF_T_HUGETLB].iov_base) {
  867. buffers_iov[BUF_T_HUGETLB].iov_base += 13;
  868. buffers_iov[BUF_T_HUGETLB].iov_len -= 26;
  869. }
  870. if (buffers_iov[BUF_T_LARGE].iov_base) {
  871. buffers_iov[BUF_T_LARGE].iov_base += 13;
  872. buffers_iov[BUF_T_LARGE].iov_len -= 26;
  873. }
  874. ret = test_inet_send(&ring);
  875. if (ret) {
  876. fprintf(stderr, "test_inet_send() failed (defer_taskrun %i)\n",
  877. ring_flags & IORING_SETUP_DEFER_TASKRUN);
  878. return T_EXIT_FAIL;
  879. }
  880. if (buffers_iov[BUF_T_HUGETLB].iov_base) {
  881. buffers_iov[BUF_T_HUGETLB].iov_base -= 13;
  882. buffers_iov[BUF_T_HUGETLB].iov_len += 26;
  883. }
  884. if (buffers_iov[BUF_T_LARGE].iov_base) {
  885. buffers_iov[BUF_T_LARGE].iov_base -= 13;
  886. buffers_iov[BUF_T_LARGE].iov_len += 26;
  887. }
  888. out:
  889. io_uring_queue_exit(&ring);
  890. }
  891. return T_EXIT_PASS;
  892. }