#include "../config-host.h"
/* SPDX-License-Identifier: MIT */
/*
 * Description: run various CQ ring overflow tests
 *
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>

#include "helpers.h"
#include "liburing.h"

#define FILE_SIZE	(256 * 1024)
#define BS		4096
#define BUFFERS		(FILE_SIZE / BS)

static struct iovec *vecs;

#define ENTRIES	8

/*
 * io_uring has rare cases where CQEs are lost.
 * This happens when there is no space in the CQ ring, and also there is no
 * GFP_ATOMIC memory available. In reality this probably means that the process
 * is about to be killed as many other things might start failing, but we still
 * want to test that liburing and the kernel deal with this properly. The fault
 * injection framework allows us to test this scenario. Unfortunately this
 * requires some system-wide changes and so we do not enable this by default.
 * The tests in this file should work in both cases (where overflows are queued
 * and where they are dropped) on recent kernels.
 *
 * In order to test dropped CQEs you should enable fault injection in the
 * kernel config:
 *
 * CONFIG_FAULT_INJECTION=y
 * CONFIG_FAILSLAB=y
 * CONFIG_FAULT_INJECTION_DEBUG_FS=y
 *
 * and then run the test as follows:
 * echo Y > /sys/kernel/debug/failslab/task-filter
 * echo 100 > /sys/kernel/debug/failslab/probability
 * echo 0 > /sys/kernel/debug/failslab/verbose
 * echo 100000 > /sys/kernel/debug/failslab/times
 * bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t"
 */
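
/*
 * Read blocks from 'file' with O_DIRECT, deliberately submitting more
 * requests than the ENTRIES-sized ring can complete into its CQ ring and
 * sleeping 'usecs' between the two submission batches. On return, *drops is
 * set if completions overflowed (or, on IORING_FEAT_NODROP kernels, if
 * submission returned -EBUSY). If 'fault' is set, one buffer is freed up
 * front so that its read is expected to complete with -EFAULT.
 */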
static int test_io(const char *file, unsigned long usecs, unsigned *drops,
		   int fault)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring_params p;
	unsigned reaped, total;
	struct io_uring ring;
	int nodrop, i, fd, ret;
	bool cqe_dropped = false;

	fd = open(file, O_RDONLY | O_DIRECT);
	if (fd < 0) {
		if (errno == EINVAL)
			return T_EXIT_SKIP;
		perror("file open");
		return T_EXIT_FAIL;
	}

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(ENTRIES, &ring, &p);
	if (ret) {
		close(fd);
		fprintf(stderr, "ring create failed: %d\n", ret);
		return T_EXIT_FAIL;
	}
	nodrop = 0;
	if (p.features & IORING_FEAT_NODROP)
		nodrop = 1;

	total = 0;
	for (i = 0; i < BUFFERS / 2; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		if (fault && i == ENTRIES + 4) {
			free(vecs[i].iov_base);
			vecs[i].iov_base = NULL;
		}
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);
		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			total = i;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			total = i;
			break;
		}
		total++;
	}

	if (*drops)
		goto reap_it;

	usleep(usecs);

	for (i = total; i < BUFFERS; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);
		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			break;
		}
		total++;
	}
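
	/*
	 * Reap what we expect to see. Without IORING_FEAT_NODROP, or once a
	 * CQE has been reported dropped, overflowed completions show up in
	 * the ring's koverflow counter rather than as reaped CQEs.
	 */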
reap_it:
	reaped = 0;
	do {
		if (nodrop && !cqe_dropped) {
			/* nodrop should never lose events unless cqe_dropped */
			if (reaped == total)
				break;
		} else {
			if (reaped + *ring.cq.koverflow == total)
				break;
		}
		ret = io_uring_wait_cqe(&ring, &cqe);
		if (nodrop && ret == -EBADR) {
			cqe_dropped = true;
			continue;
		} else if (ret) {
			fprintf(stderr, "wait_cqe=%d\n", ret);
			goto err;
		}
		if (cqe->res != BS) {
			if (!(fault && cqe->res == -EFAULT)) {
				fprintf(stderr, "cqe res %d, wanted %d\n",
						cqe->res, BS);
				goto err;
			}
		}
		io_uring_cqe_seen(&ring, cqe);
		reaped++;
	} while (1);

	if (!io_uring_peek_cqe(&ring, &cqe)) {
		fprintf(stderr, "found unexpected completion\n");
		goto err;
	}

	if (!nodrop || cqe_dropped) {
		*drops = *ring.cq.koverflow;
	} else if (*ring.cq.koverflow) {
		fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow);
		goto err;
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return T_EXIT_PASS;
err:
	if (fd != -1)
		close(fd);
	io_uring_queue_exit(&ring);
	return T_EXIT_SKIP;
}

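/*
 * Reap up to 'nr_events' completions, optionally waiting for each one, and
 * verify that user_data values arrive in submission order. If the kernel
 * reports dropped CQEs (-EBADR), account for them via the koverflow counter
 * and stop checking ordering. Returns the number of events accounted for, or
 * the first error if none were.
 */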
static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait)
{
	struct io_uring_cqe *cqe;
	int i, ret = 0, seq = 0;
	unsigned int start_overflow = *ring->cq.koverflow;
	bool dropped = false;

	for (i = 0; i < nr_events; i++) {
		if (do_wait)
			ret = io_uring_wait_cqe(ring, &cqe);
		else
			ret = io_uring_peek_cqe(ring, &cqe);
		if (do_wait && ret == -EBADR) {
			unsigned int this_drop = *ring->cq.koverflow -
				start_overflow;

			dropped = true;
			start_overflow = *ring->cq.koverflow;
			assert(this_drop > 0);
			i += (this_drop - 1);
			continue;
		} else if (ret) {
			if (ret != -EAGAIN)
				fprintf(stderr, "cqe peek failed: %d\n", ret);
			break;
		}
		if (!dropped && cqe->user_data != seq) {
			fprintf(stderr, "cqe sequence out-of-order\n");
			fprintf(stderr, "got %d, wanted %d\n",
				(int) cqe->user_data, seq);
			return -EINVAL;
		}
		seq++;
		io_uring_cqe_seen(ring, cqe);
	}

	return i ? i : ret;
}

/*
 * Submit some NOPs and watch if the overflow is correct
 */
static int test_overflow(void)
{
	struct io_uring ring;
	struct io_uring_params p;
	struct io_uring_sqe *sqe;
	unsigned pending;
	int ret, i, j;

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(4, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	/* submit 4x4 SQEs, should overflow the ring by 8 */
	pending = 0;
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			sqe = io_uring_get_sqe(&ring);
			if (!sqe) {
				fprintf(stderr, "get sqe failed\n");
				goto err;
			}

			io_uring_prep_nop(sqe);
			sqe->user_data = (i * 4) + j;
		}

		ret = io_uring_submit(&ring);
		if (ret == 4) {
			pending += 4;
			continue;
		}
		if (p.features & IORING_FEAT_NODROP) {
			if (ret == -EBUSY)
				break;
		}
		fprintf(stderr, "sqe submit failed: %d\n", ret);
		goto err;
	}

	/* we should now have 8 completions ready */
	ret = reap_events(&ring, pending, 0);
	if (ret < 0)
		goto err;

	if (!(p.features & IORING_FEAT_NODROP)) {
		if (*ring.cq.koverflow != 8) {
			fprintf(stderr, "cq ring overflow %d, expected 8\n",
					*ring.cq.koverflow);
			goto err;
		}
	}
	io_uring_queue_exit(&ring);
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}

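/* Submit a single NOP with the given user_data, asserting that it is accepted */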
static void submit_one_nop(struct io_uring *ring, int ud)
{
	struct io_uring_sqe *sqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	assert(sqe);
	io_uring_prep_nop(sqe);
	sqe->user_data = ud;
	ret = io_uring_submit(ring);
	assert(ret == 1);
}

/*
 * Create an overflow condition and ensure that SQEs are still processed
 */
static int test_overflow_handling(bool batch, int cqe_multiple, bool poll,
				  bool defer)
{
	struct io_uring ring;
	struct io_uring_params p;
	int ret, i, j, ud, cqe_count;
	unsigned int count;
	int const N = 8;
	int const LOOPS = 128;
	int const QUEUE_LENGTH = 1024;
	int completions[N];
	int queue[QUEUE_LENGTH];
	int queued = 0;
	int outstanding = 0;
	bool cqe_dropped = false;

	memset(&completions, 0, sizeof(int) * N);
	memset(&p, 0, sizeof(p));
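	/*
	 * Ask for a deliberately small CQ ring via IORING_SETUP_CQSIZE
	 * (2 * cqe_multiple entries, always smaller than N) so that the N
	 * NOPs submitted below overflow it.
	 */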
	p.cq_entries = 2 * cqe_multiple;
	p.flags |= IORING_SETUP_CQSIZE;
	if (poll)
		p.flags |= IORING_SETUP_IOPOLL;
	if (defer)
		p.flags |= IORING_SETUP_SINGLE_ISSUER |
			   IORING_SETUP_DEFER_TASKRUN;

	ret = io_uring_queue_init_params(2, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	assert(p.cq_entries < N);
	/* submit N SQEs, some should overflow */
	for (i = 0; i < N; i++) {
		submit_one_nop(&ring, i);
		outstanding++;
	}

	for (i = 0; i < LOOPS; i++) {
		struct io_uring_cqe *cqes[N];

		if (io_uring_cq_has_overflow(&ring)) {
			/*
			 * Flush any overflowed CQEs and process those. Actively
			 * flush these to make sure CQEs arrive in vague order
			 * of being sent.
			 */
			ret = io_uring_get_events(&ring);
			if (ret != 0) {
				fprintf(stderr,
					"io_uring_get_events returned %d\n",
					ret);
				goto err;
			}
		} else if (!cqe_dropped) {
			for (j = 0; j < queued; j++) {
				submit_one_nop(&ring, queue[j]);
				outstanding++;
			}
			queued = 0;
		}

		/* We have lost some random cqes, stop if no remaining. */
		if (cqe_dropped && outstanding == *ring.cq.koverflow)
			break;

		ret = io_uring_wait_cqe(&ring, &cqes[0]);
		if (ret == -EBADR) {
			cqe_dropped = true;
			fprintf(stderr, "CQE dropped\n");
			continue;
		} else if (ret != 0) {
			fprintf(stderr, "io_uring_wait_cqe failed %d\n", ret);
			goto err;
		}
		cqe_count = 1;
		if (batch) {
			ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2);
			if (ret < 0) {
				fprintf(stderr,
					"io_uring_peek_batch_cqe failed %d\n",
					ret);
				goto err;
			}
			cqe_count = ret;
		}
		for (j = 0; j < cqe_count; j++) {
			assert(cqes[j]->user_data < N);
			ud = cqes[j]->user_data;
			completions[ud]++;
			assert(queued < QUEUE_LENGTH);
			queue[queued++] = (int)ud;
		}
		io_uring_cq_advance(&ring, cqe_count);
		outstanding -= cqe_count;
	}

	/* See if there were any drops by flushing the CQ ring *and* overflow */
	do {
		struct io_uring_cqe *cqe;

		ret = io_uring_get_events(&ring);
		if (ret < 0) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			goto err;
		}
		if (outstanding && !io_uring_cq_ready(&ring))
			ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL);

		if (ret && ret != -ETIME) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			fprintf(stderr, "wait_cqe_timeout = %d\n", ret);
			goto err;
		}
		count = io_uring_cq_ready(&ring);
		io_uring_cq_advance(&ring, count);
		outstanding -= count;
	} while (count);

	io_uring_queue_exit(&ring);

	/* Make sure that completions come back in the same order they were
	 * sent. If they come back unfairly then this will concentrate on a
	 * couple of indices.
	 */
	for (i = 1; !cqe_dropped && i < N; i++) {
		if (abs(completions[i] - completions[i - 1]) > 1) {
			fprintf(stderr, "bad completion size %d %d\n",
					completions[i], completions[i - 1]);
			goto err;
		}
	}
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}

int main(int argc, char *argv[])
{
	const char *fname = ".cq-overflow";
	unsigned iters, drops;
	unsigned long usecs;
	int ret;
	int i;
	bool can_defer;

	if (argc > 1)
		return T_EXIT_SKIP;

	can_defer = t_probe_defer_taskrun();
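	/*
	 * Run test_overflow_handling() for every combination of CQE batching,
	 * CQ size multiple, IOPOLL and DEFER_TASKRUN, decoded from the bits
	 * of the loop counter. DEFER_TASKRUN cases are skipped on kernels
	 * that do not support it.
	 */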
	for (i = 0; i < 16; i++) {
		bool batch = i & 1;
		int mult = (i & 2) ? 1 : 2;
		bool poll = i & 4;
		bool defer = i & 8;

		if (defer && !can_defer)
			continue;

		ret = test_overflow_handling(batch, mult, poll, defer);
		if (ret) {
			fprintf(stderr, "test_overflow_handling("
				"batch=%d, mult=%d, poll=%d, defer=%d) failed\n",
				batch, mult, poll, defer);
			goto err;
		}
	}

	ret = test_overflow();
	if (ret) {
		fprintf(stderr, "test_overflow failed\n");
		return ret;
	}

	t_create_file(fname, FILE_SIZE);

	vecs = t_create_buffers(BUFFERS, BS);

	iters = 0;
	usecs = 1000;
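	/*
	 * Re-run the no-fault test with a growing delay between the two
	 * submission batches until an overflow/drop is observed, giving up
	 * after 40 iterations.
	 */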
	do {
		drops = 0;

		ret = test_io(fname, usecs, &drops, 0);
		if (ret == T_EXIT_SKIP)
			break;
		else if (ret != T_EXIT_PASS) {
			fprintf(stderr, "test_io nofault failed\n");
			goto err;
		}
		if (drops)
			break;
		usecs = (usecs * 12) / 10;
		iters++;
	} while (iters < 40);

	if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io nofault failed\n");
		goto err;
	}

	if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io fault failed\n");
		goto err;
	}

	unlink(fname);
	if (vecs != NULL) {
		for (i = 0; i < BUFFERS; i++)
			free(vecs[i].iov_base);
	}
	free(vecs);
	return T_EXIT_PASS;
err:
	unlink(fname);
	if (vecs != NULL) {
		for (i = 0; i < BUFFERS; i++)
			free(vecs[i].iov_base);
	}
	free(vecs);
	return T_EXIT_FAIL;
}