#include "../config-host.h"
/* SPDX-License-Identifier: MIT */
/*
 * Description: run various CQ ring overflow tests
 *
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>

#include "helpers.h"
#include "liburing.h"

#define FILE_SIZE (256 * 1024)
#define BS 4096
#define BUFFERS (FILE_SIZE / BS)

static struct iovec *vecs;

#define ENTRIES 8

/*
 * io_uring has rare cases where CQEs are lost.
 * This happens when there is no space in the CQ ring, and also there is no
 * GFP_ATOMIC memory available. In reality this probably means that the process
 * is about to be killed as many other things might start failing, but we still
 * want to test that liburing and the kernel deal with this properly. The fault
 * injection framework allows us to test this scenario. Unfortunately this
 * requires some system wide changes and so we do not enable this by default.
 * The tests in this file should work in both cases (where overflows are queued
 * and where they are dropped) on recent kernels.
 *
 * In order to test dropped CQEs you should enable fault injection in the kernel
 * config:
 *
 * CONFIG_FAULT_INJECTION=y
 * CONFIG_FAILSLAB=y
 * CONFIG_FAULT_INJECTION_DEBUG_FS=y
 *
 * and then run the test as follows:
 * echo Y > /sys/kernel/debug/failslab/task-filter
 * echo 100 > /sys/kernel/debug/failslab/probability
 * echo 0 > /sys/kernel/debug/failslab/verbose
 * echo 100000 > /sys/kernel/debug/failslab/times
 * bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t"
 */
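
/*
 * Submit O_DIRECT reads against an ENTRIES-deep ring so that completions
 * can outnumber the CQ ring. 'usecs' delays the second submission batch,
 * 'drops' reports the overflow activity seen (the kernel overflow counter,
 * or 1 if submission returned -EBUSY), and 'fault' poisons one iovec so a
 * read completes with -EFAULT.
 */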
static int test_io(const char *file, unsigned long usecs, unsigned *drops,
		   int fault)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring_params p;
	unsigned reaped, total;
	struct io_uring ring;
	int nodrop, i, fd, ret;
	bool cqe_dropped = false;

	fd = open(file, O_RDONLY | O_DIRECT);
	if (fd < 0) {
		if (errno == EINVAL)
			return T_EXIT_SKIP;
		perror("file open");
		return T_EXIT_FAIL;
	}

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(ENTRIES, &ring, &p);
	if (ret) {
		close(fd);
		fprintf(stderr, "ring create failed: %d\n", ret);
		return T_EXIT_FAIL;
	}
	nodrop = 0;
	if (p.features & IORING_FEAT_NODROP)
		nodrop = 1;

	total = 0;
	for (i = 0; i < BUFFERS / 2; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		if (fault && i == ENTRIES + 4)
			vecs[i].iov_base = NULL;
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);

		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			total = i;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			total = i;
			break;
		}
		total++;
	}

	if (*drops)
		goto reap_it;

	usleep(usecs);
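
	/*
	 * Submit the rest of the buffers. The delay above gives the first
	 * batch time to complete, so these reads push completions past the
	 * CQ ring capacity: the kernel either counts them in koverflow or,
	 * with IORING_FEAT_NODROP, holds them on its internal overflow list.
	 */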
	for (i = total; i < BUFFERS; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);
		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			break;
		}
		total++;
	}
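
	/*
	 * Reap what we can. With IORING_FEAT_NODROP every CQE must show up
	 * unless the kernel signalled dropped CQEs with -EBADR; otherwise
	 * the reaped count plus the kernel overflow counter must add up to
	 * the number of requests submitted.
	 */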
reap_it:
	reaped = 0;
	do {
		if (nodrop && !cqe_dropped) {
			/* nodrop should never lose events unless cqe_dropped */
			if (reaped == total)
				break;
		} else {
			if (reaped + *ring.cq.koverflow == total)
				break;
		}
		ret = io_uring_wait_cqe(&ring, &cqe);
		if (nodrop && ret == -EBADR) {
			cqe_dropped = true;
			continue;
		} else if (ret) {
			fprintf(stderr, "wait_cqe=%d\n", ret);
			goto err;
		}
		if (cqe->res != BS) {
			if (!(fault && cqe->res == -EFAULT)) {
				fprintf(stderr, "cqe res %d, wanted %d\n",
					cqe->res, BS);
				goto err;
			}
		}
		io_uring_cqe_seen(&ring, cqe);
		reaped++;
	} while (1);

	if (!io_uring_peek_cqe(&ring, &cqe)) {
		fprintf(stderr, "found unexpected completion\n");
		goto err;
	}

	if (!nodrop || cqe_dropped) {
		*drops = *ring.cq.koverflow;
	} else if (*ring.cq.koverflow) {
		fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow);
		goto err;
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return T_EXIT_PASS;
err:
	if (fd != -1)
		close(fd);
	io_uring_queue_exit(&ring);
	return T_EXIT_SKIP;
}
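
/*
 * Reap up to 'nr_events' CQEs, waiting or just peeking, and check that the
 * user_data values come back in submission order. A -EBADR return means
 * CQEs were dropped; the kernel overflow counter tells us how many of the
 * expected completions to skip.
 */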
static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait)
{
	struct io_uring_cqe *cqe;
	int i, ret = 0, seq = 0;
	unsigned int start_overflow = *ring->cq.koverflow;
	bool dropped = false;

	for (i = 0; i < nr_events; i++) {
		if (do_wait)
			ret = io_uring_wait_cqe(ring, &cqe);
		else
			ret = io_uring_peek_cqe(ring, &cqe);
		if (do_wait && ret == -EBADR) {
			unsigned int this_drop = *ring->cq.koverflow -
				start_overflow;

			dropped = true;
			start_overflow = *ring->cq.koverflow;
			assert(this_drop > 0);
			i += (this_drop - 1);
			continue;
		} else if (ret) {
			if (ret != -EAGAIN)
				fprintf(stderr, "cqe peek failed: %d\n", ret);
			break;
		}
		if (!dropped && cqe->user_data != seq) {
			fprintf(stderr, "cqe sequence out-of-order\n");
			fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data,
				seq);
			return -EINVAL;
		}
		seq++;
		io_uring_cqe_seen(ring, cqe);
	}

	return i ? i : ret;
}

/*
 * Submit some NOPs and watch if the overflow is correct
 */
static int test_overflow(void)
{
	struct io_uring ring;
	struct io_uring_params p;
	struct io_uring_sqe *sqe;
	unsigned pending;
	int ret, i, j;

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(4, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	/* submit 4x4 SQEs, should overflow the ring by 8 */
	pending = 0;
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			sqe = io_uring_get_sqe(&ring);
			if (!sqe) {
				fprintf(stderr, "get sqe failed\n");
				goto err;
			}

			io_uring_prep_nop(sqe);
			sqe->user_data = (i * 4) + j;
		}

		ret = io_uring_submit(&ring);
		if (ret == 4) {
			pending += 4;
			continue;
		}
		if (p.features & IORING_FEAT_NODROP) {
			if (ret == -EBUSY)
				break;
		}
		fprintf(stderr, "sqe submit failed: %d\n", ret);
		goto err;
	}

	/* we should now have 8 completions ready */
	ret = reap_events(&ring, pending, 0);
	if (ret < 0)
		goto err;

	if (!(p.features & IORING_FEAT_NODROP)) {
		if (*ring.cq.koverflow != 8) {
			fprintf(stderr, "cq ring overflow %d, expected 8\n",
				*ring.cq.koverflow);
			goto err;
		}
	}

	io_uring_queue_exit(&ring);
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}
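
/* Submit a single NOP carrying 'ud' as user_data; it must be accepted. */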
static void submit_one_nop(struct io_uring *ring, int ud)
{
	struct io_uring_sqe *sqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	assert(sqe);
	io_uring_prep_nop(sqe);
	sqe->user_data = ud;
	ret = io_uring_submit(ring);
	assert(ret == 1);
}

/*
 * Create an overflow condition and ensure that SQEs are still processed
 */
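/*
 * The arguments pick which overflow path is hit: 'batch' reaps with
 * io_uring_peek_batch_cqe(), 'cqe_multiple' scales the deliberately tiny
 * CQ ring, 'poll' adds IORING_SETUP_IOPOLL, and 'defer' adds
 * IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN.
 */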
static int test_overflow_handling(bool batch, int cqe_multiple, bool poll,
				  bool defer)
{
	struct io_uring ring;
	struct io_uring_params p;
	int ret, i, j, ud, cqe_count;
	unsigned int count;
	int const N = 8;
	int const LOOPS = 128;
	int const QUEUE_LENGTH = 1024;
	int completions[N];
	int queue[QUEUE_LENGTH];
	int queued = 0;
	int outstanding = 0;
	bool cqe_dropped = false;

	memset(&completions, 0, sizeof(int) * N);
	memset(&p, 0, sizeof(p));
	p.cq_entries = 2 * cqe_multiple;
	p.flags |= IORING_SETUP_CQSIZE;
	if (poll)
		p.flags |= IORING_SETUP_IOPOLL;
	if (defer)
		p.flags |= IORING_SETUP_SINGLE_ISSUER |
			   IORING_SETUP_DEFER_TASKRUN;

	ret = io_uring_queue_init_params(2, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}
	assert(p.cq_entries < N);

	/* submit N SQEs, some should overflow */
	for (i = 0; i < N; i++) {
		submit_one_nop(&ring, i);
		outstanding++;
	}
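
	/*
	 * Keep the ring busy: whenever the CQ has overflowed, flush it with
	 * io_uring_get_events(); otherwise resubmit the user_data values we
	 * have already reaped. Completions are counted per user_data so that
	 * fairness can be checked once the loop ends.
	 */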
	for (i = 0; i < LOOPS; i++) {
		struct io_uring_cqe *cqes[N];

		if (io_uring_cq_has_overflow(&ring)) {
			/*
			 * Flush any overflowed CQEs and process those. Actively
			 * flush these to make sure CQEs arrive in vague order
			 * of being sent.
			 */
			ret = io_uring_get_events(&ring);
			if (ret != 0) {
				fprintf(stderr,
					"io_uring_get_events returned %d\n",
					ret);
				goto err;
			}
		} else if (!cqe_dropped) {
			for (j = 0; j < queued; j++) {
				submit_one_nop(&ring, queue[j]);
				outstanding++;
			}
			queued = 0;
		}

		/* We have lost some random cqes, stop if no remaining. */
		if (cqe_dropped && outstanding == *ring.cq.koverflow)
			break;

		ret = io_uring_wait_cqe(&ring, &cqes[0]);
		if (ret == -EBADR) {
			cqe_dropped = true;
			fprintf(stderr, "CQE dropped\n");
			continue;
		} else if (ret != 0) {
			fprintf(stderr, "io_uring_wait_cqes failed %d\n", ret);
			goto err;
		}
		cqe_count = 1;
		if (batch) {
			ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2);
			if (ret < 0) {
				fprintf(stderr,
					"io_uring_peek_batch_cqe failed %d\n",
					ret);
				goto err;
			}
			cqe_count = ret;
		}
		for (j = 0; j < cqe_count; j++) {
			assert(cqes[j]->user_data < N);
			ud = cqes[j]->user_data;
			completions[ud]++;
			assert(queued < QUEUE_LENGTH);
			queue[queued++] = (int)ud;
		}
		io_uring_cq_advance(&ring, cqe_count);
		outstanding -= cqe_count;
	}

	/* See if there were any drops by flushing the CQ ring *and* overflow */
	do {
		struct io_uring_cqe *cqe;

		ret = io_uring_get_events(&ring);
		if (ret < 0) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			goto err;
		}
		if (outstanding && !io_uring_cq_ready(&ring))
			ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL);

		if (ret && ret != -ETIME) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			fprintf(stderr, "wait_cqe_timeout = %d\n", ret);
			goto err;
		}
		count = io_uring_cq_ready(&ring);
		io_uring_cq_advance(&ring, count);
		outstanding -= count;
	} while (count);

	io_uring_queue_exit(&ring);

	/* Make sure that completions come back in the same order they were
	 * sent. If they come back unfairly then this will concentrate on a
	 * couple of indices.
	 */
	for (i = 1; !cqe_dropped && i < N; i++) {
		if (abs(completions[i] - completions[i - 1]) > 1) {
			fprintf(stderr, "bad completion size %d %d\n",
				completions[i], completions[i - 1]);
			goto err;
		}
	}

	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}

int main(int argc, char *argv[])
{
	const char *fname = ".cq-overflow";
	unsigned iters, drops;
	unsigned long usecs;
	int ret;
	int i;
	bool can_defer;

	if (argc > 1)
		return T_EXIT_SKIP;

	can_defer = t_probe_defer_taskrun();
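
	/*
	 * Run test_overflow_handling() for every combination of batched
	 * reaping, CQ size multiple, IOPOLL and DEFER_TASKRUN (the latter
	 * only when the kernel supports it).
	 */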
	for (i = 0; i < 16; i++) {
		bool batch = i & 1;
		int mult = (i & 2) ? 1 : 2;
		bool poll = i & 4;
		bool defer = i & 8;

		if (defer && !can_defer)
			continue;

		ret = test_overflow_handling(batch, mult, poll, defer);
		if (ret) {
			fprintf(stderr, "test_overflow_handling("
				"batch=%d, mult=%d, poll=%d, defer=%d) failed\n",
				batch, mult, poll, defer);
			goto err;
		}
	}

	ret = test_overflow();
	if (ret) {
		fprintf(stderr, "test_overflow failed\n");
		return ret;
	}

	t_create_file(fname, FILE_SIZE);

	vecs = t_create_buffers(BUFFERS, BS);

	iters = 0;
	usecs = 1000;
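
	/*
	 * Grow the delay between the two submission batches by 20% per
	 * iteration, up to 40 iterations, until an overflow is actually
	 * observed. Then run once more without fault injection and once
	 * with it.
	 */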
	do {
		drops = 0;

		ret = test_io(fname, usecs, &drops, 0);
		if (ret == T_EXIT_SKIP)
			break;
		else if (ret != T_EXIT_PASS) {
			fprintf(stderr, "test_io nofault failed\n");
			goto err;
		}
		if (drops)
			break;
		usecs = (usecs * 12) / 10;
		iters++;
	} while (iters < 40);

	if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io nofault failed\n");
		goto err;
	}

	if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io fault failed\n");
		goto err;
	}

	unlink(fname);
	return T_EXIT_PASS;
err:
	unlink(fname);
	return T_EXIT_FAIL;
}