/* io_uring_register.c — liburing regression test (extraction artifacts removed) */
  1. #include "../config-host.h"
  2. /* SPDX-License-Identifier: MIT */
  3. /*
  4. * io_uring_register.c
  5. *
  6. * Description: Unit tests for the io_uring_register system call.
  7. *
  8. * Copyright 2019, Red Hat, Inc.
  9. * Author: Jeff Moyer <jmoyer@redhat.com>
  10. */
  11. #include <stdio.h>
  12. #include <fcntl.h>
  13. #include <string.h>
  14. #include <stdlib.h>
  15. #include <unistd.h>
  16. #include <errno.h>
  17. #include <sys/sysinfo.h>
  18. #include <poll.h>
  19. #include <assert.h>
  20. #include <sys/uio.h>
  21. #include <sys/mman.h>
  22. #include <linux/mman.h>
  23. #include <sys/time.h>
  24. #include <sys/resource.h>
  25. #include <sys/vfs.h>
  26. #include <limits.h>
  27. #include "helpers.h"
  28. #include "liburing.h"
  29. #include "../src/syscall.h"
  30. static int pagesize;
  31. static rlim_t mlock_limit;
  32. static int devnull;
  33. static int expect_fail(int fd, unsigned int opcode, void *arg,
  34. unsigned int nr_args, int error, int error2)
  35. {
  36. int ret;
  37. ret = io_uring_register(fd, opcode, arg, nr_args);
  38. if (ret >= 0) {
  39. int ret2 = 0;
  40. fprintf(stderr, "expected %s, but call succeeded\n", strerror(error));
  41. if (opcode == IORING_REGISTER_BUFFERS) {
  42. ret2 = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
  43. 0, 0);
  44. } else if (opcode == IORING_REGISTER_FILES) {
  45. ret2 = io_uring_register(fd, IORING_UNREGISTER_FILES, 0,
  46. 0);
  47. }
  48. if (ret2) {
  49. fprintf(stderr, "internal error: failed to unregister\n");
  50. exit(1);
  51. }
  52. return 1;
  53. }
  54. if (ret != error && (error2 && ret != error2)) {
  55. fprintf(stderr, "expected %d/%d, got %d\n", error, error2, ret);
  56. return 1;
  57. }
  58. return 0;
  59. }
  60. static int new_io_uring(int entries, struct io_uring_params *p)
  61. {
  62. int fd;
  63. fd = io_uring_setup(entries, p);
  64. if (fd < 0) {
  65. perror("io_uring_setup");
  66. exit(1);
  67. }
  68. return fd;
  69. }
  70. #define MAXFDS (UINT_MAX * sizeof(int))
  71. #define OFS_MAGIC 0x794c7630
  72. #define TMPFS_MAGIC 0x01021994
  73. #define RAMFS_MAGIC 0x858458f6
  74. static void *map_filebacked(size_t size)
  75. {
  76. struct statfs buf;
  77. int fd, ret;
  78. void *addr;
  79. char template[32] = "io_uring_register-test-XXXXXXXX";
  80. fd = mkstemp(template);
  81. if (fd < 0) {
  82. perror("mkstemp");
  83. return NULL;
  84. }
  85. if (statfs(template, &buf) < 0) {
  86. perror("statfs");
  87. unlink(template);
  88. close(fd);
  89. return NULL;
  90. }
  91. unlink(template);
  92. /* virtual file systems may not present as file mapped */
  93. if (buf.f_type == OFS_MAGIC || buf.f_type == RAMFS_MAGIC ||
  94. buf.f_type == TMPFS_MAGIC) {
  95. close(fd);
  96. return NULL;
  97. }
  98. ret = ftruncate(fd, size);
  99. if (ret < 0) {
  100. perror("ftruncate");
  101. close(fd);
  102. return NULL;
  103. }
  104. addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
  105. if (addr == MAP_FAILED) {
  106. perror("mmap");
  107. close(fd);
  108. return NULL;
  109. }
  110. close(fd);
  111. return addr;
  112. }
  113. /*
  114. * NOTE: this is now limited by SCM_MAX_FD (253). Keep the code for now,
  115. * but probably should augment it to test 253 and 254, specifically.
  116. */
  117. static int test_max_fds(int uring_fd)
  118. {
  119. int status = 1;
  120. int ret;
  121. void *fd_as; /* file descriptor address space */
  122. int fdtable_fd; /* fd for the file that will be mapped over and over */
  123. int io_fd; /* the valid fd for I/O -- /dev/null */
  124. int *fds; /* used to map the file into the address space */
  125. char template[32] = "io_uring_register-test-XXXXXXXX";
  126. unsigned long long i, nr_maps, nr_fds;
  127. /*
  128. * First, mmap anonymous the full size. That will guarantee the
  129. * mapping will fit in the memory area selected by mmap. Then,
  130. * over-write that mapping using a file-backed mapping, 128MiB at
  131. * a time using MAP_FIXED.
  132. */
  133. fd_as = mmap(NULL, UINT_MAX * sizeof(int), PROT_READ|PROT_WRITE,
  134. MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
  135. if (fd_as == MAP_FAILED) {
  136. if (errno == ENOMEM)
  137. return 0;
  138. perror("mmap fd_as");
  139. exit(1);
  140. }
  141. fdtable_fd = mkstemp(template);
  142. if (fdtable_fd < 0) {
  143. perror("mkstemp");
  144. exit(1);
  145. }
  146. unlink(template);
  147. ret = ftruncate(fdtable_fd, 128*1024*1024);
  148. if (ret < 0) {
  149. perror("ftruncate");
  150. exit(1);
  151. }
  152. io_fd = open("/dev/null", O_RDWR);
  153. if (io_fd < 0) {
  154. perror("open /dev/null");
  155. exit(1);
  156. }
  157. fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE,
  158. MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
  159. if (fds == MAP_FAILED) {
  160. perror("mmap fdtable");
  161. exit(1);
  162. }
  163. /* fill the fd table */
  164. nr_fds = 128*1024*1024 / sizeof(int);
  165. for (i = 0; i < nr_fds; i++)
  166. fds[i] = io_fd;
  167. /* map the file through the rest of the address space */
  168. nr_maps = (UINT_MAX * sizeof(int)) / (128*1024*1024);
  169. for (i = 0; i < nr_maps; i++) {
  170. fds = &fds[nr_fds]; /* advance fds by 128MiB */
  171. fds = mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE,
  172. MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
  173. if (fds == MAP_FAILED) {
  174. fprintf(stderr, "mmap failed at offset %lu\n",
  175. (unsigned long)((char *)fd_as - (char *)fds));
  176. exit(1);
  177. }
  178. }
  179. /* Now fd_as points to the file descriptor array. */
  180. /*
  181. * We may not be able to map all of these files. Let's back off
  182. * until success.
  183. */
  184. nr_fds = UINT_MAX;
  185. while (nr_fds) {
  186. ret = io_uring_register(uring_fd, IORING_REGISTER_FILES, fd_as,
  187. nr_fds);
  188. if (ret != 0) {
  189. nr_fds /= 2;
  190. continue;
  191. }
  192. status = 0;
  193. ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0);
  194. if (ret < 0) {
  195. errno = -ret;
  196. perror("io_uring_register UNREGISTER_FILES");
  197. exit(1);
  198. }
  199. break;
  200. }
  201. close(io_fd);
  202. close(fdtable_fd);
  203. ret = munmap(fd_as, UINT_MAX * sizeof(int));
  204. if (ret != 0) {
  205. fprintf(stderr, "munmap(%zu) failed\n", UINT_MAX * sizeof(int));
  206. exit(1);
  207. }
  208. return status;
  209. }
  210. static int test_memlock_exceeded(int fd)
  211. {
  212. int ret;
  213. void *buf;
  214. struct iovec iov;
  215. /* if limit is larger than 2gb, just skip this test */
  216. if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL)
  217. return 0;
  218. iov.iov_len = mlock_limit * 2;
  219. buf = t_malloc(iov.iov_len);
  220. iov.iov_base = buf;
  221. while (iov.iov_len) {
  222. ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
  223. if (ret == -ENOMEM) {
  224. iov.iov_len /= 2;
  225. continue;
  226. } else if (ret == -EFAULT) {
  227. free(buf);
  228. return 0;
  229. } else if (ret) {
  230. fprintf(stderr, "expected success or EFAULT, got %d\n", ret);
  231. free(buf);
  232. return 1;
  233. }
  234. ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0);
  235. if (ret != 0) {
  236. fprintf(stderr, "error: unregister failed with %d\n", ret);
  237. free(buf);
  238. return 1;
  239. }
  240. break;
  241. }
  242. if (!iov.iov_len)
  243. printf("Unable to register buffers. Check memlock rlimit.\n");
  244. free(buf);
  245. return 0;
  246. }
  247. static int test_iovec_nr(int fd)
  248. {
  249. int i, ret, status = 0;
  250. unsigned int nr = 1000000;
  251. struct iovec *iovs;
  252. void *buf;
  253. iovs = malloc(nr * sizeof(struct iovec));
  254. if (!iovs) {
  255. fprintf(stdout, "can't allocate iovecs, skip\n");
  256. return 0;
  257. }
  258. buf = t_malloc(pagesize);
  259. for (i = 0; i < nr; i++) {
  260. iovs[i].iov_base = buf;
  261. iovs[i].iov_len = pagesize;
  262. }
  263. status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL, 0);
  264. /* reduce to UIO_MAXIOV */
  265. nr = UIO_MAXIOV;
  266. ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
  267. if ((ret == -ENOMEM || ret == -EPERM) && geteuid()) {
  268. fprintf(stderr, "can't register large iovec for regular users, skip\n");
  269. } else if (ret != 0) {
  270. fprintf(stderr, "expected success, got %d\n", ret);
  271. status = 1;
  272. } else {
  273. io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
  274. }
  275. free(buf);
  276. free(iovs);
  277. return status;
  278. }
  279. /*
  280. * io_uring limit is 1G. iov_len limit is ~OUL, I think
  281. */
  282. static int test_iovec_size(int fd)
  283. {
  284. unsigned int status = 0;
  285. int ret;
  286. struct iovec iov;
  287. void *buf;
  288. /* NULL pointer for base */
  289. iov.iov_base = 0;
  290. iov.iov_len = 4096;
  291. status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
  292. /* valid base, 0 length */
  293. iov.iov_base = &buf;
  294. iov.iov_len = 0;
  295. status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
  296. /* valid base, length exceeds size */
  297. /* this requires an unampped page directly after buf */
  298. buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE,
  299. MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  300. assert(buf != MAP_FAILED);
  301. ret = munmap(buf + pagesize, pagesize);
  302. assert(ret == 0);
  303. iov.iov_base = buf;
  304. iov.iov_len = 2 * pagesize;
  305. status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
  306. munmap(buf, pagesize);
  307. /* huge page */
  308. buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE,
  309. MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
  310. -1, 0);
  311. if (buf == MAP_FAILED) {
  312. printf("Unable to map a huge page. Try increasing "
  313. "/proc/sys/vm/nr_hugepages by at least 1.\n");
  314. printf("Skipping the hugepage test\n");
  315. } else {
  316. /*
  317. * This should succeed, so long as RLIMIT_MEMLOCK is
  318. * not exceeded
  319. */
  320. iov.iov_base = buf;
  321. iov.iov_len = 2*1024*1024;
  322. ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
  323. if (ret < 0) {
  324. if (ret == -ENOMEM)
  325. printf("Unable to test registering of a huge "
  326. "page. Try increasing the "
  327. "RLIMIT_MEMLOCK resource limit by at "
  328. "least 2MB.");
  329. else {
  330. fprintf(stderr, "expected success, got %d\n", ret);
  331. status = 1;
  332. }
  333. } else {
  334. ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
  335. 0, 0);
  336. if (ret < 0) {
  337. fprintf(stderr, "io_uring_unregister: %s\n",
  338. strerror(-ret));
  339. status = 1;
  340. }
  341. }
  342. }
  343. ret = munmap(iov.iov_base, iov.iov_len);
  344. assert(ret == 0);
  345. /* file-backed buffers -- not supported */
  346. buf = map_filebacked(2*1024*1024);
  347. if (buf) {
  348. iov.iov_base = buf;
  349. iov.iov_len = 2*1024*1024;
  350. status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, -EOPNOTSUPP);
  351. munmap(buf, 2*1024*1024);
  352. }
  353. /* bump up against the soft limit and make sure we get EFAULT
  354. * or whatever we're supposed to get. NOTE: this requires
  355. * running the test as non-root. */
  356. if (getuid() != 0)
  357. status |= test_memlock_exceeded(fd);
  358. return status;
  359. }
  360. static int ioring_poll(struct io_uring *ring, int fd, int fixed)
  361. {
  362. int ret;
  363. struct io_uring_sqe *sqe;
  364. struct io_uring_cqe *cqe;
  365. sqe = io_uring_get_sqe(ring);
  366. memset(sqe, 0, sizeof(*sqe));
  367. sqe->opcode = IORING_OP_POLL_ADD;
  368. if (fixed)
  369. sqe->flags = IOSQE_FIXED_FILE;
  370. sqe->fd = fd;
  371. sqe->poll_events = POLLIN|POLLOUT;
  372. ret = io_uring_submit(ring);
  373. if (ret != 1) {
  374. fprintf(stderr, "failed to submit poll sqe: %d.\n", ret);
  375. return 1;
  376. }
  377. ret = io_uring_wait_cqe(ring, &cqe);
  378. if (ret < 0) {
  379. fprintf(stderr, "io_uring_wait_cqe failed with %d\n", ret);
  380. return 1;
  381. }
  382. ret = 0;
  383. if (!(cqe->res & POLLOUT)) {
  384. fprintf(stderr, "io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n",
  385. POLLOUT, cqe->res);
  386. ret = 1;
  387. }
  388. io_uring_cqe_seen(ring, cqe);
  389. return ret;
  390. }
  391. static int __test_poll_ringfd(int ring_flags)
  392. {
  393. int status = 0;
  394. int ret;
  395. int fd;
  396. struct io_uring ring;
  397. ret = io_uring_queue_init(2, &ring, ring_flags);
  398. if (ret) {
  399. perror("io_uring_queue_init");
  400. return 1;
  401. }
  402. fd = ring.ring_fd;
  403. /* try polling the ring fd */
  404. status = ioring_poll(&ring, fd, 0);
  405. /*
  406. * now register the ring fd, and try the poll again. This should
  407. * fail, because the kernel does not allow registering of the
  408. * ring_fd.
  409. */
  410. status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF, 0);
  411. /* tear down queue */
  412. io_uring_queue_exit(&ring);
  413. return status;
  414. }
  415. static int test_poll_ringfd(void)
  416. {
  417. int ret;
  418. ret = __test_poll_ringfd(0);
  419. if (ret)
  420. return ret;
  421. return __test_poll_ringfd(IORING_SETUP_SQPOLL);
  422. }
  423. int main(int argc, char **argv)
  424. {
  425. int fd, ret;
  426. unsigned int status = 0;
  427. struct io_uring_params p;
  428. struct rlimit rlim;
  429. if (argc > 1)
  430. return T_EXIT_SKIP;
  431. /* setup globals */
  432. pagesize = getpagesize();
  433. ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
  434. if (ret < 0) {
  435. perror("getrlimit");
  436. return T_EXIT_PASS;
  437. }
  438. mlock_limit = rlim.rlim_cur;
  439. devnull = open("/dev/null", O_RDWR);
  440. if (devnull < 0) {
  441. perror("open /dev/null");
  442. exit(T_EXIT_FAIL);
  443. }
  444. /* invalid fd */
  445. status |= expect_fail(-1, 0, NULL, 0, -EBADF, 0);
  446. /* valid fd that is not an io_uring fd */
  447. status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP, 0);
  448. /* invalid opcode */
  449. memset(&p, 0, sizeof(p));
  450. fd = new_io_uring(1, &p);
  451. ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL, 0);
  452. if (ret) {
  453. /* if this succeeds, tear down the io_uring instance
  454. * and start clean for the next test. */
  455. close(fd);
  456. fd = new_io_uring(1, &p);
  457. }
  458. /* IORING_REGISTER_BUFFERS */
  459. status |= test_iovec_size(fd);
  460. status |= test_iovec_nr(fd);
  461. /* IORING_REGISTER_FILES */
  462. status |= test_max_fds(fd);
  463. close(fd);
  464. /* uring poll on the uring fd */
  465. status |= test_poll_ringfd();
  466. if (status)
  467. fprintf(stderr, "FAIL\n");
  468. return status;
  469. }