io_uring_register.c

#include "../config-host.h"
/* SPDX-License-Identifier: MIT */
/*
 * io_uring_register.c
 *
 * Description: Unit tests for the io_uring_register system call.
 *
 * Copyright 2019, Red Hat, Inc.
 * Author: Jeff Moyer <jmoyer@redhat.com>
 */
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <poll.h>
#include <assert.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <limits.h>

#include "helpers.h"
#include "liburing.h"
#include "../src/syscall.h"

static int pagesize;
static rlim_t mlock_limit;
static int devnull;
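
/*
 * Issue an io_uring_register() call that is expected to fail with the
 * negative errno value in 'error'.  If the call unexpectedly succeeds,
 * undo any buffer or file registration so that later tests start from a
 * clean slate.  Returns 0 when the expected error is seen, 1 otherwise.
 */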
static int expect_fail(int fd, unsigned int opcode, void *arg,
		       unsigned int nr_args, int error)
{
	int ret;

	ret = io_uring_register(fd, opcode, arg, nr_args);
	if (ret >= 0) {
		int ret2 = 0;

		fprintf(stderr, "expected %s, but call succeeded\n",
			strerror(-error));
		if (opcode == IORING_REGISTER_BUFFERS) {
			ret2 = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
						 0, 0);
		} else if (opcode == IORING_REGISTER_FILES) {
			ret2 = io_uring_register(fd, IORING_UNREGISTER_FILES,
						 0, 0);
		}
		if (ret2) {
			fprintf(stderr, "internal error: failed to unregister\n");
			exit(1);
		}
		return 1;
	}

	if (ret != error) {
		fprintf(stderr, "expected %d, got %d\n", error, ret);
		return 1;
	}
	return 0;
}
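
/* Set up an io_uring instance, exiting the test on failure. */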
static int new_io_uring(int entries, struct io_uring_params *p)
{
	int fd;

	fd = io_uring_setup(entries, p);
	if (fd < 0) {
		perror("io_uring_setup");
		exit(1);
	}
	return fd;
}
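
/*
 * MAXFDS is the size, in bytes, of an fd array with UINT_MAX entries.
 *
 * map_filebacked() returns a shared, file-backed mapping of the given
 * size.  The backing file is an unlinked temporary file, so everything
 * is released once the caller unmaps the region.  Used below to check
 * that file-backed buffers are rejected by IORING_REGISTER_BUFFERS.
 */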
#define MAXFDS (UINT_MAX * sizeof(int))

static void *map_filebacked(size_t size)
{
	int fd, ret;
	void *addr;
	char template[32] = "io_uring_register-test-XXXXXXXX";

	fd = mkstemp(template);
	if (fd < 0) {
		perror("mkstemp");
		return NULL;
	}
	unlink(template);

	ret = ftruncate(fd, size);
	if (ret < 0) {
		perror("ftruncate");
		close(fd);
		return NULL;
	}

	addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return NULL;
	}

	close(fd);
	return addr;
}

/*
 * NOTE: this is now limited by SCM_MAX_FD (253).  Keep the code for now,
 * but it should probably be augmented to test 253 and 254 specifically
 * (see the test_scm_fd_boundary() sketch below).
 */
static int test_max_fds(int uring_fd)
{
	int status = 1;
	int ret;
	void *fd_as;	/* file descriptor address space */
	int fdtable_fd;	/* fd for the file that will be mapped over and over */
	int io_fd;	/* the valid fd for I/O -- /dev/null */
	int *fds;	/* used to map the file into the address space */
	char template[32] = "io_uring_register-test-XXXXXXXX";
	unsigned long long i, nr_maps, nr_fds;

	/*
	 * First, mmap anonymous the full size.  That will guarantee the
	 * mapping will fit in the memory area selected by mmap.  Then,
	 * over-write that mapping using a file-backed mapping, 128MiB at
	 * a time using MAP_FIXED.
	 */
	fd_as = mmap(NULL, UINT_MAX * sizeof(int), PROT_READ|PROT_WRITE,
		     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (fd_as == MAP_FAILED) {
		if (errno == ENOMEM)
			return 0;
		perror("mmap fd_as");
		exit(1);
	}

	fdtable_fd = mkstemp(template);
	if (fdtable_fd < 0) {
		perror("mkstemp");
		exit(1);
	}
	unlink(template);
	ret = ftruncate(fdtable_fd, 128*1024*1024);
	if (ret < 0) {
		perror("ftruncate");
		exit(1);
	}

	io_fd = open("/dev/null", O_RDWR);
	if (io_fd < 0) {
		perror("open /dev/null");
		exit(1);
	}

	fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
	if (fds == MAP_FAILED) {
		perror("mmap fdtable");
		exit(1);
	}

	/* fill the fd table */
	nr_fds = 128*1024*1024 / sizeof(int);
	for (i = 0; i < nr_fds; i++)
		fds[i] = io_fd;

	/* map the file through the rest of the address space */
	nr_maps = (UINT_MAX * sizeof(int)) / (128*1024*1024);
	for (i = 0; i < nr_maps; i++) {
		fds = &fds[nr_fds]; /* advance fds by 128MiB */
		fds = mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE,
			   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
		if (fds == MAP_FAILED) {
			fprintf(stderr, "mmap failed at offset %lu\n",
				(unsigned long)((char *)fd_as - (char *)fds));
			exit(1);
		}
	}

	/* Now fd_as points to the file descriptor array. */
	/*
	 * We may not be able to map all of these files.  Let's back off
	 * until success.
	 */
	nr_fds = UINT_MAX;
	while (nr_fds) {
		ret = io_uring_register(uring_fd, IORING_REGISTER_FILES, fd_as,
					nr_fds);
		if (ret != 0) {
			nr_fds /= 2;
			continue;
		}
		status = 0;
		ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0);
		if (ret < 0) {
			ret = errno;
			errno = ret;
			perror("io_uring_register UNREGISTER_FILES");
			exit(1);
		}
		break;
	}

	close(io_fd);
	close(fdtable_fd);
	ret = munmap(fd_as, UINT_MAX * sizeof(int));
	if (ret != 0) {
		fprintf(stderr, "munmap(%zu) failed\n", UINT_MAX * sizeof(int));
		exit(1);
	}

	return status;
}
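
/*
 * One possible shape for the 253/254 boundary check suggested in the
 * NOTE above test_max_fds().  This is only a sketch: it is not called
 * from main(), the function name is made up here, and it merely reports
 * what the kernel does at 253 and 254 registered files rather than
 * asserting a particular result, since the SCM_MAX_FD behaviour may
 * differ between kernels.  It assumes 'devnull' has already been opened
 * in main().
 */
static __attribute__((__unused__)) int test_scm_fd_boundary(int uring_fd)
{
	int fds[254];
	int i, nr, ret, status = 0;

	/* every slot points at the same valid fd */
	for (i = 0; i < 254; i++)
		fds[i] = devnull;

	for (nr = 253; nr <= 254; nr++) {
		ret = io_uring_register(uring_fd, IORING_REGISTER_FILES,
					fds, nr);
		fprintf(stdout, "register of %d files: %s\n", nr,
			ret == 0 ? "ok" : strerror(-ret));
		if (ret == 0) {
			ret = io_uring_register(uring_fd,
						IORING_UNREGISTER_FILES, 0, 0);
			if (ret) {
				fprintf(stderr, "unregister failed: %d\n", ret);
				status = 1;
			}
		}
	}
	return status;
}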
static int test_memlock_exceeded(int fd)
{
	int ret;
	void *buf;
	struct iovec iov;

	/* if the limit is 2 GiB or more, just skip this test */
	if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL)
		return 0;

	iov.iov_len = mlock_limit * 2;
	buf = t_malloc(iov.iov_len);
	iov.iov_base = buf;

	while (iov.iov_len) {
		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret < 0) {
			if (errno == ENOMEM) {
				iov.iov_len /= 2;
				continue;
			}
			if (errno == EFAULT) {
				free(buf);
				return 0;
			}
			fprintf(stderr, "expected success or EFAULT, got %d\n",
				errno);
			free(buf);
			return 1;
		}
		ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0);
		if (ret != 0) {
			fprintf(stderr, "error: unregister failed with %d\n",
				errno);
			free(buf);
			return 1;
		}
		break;
	}
	if (!iov.iov_len)
		printf("Unable to register buffers.  Check memlock rlimit.\n");

	free(buf);
	return 0;
}
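
/*
 * Registering far more iovecs than UIO_MAXIOV must fail with EINVAL;
 * exactly UIO_MAXIOV entries should be accepted, subject to memlock
 * accounting for unprivileged users.
 */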
static int test_iovec_nr(int fd)
{
	int i, ret, status = 0;
	unsigned int nr = 1000000;
	struct iovec *iovs;
	void *buf;

	iovs = malloc(nr * sizeof(struct iovec));
	if (!iovs) {
		fprintf(stdout, "can't allocate iovecs, skip\n");
		return 0;
	}
	buf = t_malloc(pagesize);

	for (i = 0; i < nr; i++) {
		iovs[i].iov_base = buf;
		iovs[i].iov_len = pagesize;
	}

	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL);

	/* reduce to UIO_MAXIOV */
	nr = UIO_MAXIOV;
	ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
	if (ret && (errno == ENOMEM || errno == EPERM) && geteuid()) {
		fprintf(stderr, "can't register large iovec for regular users, skip\n");
	} else if (ret != 0) {
		fprintf(stderr, "expected success, got %d\n", errno);
		status = 1;
	} else {
		io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
	}

	free(buf);
	free(iovs);
	return status;
}
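
/*
 * For contrast with the failure cases exercised below, a minimal sketch
 * of a registration that is expected to succeed: a single page-sized,
 * anonymous buffer.  Not called from main(), and the name is made up
 * here; error handling is deliberately minimal.
 */
static __attribute__((__unused__)) int register_one_page(int fd)
{
	struct iovec iov;
	int ret;

	iov.iov_base = t_malloc(pagesize);
	iov.iov_len = pagesize;
	ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
	if (!ret)
		io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
	free(iov.iov_base);
	return ret;
}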

/*
 * io_uring limit is 1G.  iov_len limit is ~0UL, I think.
 */
static int test_iovec_size(int fd)
{
	unsigned int status = 0;
	int ret;
	struct iovec iov;
	void *buf;

	/* NULL pointer for base */
	iov.iov_base = 0;
	iov.iov_len = 4096;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT);

	/* valid base, 0 length */
	iov.iov_base = &buf;
	iov.iov_len = 0;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT);

	/* valid base, length exceeds size */
	/* this requires an unmapped page directly after buf */
	buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(buf != MAP_FAILED);
	ret = munmap(buf + pagesize, pagesize);
	assert(ret == 0);
	iov.iov_base = buf;
	iov.iov_len = 2 * pagesize;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT);
	munmap(buf, pagesize);

	/* huge page */
	buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
		   -1, 0);
	if (buf == MAP_FAILED) {
		printf("Unable to map a huge page.  Try increasing "
		       "/proc/sys/vm/nr_hugepages by at least 1.\n");
		printf("Skipping the hugepage test\n");
	} else {
		/*
		 * This should succeed, so long as RLIMIT_MEMLOCK is
		 * not exceeded
		 */
		iov.iov_base = buf;
		iov.iov_len = 2*1024*1024;
		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret < 0) {
			if (ret == -ENOMEM)
				printf("Unable to test registering of a huge "
				       "page.  Try increasing the "
				       "RLIMIT_MEMLOCK resource limit by at "
				       "least 2MB.\n");
			else {
				fprintf(stderr, "expected success, got %d\n",
					ret);
				status = 1;
			}
		} else {
			ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
						0, 0);
			if (ret < 0) {
				fprintf(stderr, "io_uring_unregister: %s\n",
					strerror(-ret));
				status = 1;
			}
		}
	}
	ret = munmap(iov.iov_base, iov.iov_len);
	assert(ret == 0);

	/* file-backed buffers -- not supported */
	buf = map_filebacked(2*1024*1024);
	if (!buf)
		status = 1;
	iov.iov_base = buf;
	iov.iov_len = 2*1024*1024;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EOPNOTSUPP);
	munmap(buf, 2*1024*1024);

	/*
	 * Bump up against the soft limit and make sure we get EFAULT
	 * or whatever we're supposed to get.  NOTE: this requires
	 * running the test as non-root.
	 */
	if (getuid() != 0)
		status |= test_memlock_exceeded(fd);

	return status;
}
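
/*
 * Submit a single POLL_ADD for 'fd' (as a fixed file if 'fixed' is set)
 * and check that the completion reports POLLOUT.
 */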
static int ioring_poll(struct io_uring *ring, int fd, int fixed)
{
	int ret;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	sqe = io_uring_get_sqe(ring);
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_POLL_ADD;
	if (fixed)
		sqe->flags = IOSQE_FIXED_FILE;
	sqe->fd = fd;
	sqe->poll_events = POLLIN|POLLOUT;

	ret = io_uring_submit(ring);
	if (ret != 1) {
		fprintf(stderr, "failed to submit poll sqe: %d.\n", ret);
		return 1;
	}

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0) {
		fprintf(stderr, "io_uring_wait_cqe failed with %d\n", ret);
		return 1;
	}
	ret = 0;
	if (!(cqe->res & POLLOUT)) {
		fprintf(stderr, "io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n",
			POLLOUT, cqe->res);
		ret = 1;
	}

	io_uring_cqe_seen(ring, cqe);
	return ret;
}
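
/*
 * Poll the ring's own file descriptor, then verify that the ring fd
 * itself cannot be registered as a fixed file.
 */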
static int test_poll_ringfd(void)
{
	int status = 0;
	int ret;
	int fd;
	struct io_uring ring;

	ret = io_uring_queue_init(1, &ring, 0);
	if (ret) {
		perror("io_uring_queue_init");
		return 1;
	}
	fd = ring.ring_fd;

	/* try polling the ring fd */
	status = ioring_poll(&ring, fd, 0);

	/*
	 * Now try to register the ring fd as a fixed file.  This should
	 * fail, because the kernel does not allow registering of the
	 * ring_fd.
	 */
	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF);

	/* tear down queue */
	io_uring_queue_exit(&ring);

	return status;
}

int main(int argc, char **argv)
{
	int fd, ret;
	unsigned int status = 0;
	struct io_uring_params p;
	struct rlimit rlim;

	if (argc > 1)
		return T_EXIT_SKIP;

	/* setup globals */
	pagesize = getpagesize();
	ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
	if (ret < 0) {
		perror("getrlimit");
		return T_EXIT_PASS;
	}
	mlock_limit = rlim.rlim_cur;
	devnull = open("/dev/null", O_RDWR);
	if (devnull < 0) {
		perror("open /dev/null");
		exit(T_EXIT_FAIL);
	}

	/* invalid fd */
	status |= expect_fail(-1, 0, NULL, 0, -EBADF);
	/* valid fd that is not an io_uring fd */
	status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP);

	/* invalid opcode */
	memset(&p, 0, sizeof(p));
	fd = new_io_uring(1, &p);
	ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL);
	if (ret) {
		/* if this succeeds, tear down the io_uring instance
		 * and start clean for the next test. */
		close(fd);
		fd = new_io_uring(1, &p);
	}

	/* IORING_REGISTER_BUFFERS */
	status |= test_iovec_size(fd);
	status |= test_iovec_nr(fd);
	/* IORING_REGISTER_FILES */
	status |= test_max_fds(fd);
	close(fd);
	/* uring poll on the uring fd */
	status |= test_poll_ringfd();

	if (status)
		fprintf(stderr, "FAIL\n");

	return status;
}