/* fixed-hugepage.c */
#include "../config-host.h"
/* SPDX-License-Identifier: MIT */
/*
 * Test fixed buffers consisting of hugepages.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include <linux/mman.h>

#include "liburing.h"
#include "helpers.h"
  14. /*
  15. * Before testing
  16. * echo (>=4) > /proc/sys/vm/nr_hugepages
  17. * echo madvise > /sys/kernel/mm/transparent_hugepage/enabled
  18. * echo always > /sys/kernel/mm/transparent_hugepage/hugepages-16kB/enabled
  19. *
  20. * Not 100% guaranteed to get THP-backed memory, but in general it does.
  21. */
  22. #define MTHP_16KB (16UL * 1024)
  23. #define HUGEPAGE_SIZE (2UL * 1024 * 1024)
  24. #define NR_BUFS 1
  25. #define IN_FD "/dev/urandom"
  26. #define OUT_FD "/dev/zero"
  27. static int open_files(char *fname_in, int *fd_in, int *fd_out)
  28. {
  29. *fd_in = open(fname_in, O_RDONLY, 0644);
  30. if (*fd_in < 0) {
  31. printf("open %s failed\n", fname_in);
  32. return -1;
  33. }
  34. *fd_out = open(OUT_FD, O_RDWR, 0644);
  35. if (*fd_out < 0) {
  36. printf("open %s failed\n", OUT_FD);
  37. return -1;
  38. }
  39. return 0;
  40. }
  41. static void unmap(struct iovec *iov, int nr_bufs, size_t offset)
  42. {
  43. int i;
  44. for (i = 0; i < nr_bufs; i++)
  45. munmap(iov[i].iov_base - offset, iov[i].iov_len + offset);
  46. }
  47. static int mmap_hugebufs(struct iovec *iov, int nr_bufs, size_t buf_size, size_t offset)
  48. {
  49. int i;
  50. for (i = 0; i < nr_bufs; i++) {
  51. void *base = NULL;
  52. base = mmap(NULL, buf_size, PROT_READ | PROT_WRITE,
  53. MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
  54. if (base == MAP_FAILED) {
  55. printf("Unable to map hugetlb page. Try increasing the "
  56. "value in /proc/sys/vm/nr_hugepages\n");
  57. unmap(iov, i, offset);
  58. return -1;
  59. }
  60. memset(base, 0, buf_size);
  61. iov[i].iov_base = base + offset;
  62. iov[i].iov_len = buf_size - offset;
  63. }
  64. return 0;
  65. }
  66. /* map a hugepage and smaller page to a contiguous memory */
  67. static int mmap_mixture(struct iovec *iov, int nr_bufs, size_t buf_size, bool huge_on_left)
  68. {
  69. int i;
  70. void *small_base = NULL, *huge_base = NULL, *start = NULL,
  71. *huge_start = NULL, *small_start = NULL;
  72. size_t small_size = buf_size - HUGEPAGE_SIZE;
  73. size_t seg_size = ((buf_size / HUGEPAGE_SIZE) + 1) * HUGEPAGE_SIZE;
  74. start = mmap(NULL, seg_size * nr_bufs, PROT_NONE,
  75. MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  76. if (start == MAP_FAILED) {
  77. printf("Unable to preserve the page mixture memory. "
  78. "Try increasing the RLIMIT_MEMLOCK resource limit\n");
  79. return -1;
  80. }
  81. for (i = 0; i < nr_bufs; i++) {
  82. if (huge_on_left) {
  83. huge_start = start;
  84. small_start = start + HUGEPAGE_SIZE;
  85. } else {
  86. huge_start = start + HUGEPAGE_SIZE;
  87. small_start = start + HUGEPAGE_SIZE - small_size;
  88. }
  89. huge_base = mmap(huge_start, HUGEPAGE_SIZE, PROT_READ | PROT_WRITE,
  90. MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED, -1, 0);
  91. if (huge_base == MAP_FAILED) {
  92. printf("Unable to map hugetlb page in the page mixture. "
  93. "Try increasing the value in /proc/sys/vm/nr_hugepages\n");
  94. unmap(iov, nr_bufs, 0);
  95. return -1;
  96. }
  97. small_base = mmap(small_start, small_size, PROT_READ | PROT_WRITE,
  98. MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
  99. if (small_base == MAP_FAILED) {
  100. printf("Unable to map small page in the page mixture. "
  101. "Try increasing the RLIMIT_MEMLOCK resource limit\n");
  102. unmap(iov, nr_bufs, 0);
  103. return -1;
  104. }
  105. if (huge_on_left) {
  106. iov[i].iov_base = huge_base;
  107. memset(huge_base, 0, buf_size);
  108. }
  109. else {
  110. iov[i].iov_base = small_base;
  111. memset(small_base, 0, buf_size);
  112. }
  113. iov[i].iov_len = buf_size;
  114. start += seg_size;
  115. }
  116. return 0;
  117. }
  118. static void free_bufs(struct iovec *iov, int nr_bufs, size_t offset)
  119. {
  120. int i;
  121. for (i = 0; i < nr_bufs; i++)
  122. free(iov[i].iov_base - offset);
  123. }
  124. static int get_mthp_bufs(struct iovec *iov, int nr_bufs, size_t buf_size,
  125. size_t alignment, size_t offset)
  126. {
  127. int i;
  128. for (i = 0; i < nr_bufs; i++) {
  129. void *base = NULL;
  130. if (posix_memalign(&base, alignment, buf_size)) {
  131. printf("Unable to allocate mthp pages. "
  132. "Try increasing the RLIMIT_MEMLOCK resource limit\n");
  133. free_bufs(iov, i, offset);
  134. return -1;
  135. }
  136. memset(base, 0, buf_size);
  137. iov[i].iov_base = base + offset;
  138. iov[i].iov_len = buf_size - offset;
  139. }
  140. return 0;
  141. }
  142. static int do_read(struct io_uring *ring, int fd, struct iovec *iov, int nr_bufs)
  143. {
  144. struct io_uring_sqe *sqe;
  145. struct io_uring_cqe *cqe;
  146. int i, ret;
  147. for (i = 0; i < nr_bufs; i++) {
  148. sqe = io_uring_get_sqe(ring);
  149. if (!sqe) {
  150. fprintf(stderr, "Could not get SQE.\n");
  151. return -1;
  152. }
  153. io_uring_prep_read_fixed(sqe, fd, iov[i].iov_base, iov[i].iov_len, 0, i);
  154. io_uring_submit(ring);
  155. ret = io_uring_wait_cqe(ring, &cqe);
  156. if (ret < 0) {
  157. fprintf(stderr, "Error waiting for completion: %s\n", strerror(-ret));
  158. return -1;
  159. }
  160. if (cqe->res < 0) {
  161. fprintf(stderr, "Error in async read operation: %s\n", strerror(-cqe->res));
  162. return -1;
  163. }
  164. if (cqe->res != iov[i].iov_len) {
  165. fprintf(stderr, "cqe res: %d, expected: %lu\n", cqe->res, (unsigned long) iov[i].iov_len);
  166. return -1;
  167. }
  168. io_uring_cqe_seen(ring, cqe);
  169. }
  170. return 0;
  171. }
  172. static int do_write(struct io_uring *ring, int fd, struct iovec *iov, int nr_bufs)
  173. {
  174. struct io_uring_sqe *sqe;
  175. struct io_uring_cqe *cqe;
  176. int i, ret;
  177. for (i = 0; i < nr_bufs; i++) {
  178. sqe = io_uring_get_sqe(ring);
  179. if (!sqe) {
  180. fprintf(stderr, "Could not get SQE.\n");
  181. return -1;
  182. }
  183. io_uring_prep_write_fixed(sqe, fd, iov[i].iov_base, iov[i].iov_len, 0, i);
  184. io_uring_submit(ring);
  185. ret = io_uring_wait_cqe(ring, &cqe);
  186. if (ret < 0) {
  187. fprintf(stderr, "Error waiting for completion: %s\n", strerror(-ret));
  188. return -1;
  189. }
  190. if (cqe->res < 0) {
  191. fprintf(stderr, "Error in async write operation: %s\n", strerror(-cqe->res));
  192. return -1;
  193. }
  194. if (cqe->res != iov[i].iov_len) {
  195. fprintf(stderr, "cqe res: %d, expected: %lu\n", cqe->res, (unsigned long) iov[i].iov_len);
  196. return -1;
  197. }
  198. io_uring_cqe_seen(ring, cqe);
  199. }
  200. return 0;
  201. }
  202. static int register_submit(struct io_uring *ring, struct iovec *iov,
  203. int nr_bufs, int fd_in, int fd_out)
  204. {
  205. int ret;
  206. ret = io_uring_register_buffers(ring, iov, nr_bufs);
  207. if (ret) {
  208. if (ret != -ENOMEM)
  209. fprintf(stderr, "Error registering buffers: %s\n", strerror(-ret));
  210. return ret;
  211. }
  212. ret = do_read(ring, fd_in, iov, nr_bufs);
  213. if (ret) {
  214. fprintf(stderr, "Read test failed\n");
  215. return ret;
  216. }
  217. ret = do_write(ring, fd_out, iov, nr_bufs);
  218. if (ret) {
  219. fprintf(stderr, "Write test failed\n");
  220. return ret;
  221. }
  222. ret = io_uring_unregister_buffers(ring);
  223. if (ret) {
  224. fprintf(stderr, "Error unregistering buffers for one hugepage test: %s", strerror(-ret));
  225. return ret;
  226. }
  227. return 0;
  228. }
  229. static int test_one_hugepage(struct io_uring *ring, int fd_in, int fd_out)
  230. {
  231. struct iovec iov[NR_BUFS];
  232. size_t buf_size = HUGEPAGE_SIZE;
  233. int ret;
  234. if (mmap_hugebufs(iov, NR_BUFS, buf_size, 0))
  235. return T_EXIT_SKIP;
  236. ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
  237. unmap(iov, NR_BUFS, 0);
  238. if (ret == -ENOMEM)
  239. return T_EXIT_SKIP;
  240. return ret ? T_EXIT_FAIL : T_EXIT_PASS;
  241. }
  242. static int test_multi_hugepages(struct io_uring *ring, int fd_in, int fd_out)
  243. {
  244. struct iovec iov[NR_BUFS];
  245. size_t buf_size = 4 * HUGEPAGE_SIZE;
  246. int ret;
  247. if (mmap_hugebufs(iov, NR_BUFS, buf_size, 0))
  248. return T_EXIT_SKIP;
  249. ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
  250. unmap(iov, NR_BUFS, 0);
  251. if (ret == -ENOMEM)
  252. return T_EXIT_SKIP;
  253. return ret ? T_EXIT_FAIL : T_EXIT_PASS;
  254. }
  255. static int test_unaligned_hugepage(struct io_uring *ring, int fd_in, int fd_out)
  256. {
  257. struct iovec iov[NR_BUFS];
  258. size_t buf_size = 3 * HUGEPAGE_SIZE;
  259. size_t offset = 0x1234;
  260. int ret;
  261. if (mmap_hugebufs(iov, NR_BUFS, buf_size, offset))
  262. return T_EXIT_SKIP;
  263. ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
  264. unmap(iov, NR_BUFS, offset);
  265. if (ret == -ENOMEM)
  266. return T_EXIT_SKIP;
  267. return ret ? T_EXIT_FAIL : T_EXIT_PASS;
  268. }
  269. static int test_multi_unaligned_mthps(struct io_uring *ring, int fd_in, int fd_out)
  270. {
  271. struct iovec iov[NR_BUFS];
  272. int ret;
  273. size_t buf_size = 3 * MTHP_16KB;
  274. size_t offset = 0x1234;
  275. if (get_mthp_bufs(iov, NR_BUFS, buf_size, MTHP_16KB, offset))
  276. return T_EXIT_SKIP;
  277. ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
  278. free_bufs(iov, NR_BUFS, offset);
  279. if (ret == -ENOMEM)
  280. return T_EXIT_SKIP;
  281. return ret ? T_EXIT_FAIL : T_EXIT_PASS;
  282. }
  283. /* Should not coalesce */
  284. static int test_page_mixture(struct io_uring *ring, int fd_in, int fd_out, int huge_on_left)
  285. {
  286. struct iovec iov[NR_BUFS];
  287. size_t buf_size = HUGEPAGE_SIZE + MTHP_16KB;
  288. int ret;
  289. if (mmap_mixture(iov, NR_BUFS, buf_size, huge_on_left))
  290. return T_EXIT_SKIP;
  291. ret = register_submit(ring, iov, NR_BUFS, fd_in, fd_out);
  292. unmap(iov, NR_BUFS, 0);
  293. if (ret == -ENOMEM)
  294. return T_EXIT_SKIP;
  295. return ret ? T_EXIT_FAIL : T_EXIT_PASS;
  296. }
  297. int main(int argc, char *argv[])
  298. {
  299. struct io_uring ring;
  300. int ret, fd_in, fd_out;
  301. char *fname_in;
  302. if (argc > 1)
  303. fname_in = argv[1];
  304. else
  305. fname_in = IN_FD;
  306. if (open_files(fname_in, &fd_in, &fd_out))
  307. return T_EXIT_SKIP;
  308. ret = t_create_ring(8, &ring, 0);
  309. if (ret == T_SETUP_SKIP)
  310. return T_EXIT_SKIP;
  311. else if (ret < 0)
  312. return T_EXIT_FAIL;
  313. ret = test_one_hugepage(&ring, fd_in, fd_out);
  314. if (ret != T_EXIT_PASS) {
  315. if (ret != T_EXIT_SKIP)
  316. fprintf(stderr, "Test one hugepage failed.\n");
  317. return ret;
  318. }
  319. ret = test_multi_hugepages(&ring, fd_in, fd_out);
  320. if (ret != T_EXIT_PASS) {
  321. if (ret != T_EXIT_SKIP)
  322. fprintf(stderr, "Test multi hugepages failed.\n");
  323. return ret;
  324. }
  325. ret = test_unaligned_hugepage(&ring, fd_in, fd_out);
  326. if (ret != T_EXIT_PASS) {
  327. if (ret != T_EXIT_SKIP)
  328. fprintf(stderr, "Test unaligned hugepage failed.\n");
  329. return ret;
  330. }
  331. ret = test_multi_unaligned_mthps(&ring, fd_in, fd_out);
  332. if (ret != T_EXIT_PASS) {
  333. if (ret != T_EXIT_SKIP)
  334. fprintf(stderr, "Test unaligned multi-size'd THPs failed.\n");
  335. return ret;
  336. }
  337. ret = test_page_mixture(&ring, fd_in, fd_out, true);
  338. if (ret != T_EXIT_PASS) {
  339. if (ret != T_EXIT_SKIP)
  340. fprintf(stderr, "Test huge small page mixture (start with huge) failed.\n");
  341. return ret;
  342. }
  343. ret = test_page_mixture(&ring, fd_in, fd_out, false);
  344. if (ret != T_EXIT_PASS) {
  345. if (ret != T_EXIT_SKIP)
  346. fprintf(stderr, "Test huge small page mixture (start with small) failed.\n");
  347. return ret;
  348. }
  349. io_uring_queue_exit(&ring);
  350. return T_EXIT_PASS;
  351. }