Update contrib/libs/liburing to 2.6
3b51a9fb14de805208d11f1c077c78bb5d487e0f

thegeorg committed 9 months ago
parent commit afd4899380

+ 27 - 0
contrib/libs/liburing/CHANGELOG

@@ -1,3 +1,29 @@
+liburing-2.6 release
+
+- Add getsockopt and setsockopt socket commands
+- Add test cases to test/hardlink
+- Man page fixes
+- Add futex support, and test cases
+- Add waitid support, and test cases
+- Add read multishot, and test cases
+- Add support for IORING_SETUP_NO_SQARRAY
+- Use IORING_SETUP_NO_SQARRAY as the default
+- Add support for IORING_OP_FIXED_FD_INSTALL
+- Add io_uring_prep_fixed_fd_install() helper
+- Support for napi busy polling
+- Improve/add test cases
+- Man page fixes
+- Add sample 'proxy' example
+
+liburing-2.5 release
+
+- Add support for io_uring_prep_cmd_sock()
+- Add support for application allocated ring memory, for placing rings
+  in huge mem. Available through io_uring_queue_init_mem().
+- Add support for registered ring fds
+- Various documentation updates
+- Various fixes
+
 liburing-2.4 release
 
 - Add io_uring_{major,minor,check}_version() functions.
@@ -15,6 +41,7 @@ liburing-2.4 release
   io_uring_prep_socket_direct() factor in being called with
   IORING_FILE_INDEX_ALLOC for allocating a direct descriptor.
 - Add io_uring_prep_sendto() function.
+- Add io_uring_prep_cmd_sock() function.
 
 liburing-2.3 release
 

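The 2.5 entry above mentions io_uring_queue_init_mem() for placing the rings in caller-provided memory; its declaration appears in the liburing.h hunk further down in this diff. A minimal sketch, assuming a single 2 MiB huge page and ordinary mmap flags (both illustrative, not part of this commit):

    #include <liburing.h>
    #include <sys/mman.h>
    #include <string.h>
    #include <errno.h>

    static int ring_in_hugepage(struct io_uring *ring)
    {
        struct io_uring_params p;
        size_t len = 2 * 1024 * 1024;   /* one 2 MiB huge page (assumed) */
        void *buf;
        int ret;

        buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
        if (buf == MAP_FAILED)
            return -errno;

        memset(&p, 0, sizeof(p));
        /* on success the return is expected to be the bytes consumed
           from buf; negative -errno on failure */
        ret = io_uring_queue_init_mem(64, ring, &p, buf, len);
        if (ret < 0)
            munmap(buf, len);
        return ret < 0 ? ret : 0;
    }

In this path liburing is expected to set IORING_SETUP_NO_MMAP itself, so the application only supplies the memory.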
+ 1 - 1
contrib/libs/liburing/README

@@ -54,7 +54,7 @@ Building liburing
     # Prepare build config (optional).
     #
     #  --cc  specifies the C   compiler.
-    #  --cxx speficies the C++ compiler.
+    #  --cxx specifies the C++ compiler.
     #
     ./configure --cc=gcc --cxx=g++;
 

+ 1 - 0
contrib/libs/liburing/config-host.h

@@ -14,3 +14,4 @@
 #define CONFIG_HAVE_ARRAY_BOUNDS
 #define CONFIG_HAVE_NVME_URING
 #define CONFIG_HAVE_FANOTIFY
+#define CONFIG_HAVE_FUTEXV

+ 0 - 1
contrib/libs/liburing/src/arch/aarch64/lib.h

@@ -4,7 +4,6 @@
 #define LIBURING_ARCH_AARCH64_LIB_H
 
 #include <elf.h>
-#include <sys/auxv.h>
 #include "../../syscall.h"
 
 static inline long __get_page_size(void)

+ 126 - 20
contrib/libs/liburing/src/include/liburing.h

@@ -2,14 +2,6 @@
 #ifndef LIB_URING_H
 #define LIB_URING_H
 
-#ifndef _XOPEN_SOURCE
-#define _XOPEN_SOURCE 500 /* Required for glibc to expose sigset_t */
-#endif
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE /* Required for musl to expose cpu_set_t */
-#endif
-
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
@@ -21,6 +13,7 @@
 #include <fcntl.h>
 #include <sched.h>
 #include <linux/swab.h>
+#include <sys/wait.h>
 #include "liburing/compat.h"
 #include "liburing/io_uring.h"
 #include "liburing/io_uring_version.h"
@@ -164,6 +157,9 @@ IOURINGINLINE int io_uring_opcode_supported(const struct io_uring_probe *p,
 	return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
 }
 
+int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring,
+				struct io_uring_params *p,
+				void *buf, size_t buf_size);
 int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
 				struct io_uring_params *p);
 int io_uring_queue_init(unsigned entries, struct io_uring *ring,
@@ -235,12 +231,16 @@ int io_uring_close_ring_fd(struct io_uring *ring);
 int io_uring_register_buf_ring(struct io_uring *ring,
 			       struct io_uring_buf_reg *reg, unsigned int flags);
 int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid);
+int io_uring_buf_ring_head(struct io_uring *ring, int buf_group, uint16_t *head);
 int io_uring_register_sync_cancel(struct io_uring *ring,
 				 struct io_uring_sync_cancel_reg *reg);
 
 int io_uring_register_file_alloc_range(struct io_uring *ring,
 					unsigned off, unsigned len);
 
+int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi);
+int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi);
+
 int io_uring_get_events(struct io_uring *ring);
 int io_uring_submit_and_get_events(struct io_uring *ring);
 
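A short sketch of the new NAPI busy-poll registration declared above, assuming liburing.h is included and ring is an initialized ring; the 100-microsecond timeout is an illustrative value:

    struct io_uring_napi napi = {
        .busy_poll_to     = 100,  /* busy-poll timeout, in microseconds */
        .prefer_busy_poll = 1,
    };
    int ret = io_uring_register_napi(&ring, &napi);
    if (ret < 0) {
        /* e.g. -EINVAL on kernels without IORING_REGISTER_NAPI */
    }
    /* ... io_uring_unregister_napi(&ring, &napi) restores the defaults */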
@@ -375,17 +375,10 @@ IOURINGINLINE void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
 	sqe->file_index = file_index + 1;
 }
 
-IOURINGINLINE void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
-				    const void *addr, unsigned len,
-				    __u64 offset)
+IOURINGINLINE void io_uring_initialize_sqe(struct io_uring_sqe *sqe) 
 {
-	sqe->opcode = (__u8) op;
 	sqe->flags = 0;
 	sqe->ioprio = 0;
-	sqe->fd = fd;
-	sqe->off = offset;
-	sqe->addr = (unsigned long) addr;
-	sqe->len = len;
 	sqe->rw_flags = 0;
 	sqe->buf_index = 0;
 	sqe->personality = 0;
@@ -394,6 +387,17 @@ IOURINGINLINE void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
 	sqe->__pad2[0] = 0;
 }
 
+IOURINGINLINE void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
+				    const void *addr, unsigned len,
+				    __u64 offset)
+{
+	sqe->opcode = (__u8) op;
+	sqe->fd = fd;
+	sqe->off = offset;
+	sqe->addr = (unsigned long) addr;
+	sqe->len = len;
+}
+
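With the prep path split as above, an SQE handed out by io_uring_get_sqe() is already cleared via io_uring_initialize_sqe() (see the _io_uring_get_sqe() hunk below), so io_uring_prep_rw() now only fills the op-specific fields. A minimal illustration, with fd, buf, and len assumed:

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);  /* zeroed here */
    io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, len, 0);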
 /*
  * io_uring_prep_splice() - Either @fd_in or @fd_out must be a pipe.
  *
@@ -720,6 +724,15 @@ IOURINGINLINE void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
 	io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
 }
 
+IOURINGINLINE void io_uring_prep_read_multishot(struct io_uring_sqe *sqe,
+						int fd, unsigned nbytes,
+						__u64 offset, int buf_group)
+{
+	io_uring_prep_rw(IORING_OP_READ_MULTISHOT, sqe, fd, NULL, nbytes,
+			 offset);
+	sqe->buf_group = buf_group;
+}
+
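A sketch of arming the new multishot read on a pollable descriptor (for example a pipe or socket); BGID stands in for a provided-buffer group registered beforehand:

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    /* length 0 here: reads size themselves from the provided buffers */
    io_uring_prep_read_multishot(sqe, fd, 0, 0, BGID);
    io_uring_submit(&ring);

    /* completions carry IORING_CQE_F_BUFFER (buffer id in cqe->flags) and
       keep IORING_CQE_F_MORE set while the request remains armed */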
 IOURINGINLINE void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
 				       const void *buf, unsigned nbytes,
 				       __u64 offset)
@@ -1125,13 +1138,89 @@ IOURINGINLINE void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe,
 	__io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
 }
 
+/*
+ * Prepare commands for sockets
+ */
+IOURINGINLINE void io_uring_prep_cmd_sock(struct io_uring_sqe *sqe,
+					  int cmd_op,
+					  int fd,
+					  int level,
+					  int optname,
+					  void *optval,
+					  int optlen)
+{
+	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, NULL, 0, 0);
+	sqe->optval = (unsigned long) (uintptr_t) optval;
+	sqe->optname = optname;
+	sqe->optlen = optlen;
+	sqe->cmd_op = cmd_op;
+	sqe->level = level;
+}
+
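A sketch of the socket-command path, here reading SO_RCVBUF asynchronously instead of calling getsockopt(2); sockfd is an assumed open socket and the option constants come from <sys/socket.h>:

    int val = 0;
    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_GETSOCKOPT, sockfd,
                           SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
    io_uring_submit(&ring);
    /* on success cqe->res is expected to hold the returned optlen */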
+IOURINGINLINE void io_uring_prep_waitid(struct io_uring_sqe *sqe,
+					idtype_t idtype,
+					id_t id,
+					siginfo_t *infop,
+					int options, unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_WAITID, sqe, id, NULL, (unsigned) idtype, 0);
+	sqe->waitid_flags = flags;
+	sqe->file_index = options;
+	sqe->addr2 = (unsigned long) infop;
+}
+
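A sketch of reaping a child asynchronously with the new waitid support; pid is an assumed child process id, and P_PID/WEXITED come from <sys/wait.h>, which liburing.h now includes:

    siginfo_t info;
    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    io_uring_prep_waitid(sqe, P_PID, pid, &info, WEXITED, 0);
    io_uring_submit(&ring);
    /* on completion, info.si_pid and info.si_status describe the child,
       as with waitid(2) */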
+IOURINGINLINE void io_uring_prep_futex_wake(struct io_uring_sqe *sqe,
+					    uint32_t *futex, uint64_t val,
+					    uint64_t mask, uint32_t futex_flags,
+					    unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_FUTEX_WAKE, sqe, futex_flags, futex, 0, val);
+	sqe->futex_flags = flags;
+	sqe->addr3 = mask;
+}
+
+IOURINGINLINE void io_uring_prep_futex_wait(struct io_uring_sqe *sqe,
+					    uint32_t *futex, uint64_t val,
+					    uint64_t mask, uint32_t futex_flags,
+					    unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_FUTEX_WAIT, sqe, futex_flags, futex, 0, val);
+	sqe->futex_flags = flags;
+	sqe->addr3 = mask;
+}
+
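A sketch pairing the two futex helpers on a shared 32-bit word; FUTEX2_SIZE_U32 and FUTEX_BITSET_MATCH_ANY are assumed to come from a recent <linux/futex.h>, and the wake would normally be issued later, from another context:

    uint32_t word = 0;
    struct io_uring_sqe *sqe;

    sqe = io_uring_get_sqe(&ring);              /* sleep while word == 0 */
    io_uring_prep_futex_wait(sqe, &word, 0, FUTEX_BITSET_MATCH_ANY,
                             FUTEX2_SIZE_U32, 0);

    sqe = io_uring_get_sqe(&ring);              /* wake at most one waiter */
    io_uring_prep_futex_wake(sqe, &word, 1, FUTEX_BITSET_MATCH_ANY,
                             FUTEX2_SIZE_U32, 0);
    io_uring_submit(&ring);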
+struct futex_waitv;
+IOURINGINLINE void io_uring_prep_futex_waitv(struct io_uring_sqe *sqe,
+					     struct futex_waitv *futex,
+					     uint32_t nr_futex,
+					     unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_FUTEX_WAITV, sqe, 0, futex, nr_futex, 0);
+	sqe->futex_flags = flags;
+}
+
+IOURINGINLINE void io_uring_prep_fixed_fd_install(struct io_uring_sqe *sqe,
+						  int fd,
+						  unsigned int flags)
+{
+	io_uring_prep_rw(IORING_OP_FIXED_FD_INSTALL, sqe, fd, NULL, 0, 0);
+	sqe->flags = IOSQE_FIXED_FILE;
+	sqe->install_fd_flags = flags;
+}
+
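A sketch of turning a direct descriptor back into a regular fd with the new helper; SLOT stands in for a previously registered fixed-file index:

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    io_uring_prep_fixed_fd_install(sqe, SLOT, 0);
    io_uring_submit(&ring);
    /* on success cqe->res is the newly installed regular fd; passing
       IORING_FIXED_FD_NO_CLOEXEC in flags skips O_CLOEXEC */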
+IOURINGINLINE void io_uring_prep_ftruncate(struct io_uring_sqe *sqe,
+				       int fd, loff_t len)
+{
+	io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, 0, 0, len);
+}
+
 /*
  * Returns number of unconsumed (if SQPOLL) or unsubmitted entries exist in
  * the SQ ring
  */
 IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
 {
-	unsigned khead = *ring->sq.khead;
+	unsigned khead;
 
 	/*
 	 * Without a barrier, we could miss an update and think the SQ wasn't
@@ -1140,6 +1229,8 @@ IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring)
 	 */
 	if (ring->flags & IORING_SETUP_SQPOLL)
 		khead = io_uring_smp_load_acquire(ring->sq.khead);
+	else
+		khead = *ring->sq.khead;
 
 	/* always use real head, to avoid losing sync for short submit */
 	return ring->sq.sqe_tail - khead;
@@ -1326,7 +1417,7 @@ IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
 	if (ring->flags & IORING_SETUP_SQE128)
 		shift = 1;
 	if (!(ring->flags & IORING_SETUP_SQPOLL))
-		head = IO_URING_READ_ONCE(*sq->khead);
+		head = *sq->khead;
 	else
 		head = io_uring_smp_load_acquire(sq->khead);
 
@@ -1335,6 +1426,7 @@ IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
 
 		sqe = &sq->sqes[(sq->sqe_tail & sq->ring_mask) << shift];
 		sq->sqe_tail = next;
+		io_uring_initialize_sqe(sqe);
 		return sqe;
 	}
 
@@ -1386,7 +1478,7 @@ IOURINGINLINE void __io_uring_buf_ring_cq_advance(struct io_uring *ring,
 						  struct io_uring_buf_ring *br,
 						  int cq_count, int buf_count)
 {
-	br->tail += buf_count;
+	io_uring_buf_ring_advance(br, buf_count);
 	io_uring_cq_advance(ring, cq_count);
 }
 
@@ -1404,6 +1496,20 @@ IOURINGINLINE void io_uring_buf_ring_cq_advance(struct io_uring *ring,
 	__io_uring_buf_ring_cq_advance(ring, br, count, count);
 }
 
+IOURINGINLINE int io_uring_buf_ring_available(struct io_uring *ring,
+					      struct io_uring_buf_ring *br,
+					      unsigned short bgid)
+{
+	uint16_t head;
+	int ret;
+
+	ret = io_uring_buf_ring_head(ring, bgid, &head);
+	if (ret)
+		return ret;
+
+	return (uint16_t) (br->tail - head);
+}
+
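A sketch of checking how many buffers the kernel can still pick from a provided-buffer ring; br and BGID are assumed to come from an earlier io_uring_setup_buf_ring() call:

    int avail = io_uring_buf_ring_available(&ring, br, BGID);

    if (avail < 0) {
        /* e.g. -EINVAL on kernels without IORING_REGISTER_PBUF_STATUS */
    } else if (avail == 0) {
        /* replenish and advance the ring before further completions
           can pick up buffers */
    }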
 #ifndef LIBURING_INTERNAL
 IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
 {
@@ -1432,7 +1538,7 @@ bool io_uring_check_version(int major, int minor);
 #define IO_URING_CHECK_VERSION(major,minor) \
   (major > IO_URING_VERSION_MAJOR ||        \
    (major == IO_URING_VERSION_MAJOR &&      \
-    minor >= IO_URING_VERSION_MINOR))
+    minor > IO_URING_VERSION_MINOR))
 
 #ifdef __cplusplus
 }

+ 74 - 2
contrib/libs/liburing/src/include/liburing/io_uring.h

@@ -43,6 +43,10 @@ struct io_uring_sqe {
 	union {
 		__u64	addr;	/* pointer to buffer or iovecs */
 		__u64	splice_off_in;
+		struct {
+			__u32	level;
+			__u32	optname;
+		};
 	};
 	__u32	len;		/* buffer size or number of iovecs */
 	union {
@@ -65,6 +69,9 @@ struct io_uring_sqe {
 		__u32		xattr_flags;
 		__u32		msg_ring_flags;
 		__u32		uring_cmd_flags;
+		__u32		waitid_flags;
+		__u32		futex_flags;
+		__u32		install_fd_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -79,6 +86,7 @@ struct io_uring_sqe {
 	union {
 		__s32	splice_fd_in;
 		__u32	file_index;
+		__u32	optlen;
 		struct {
 			__u16	addr_len;
 			__u16	__pad3[1];
@@ -89,6 +97,7 @@ struct io_uring_sqe {
 			__u64	addr3;
 			__u64	__pad2[1];
 		};
+		__u64	optval;
 		/*
 		 * If the ring is initialized with IORING_SETUP_SQE128, then
 		 * this field is used for 80 bytes of arbitrary command data
@@ -173,6 +182,23 @@ enum {
  */
 #define IORING_SETUP_DEFER_TASKRUN	(1U << 13)
 
+/*
+ * Application provides ring memory
+ */
+#define IORING_SETUP_NO_MMAP		(1U << 14)
+
+/*
+ * Register the ring fd in itself for use with
+ * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
+ * than an fd.
+ */
+#define IORING_SETUP_REGISTERED_FD_ONLY	(1U << 15)
+
+/*
+ * Removes indirection through the SQ index array.
+ */
+#define IORING_SETUP_NO_SQARRAY		(1U << 16)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
@@ -223,6 +249,13 @@ enum io_uring_op {
 	IORING_OP_URING_CMD,
 	IORING_OP_SEND_ZC,
 	IORING_OP_SENDMSG_ZC,
+	IORING_OP_READ_MULTISHOT,
+	IORING_OP_WAITID,
+	IORING_OP_FUTEX_WAIT,
+	IORING_OP_FUTEX_WAKE,
+	IORING_OP_FUTEX_WAITV,
+	IORING_OP_FIXED_FD_INSTALL,
+	IORING_OP_FTRUNCATE,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -351,6 +384,13 @@ enum {
 /* Pass through the flags from sqe->file_index to cqe->flags */
 #define IORING_MSG_RING_FLAGS_PASS	(1U << 1)
 
+/*
+ * IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
+ *
+ * IORING_FIXED_FD_NO_CLOEXEC	Don't mark the fd as O_CLOEXEC
+ */
+#define IORING_FIXED_FD_NO_CLOEXEC	(1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
@@ -406,7 +446,7 @@ struct io_sqring_offsets {
 	__u32 dropped;
 	__u32 array;
 	__u32 resv1;
-	__u64 resv2;
+	__u64 user_addr;
 };
 
 /*
@@ -425,7 +465,7 @@ struct io_cqring_offsets {
 	__u32 cqes;
 	__u32 flags;
 	__u32 resv1;
-	__u64 resv2;
+	__u64 user_addr;
 };
 
 /*
@@ -523,6 +563,13 @@ enum {
 	/* register a range of fixed file slots for automatic slot allocation */
 	IORING_REGISTER_FILE_ALLOC_RANGE	= 25,
 
+	/* return status information for a buffer group */
+	IORING_REGISTER_PBUF_STATUS		= 26,
+
+	/* set/clear busy poll settings */
+	IORING_REGISTER_NAPI			= 27,
+	IORING_UNREGISTER_NAPI			= 28,
+
 	/* this goes last */
 	IORING_REGISTER_LAST,
 
@@ -649,6 +696,21 @@ struct io_uring_buf_reg {
 	__u64	resv[3];
 };
 
+/* argument for IORING_REGISTER_PBUF_STATUS */
+struct io_uring_buf_status {
+	__u32	buf_group;	/* input */
+	__u32	head;		/* output */
+	__u32	resv[8];
+};
+
+/* argument for IORING_(UN)REGISTER_NAPI */
+struct io_uring_napi {
+	__u32   busy_poll_to;
+	__u8    prefer_busy_poll;
+	__u8    pad[3];
+	__u64   resv;
+};
+
 /*
  * io_uring_restriction->opcode values
  */
@@ -703,6 +765,16 @@ struct io_uring_recvmsg_out {
 	__u32 flags;
 };
 
+/*
+ * Argument for IORING_OP_URING_CMD when file is a socket
+ */
+enum {
+	SOCKET_URING_OP_SIOCINQ		= 0,
+	SOCKET_URING_OP_SIOCOUTQ,
+	SOCKET_URING_OP_GETSOCKOPT,
+	SOCKET_URING_OP_SETSOCKOPT,
+};
+
 #ifdef __cplusplus
 }
 #endif

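The SOCKET_URING_OP enum above also covers ioctl-style queries; a sketch of asking how many unread bytes sit in a socket's receive queue (the SIOCINQ/FIONREAD equivalent), with sockfd assumed:

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_SIOCINQ, sockfd, 0, 0, NULL, 0);
    io_uring_submit(&ring);
    /* cqe->res is expected to report the queued byte count */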
+ 1 - 1
contrib/libs/liburing/src/include/liburing/io_uring_version.h

@@ -3,6 +3,6 @@
 #define LIBURING_VERSION_H
 
 #define IO_URING_VERSION_MAJOR 2
-#define IO_URING_VERSION_MINOR 4
+#define IO_URING_VERSION_MINOR 6
 
 #endif

+ 1 - 0
contrib/libs/liburing/src/int_flags.h

@@ -5,6 +5,7 @@
 enum {
 	INT_FLAG_REG_RING	= 1,
 	INT_FLAG_REG_REG_RING	= 2,
+	INT_FLAG_APP_MEM	= 4,
 };
 
 #endif

+ 2 - 0
contrib/libs/liburing/src/lib.h

@@ -10,6 +10,8 @@
 #include "arch/x86/lib.h"
 #elif defined(__aarch64__)
 #include "arch/aarch64/lib.h"
+#elif defined(__riscv) && __riscv_xlen == 64
+#error #include "arch/riscv64/lib.h"
 #else
 /*
  * We don't have nolibc support for this arch. Must use libc!

+ 8 - 12
contrib/libs/liburing/src/queue.c

@@ -213,22 +213,18 @@ static unsigned __io_uring_flush_sq(struct io_uring *ring)
 		 * Ensure kernel sees the SQE updates before the tail update.
 		 */
 		if (!(ring->flags & IORING_SETUP_SQPOLL))
-			IO_URING_WRITE_ONCE(*sq->ktail, tail);
+			*sq->ktail = tail;
 		else
 			io_uring_smp_store_release(sq->ktail, tail);
 	}
 	/*
-	 * This _may_ look problematic, as we're not supposed to be reading
-	 * SQ->head without acquire semantics. When we're in SQPOLL mode, the
-	 * kernel submitter could be updating this right now. For non-SQPOLL,
-	 * task itself does it, and there's no potential race. But even for
-	 * SQPOLL, the load is going to be potentially out-of-date the very
-	 * instant it's done, regardless or whether or not it's done
-	 * atomically. Worst case, we're going to be over-estimating what
-	 * we can submit. The point is, we need to be able to deal with this
-	 * situation regardless of any perceived atomicity.
-	 */
-	return tail - *sq->khead;
+	* This load needs to be atomic, since sq->khead is written concurrently
+	* by the kernel, but it doesn't need to be load_acquire, since the
+	* kernel doesn't store to the submission queue; it advances khead just
+	* to indicate that it's finished reading the submission queue entries
+	* so they're available for us to write to.
+	*/
+	return tail - IO_URING_READ_ONCE(*sq->khead);
 }
 
 /*

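The rewritten comment above leans on the READ_ONCE idiom: a plain load the compiler may not tear, cache, or repeat, with no ordering implied. liburing's own IO_URING_READ_ONCE lives in liburing/barrier.h; a generic definition of the idiom looks roughly like this (an approximation, not the exact macro from this tree):

    #define READ_ONCE(var) (*(const volatile __typeof__(var) *)&(var))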
Some files were not shown because too many files changed in this diff