12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253 |
- #include "../config-host.h"
- /* SPDX-License-Identifier: MIT */
- /*
- * Description: uring_cmd based ublk
- *
- * Covers cancellable uring_cmd feature.
- */
- #include <unistd.h>
- #include <stdlib.h>
- #include <assert.h>
- #include <stdio.h>
- #include <stdarg.h>
- #include <string.h>
- #include <pthread.h>
- #include <limits.h>
- #include <poll.h>
- #include <sys/syscall.h>
- #include <sys/mman.h>
- #include <sys/ioctl.h>
- #include <sys/inotify.h>
- #include <sys/wait.h>
- #include "liburing.h"
- #include "helpers.h"
- #ifdef CONFIG_HAVE_UBLK_HEADER
- #include <linux/ublk_cmd.h>
- /****************** part 1: libublk ********************/
/* Device node paths; per-device nodes get the device id appended. */
#define CTRL_DEV		"/dev/ublk-control"
#define UBLKC_DEV		"/dev/ublkc"
#define UBLKB_DEV		"/dev/ublkb"

/* Depth of the io_uring used for control commands. */
#define UBLK_CTRL_RING_DEPTH	32

/* queue idle timeout */
#define UBLKSRV_IO_IDLE_SECS	20

/* Limits enforced by this server. */
#define UBLK_IO_MAX_BYTES	65536
#define UBLK_MAX_QUEUES		4
#define UBLK_QUEUE_DEPTH	128

/* Debug categories, tested against ublk_dbg_mask by ublk_dbg(). */
#define UBLK_DBG_DEV		(1U << 0)
#define UBLK_DBG_QUEUE		(1U << 1)
#define UBLK_DBG_IO_CMD		(1U << 2)
#define UBLK_DBG_IO		(1U << 3)
#define UBLK_DBG_CTRL_CMD	(1U << 4)
#define UBLK_LOG		(1U << 5)
- struct ublk_dev;
- struct ublk_queue;
/* Bits for ublk_ctrl_cmd_data.flags */
#define CTRL_CMD_HAS_DATA	1
#define CTRL_CMD_HAS_BUF	2

/* Describes one control command; consumed by ublk_ctrl_init_cmd(). */
struct ublk_ctrl_cmd_data {
	__u32	cmd_op;		/* command opcode stored into the SQE */
	__u32	flags;		/* CTRL_CMD_HAS_* */

	__u64	data[2];	/* data[0] is copied when CTRL_CMD_HAS_DATA is set */
	__u64	addr;		/* copied when CTRL_CMD_HAS_BUF is set */
	__u32	len;		/* copied when CTRL_CMD_HAS_BUF is set */
};
/* Bits for ublk_io.flags */
#define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
#define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKSRV_IO_FREE			(1UL << 2)

/* Per-tag io slot of a queue. */
struct ublk_io {
	char		*buf_addr;	/* io buffer handed to the driver */
	unsigned int	flags;		/* UBLKSRV_NEED_* | UBLKSRV_IO_FREE */
	unsigned int	result;		/* value sent back on commit */
};
/* Callbacks a target type provides; looked up by name via ublk_find_tgt(). */
struct ublk_tgt_ops {
	const char *name;

	/* optional: called once the char device has been opened */
	int (*init_tgt)(struct ublk_dev *dev);
	/* optional: called before the char device is closed */
	void (*deinit_tgt)(struct ublk_dev *dev);

	/* called for every fetched io command */
	int (*queue_io)(struct ublk_queue *q, int tag);
	/* optional: called for every target io completion */
	void (*tgt_io_done)(struct ublk_queue *q, int tag,
			    const struct io_uring_cqe *cqe);
};
- struct ublk_tgt {
- unsigned long dev_size;
- const struct ublk_tgt_ops *ops;
- struct ublk_params params;
- };
- struct ublk_queue {
- int q_id;
- int q_depth;
- unsigned int cmd_inflight;
- unsigned int io_inflight;
- struct ublk_dev *dev;
- const struct ublk_tgt_ops *tgt_ops;
- char *io_cmd_buf;
- struct io_uring ring;
- struct ublk_io ios[UBLK_QUEUE_DEPTH];
- #define UBLKSRV_QUEUE_STOPPING (1U << 0)
- #define UBLKSRV_QUEUE_IDLE (1U << 1)
- unsigned state;
- pid_t tid;
- pthread_t thread;
- };
- struct ublk_dev {
- struct ublk_tgt tgt;
- struct ublksrv_ctrl_dev_info dev_info;
- struct ublk_queue q[UBLK_MAX_QUEUES];
- int fds[2]; /* fds[0] points to /dev/ublkcN */
- int nr_fds;
- int ctrl_fd;
- struct io_uring ring;
- };
/* Byte offset of MEMBER inside TYPE, unless the toolchain already has one. */
#ifndef offsetof
#define offsetof(TYPE, MEMBER)	((size_t)&((TYPE *)0)->MEMBER)
#endif

/* Recover the enclosing object of type `type` from a pointer to `member`. */
#ifndef container_of
#define container_of(ptr, type, member) ({				\
	unsigned long __mptr = (unsigned long)(ptr);			\
	((type *)(__mptr - offsetof(type, member))); })
#endif

/* Round val up by masking with rnd - 1, so rnd must be a power of two. */
#define round_up(val, rnd)	\
	(((val) + ((rnd) - 1)) & ~((rnd) - 1))
- static unsigned int ublk_dbg_mask = 0;
- static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
/* Return 1 when bit 63 of @user_data (the target-io marker) is set, else 0. */
static inline int is_target_io(__u64 user_data)
{
	const __u64 target_bit = 1ULL << 63;

	if (user_data & target_bit)
		return 1;
	return 0;
}
/*
 * Pack the identifying fields of a request into a 64-bit user_data value:
 *
 *   bits  0..15  tag
 *   bits 16..23  op
 *   bits 24..39  tgt_data
 *   bit  63      is_target_io
 *
 * Each field is widened to 64 bits before shifting. A 16-bit tgt_data
 * shifted by 24 does not fit in a 32-bit unsigned, so shifting first would
 * silently drop its upper bits.
 */
static inline __u64 build_user_data(unsigned tag, unsigned op,
		unsigned tgt_data, unsigned is_target_io)
{
	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));

	return (__u64)tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) |
		((__u64)is_target_io << 63);
}
/* Extract the tag stored in the low 16 bits of @user_data. */
static inline unsigned int user_data_to_tag(__u64 user_data)
{
	const __u64 tag_mask = 0xffff;

	return (unsigned int)(user_data & tag_mask);
}
/* Extract the 8-bit op stored in bits 16..23 of @user_data. */
static inline unsigned int user_data_to_op(__u64 user_data)
{
	const __u64 shifted = user_data >> 16;

	return (unsigned int)(shifted & 0xff);
}
/*
 * printf-style error reporting to stderr.
 *
 * Every va_start() must be paired with va_end() in the same function.
 */
static void ublk_err(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}
- static void ublk_dbg(int level, const char *fmt, ...)
- {
- if (level & ublk_dbg_mask) {
- va_list ap;
- va_start(ap, fmt);
- vfprintf(stdout, fmt, ap);
- }
- }
- static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
- {
- return (void *)&sqe->cmd;
- }
- static inline void ublk_mark_io_done(struct ublk_io *io, int res)
- {
- io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
- io->result = res;
- }
- static inline const struct ublksrv_io_desc *ublk_get_iod(
- const struct ublk_queue *q, int tag)
- {
- return (struct ublksrv_io_desc *)
- &(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
- }
- static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
- __u32 cmd_op)
- {
- __u32 *addr = (__u32 *)&sqe->off;
- addr[0] = cmd_op;
- addr[1] = 0;
- }
- static inline int ublk_setup_ring(struct io_uring *r, int depth,
- int cq_depth, unsigned flags)
- {
- struct io_uring_params p;
- memset(&p, 0, sizeof(p));
- p.flags = flags | IORING_SETUP_CQSIZE;
- p.cq_entries = cq_depth;
- return io_uring_queue_init_params(depth, r, &p);
- }
- static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
- struct io_uring_sqe *sqe,
- struct ublk_ctrl_cmd_data *data)
- {
- struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
- struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
- sqe->fd = dev->ctrl_fd;
- sqe->opcode = IORING_OP_URING_CMD;
- sqe->ioprio = 0;
- if (data->flags & CTRL_CMD_HAS_BUF) {
- cmd->addr = data->addr;
- cmd->len = data->len;
- }
- if (data->flags & CTRL_CMD_HAS_DATA)
- cmd->data[0] = data->data[0];
- cmd->dev_id = info->dev_id;
- cmd->queue_id = -1;
- ublk_set_sqe_cmd_op(sqe, data->cmd_op);
- io_uring_sqe_set_data(sqe, cmd);
- }
- static int __ublk_ctrl_cmd(struct ublk_dev *dev,
- struct ublk_ctrl_cmd_data *data)
- {
- struct io_uring_sqe *sqe;
- struct io_uring_cqe *cqe;
- int ret = -EINVAL;
- sqe = io_uring_get_sqe(&dev->ring);
- if (!sqe) {
- ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
- return ret;
- }
- ublk_ctrl_init_cmd(dev, sqe, data);
- ret = io_uring_submit(&dev->ring);
- if (ret < 0) {
- ublk_err("uring submit ret %d\n", ret);
- return ret;
- }
- ret = io_uring_wait_cqe(&dev->ring, &cqe);
- if (ret < 0) {
- ublk_err("wait cqe: %s\n", strerror(-ret));
- return ret;
- }
- io_uring_cqe_seen(&dev->ring, cqe);
- return cqe->res;
- }
- static int ublk_ctrl_start_dev(struct ublk_dev *dev,
- int daemon_pid)
- {
- struct ublk_ctrl_cmd_data data = {
- .cmd_op = UBLK_U_CMD_START_DEV,
- .flags = CTRL_CMD_HAS_DATA,
- };
- dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
- return __ublk_ctrl_cmd(dev, &data);
- }
- static int ublk_ctrl_add_dev(struct ublk_dev *dev)
- {
- struct ublk_ctrl_cmd_data data = {
- .cmd_op = UBLK_U_CMD_ADD_DEV,
- .flags = CTRL_CMD_HAS_BUF,
- .addr = (__u64) (uintptr_t) &dev->dev_info,
- .len = sizeof(struct ublksrv_ctrl_dev_info),
- };
- return __ublk_ctrl_cmd(dev, &data);
- }
- static int ublk_ctrl_del_dev(struct ublk_dev *dev)
- {
- struct ublk_ctrl_cmd_data data = {
- .cmd_op = UBLK_U_CMD_DEL_DEV,
- .flags = 0,
- };
- return __ublk_ctrl_cmd(dev, &data);
- }
- static int ublk_ctrl_get_info(struct ublk_dev *dev)
- {
- struct ublk_ctrl_cmd_data data = {
- .cmd_op = UBLK_U_CMD_GET_DEV_INFO,
- .flags = CTRL_CMD_HAS_BUF,
- .addr = (__u64) (uintptr_t) &dev->dev_info,
- .len = sizeof(struct ublksrv_ctrl_dev_info),
- };
- return __ublk_ctrl_cmd(dev, &data);
- }
- static int ublk_ctrl_set_params(struct ublk_dev *dev,
- struct ublk_params *params)
- {
- struct ublk_ctrl_cmd_data data = {
- .cmd_op = UBLK_U_CMD_SET_PARAMS,
- .flags = CTRL_CMD_HAS_BUF,
- .addr = (__u64) (uintptr_t) params,
- .len = sizeof(*params),
- };
- params->len = sizeof(*params);
- return __ublk_ctrl_cmd(dev, &data);
- }
- static int ublk_ctrl_get_features(struct ublk_dev *dev,
- __u64 *features)
- {
- struct ublk_ctrl_cmd_data data = {
- .cmd_op = UBLK_U_CMD_GET_FEATURES,
- .flags = CTRL_CMD_HAS_BUF,
- .addr = (__u64) (uintptr_t) features,
- .len = sizeof(*features),
- };
- return __ublk_ctrl_cmd(dev, &data);
- }
- static void ublk_ctrl_deinit(struct ublk_dev *dev)
- {
- close(dev->ctrl_fd);
- free(dev);
- }
- static struct ublk_dev *ublk_ctrl_init(void)
- {
- struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
- struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
- int ret;
- dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
- if (dev->ctrl_fd < 0) {
- free(dev);
- return NULL;
- }
- info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
- ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
- UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
- if (ret < 0) {
- ublk_err("queue_init: %s\n", strerror(-ret));
- free(dev);
- return NULL;
- }
- dev->nr_fds = 1;
- return dev;
- }
- static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
- {
- int size = q->q_depth * sizeof(struct ublksrv_io_desc);
- unsigned int page_sz = getpagesize();
- return round_up(size, page_sz);
- }
- static void ublk_queue_deinit(struct ublk_queue *q)
- {
- int i;
- int nr_ios = q->q_depth;
- io_uring_unregister_ring_fd(&q->ring);
- if (q->ring.ring_fd > 0) {
- io_uring_unregister_files(&q->ring);
- close(q->ring.ring_fd);
- q->ring.ring_fd = -1;
- }
- if (q->io_cmd_buf)
- munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
- for (i = 0; i < nr_ios; i++)
- free(q->ios[i].buf_addr);
- }
- static int ublk_queue_init(struct ublk_queue *q)
- {
- struct ublk_dev *dev = q->dev;
- int depth = dev->dev_info.queue_depth;
- int i, ret = -1;
- int cmd_buf_size, io_buf_size;
- unsigned long off;
- int ring_depth = depth, cq_depth = depth;
- q->tgt_ops = dev->tgt.ops;
- q->state = 0;
- q->q_depth = depth;
- q->cmd_inflight = 0;
- q->tid = gettid();
- cmd_buf_size = ublk_queue_cmd_buf_sz(q);
- off = UBLKSRV_CMD_BUF_OFFSET +
- q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
- q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
- MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
- if (q->io_cmd_buf == MAP_FAILED) {
- ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
- q->dev->dev_info.dev_id, q->q_id);
- goto fail;
- }
- io_buf_size = dev->dev_info.max_io_buf_bytes;
- for (i = 0; i < q->q_depth; i++) {
- q->ios[i].buf_addr = NULL;
- if (posix_memalign((void **)&q->ios[i].buf_addr,
- getpagesize(), io_buf_size)) {
- ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
- dev->dev_info.dev_id, q->q_id, i);
- goto fail;
- }
- q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
- }
- ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
- IORING_SETUP_COOP_TASKRUN);
- if (ret < 0) {
- ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
- q->dev->dev_info.dev_id, q->q_id, ret);
- goto fail;
- }
- io_uring_register_ring_fd(&q->ring);
- ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
- if (ret) {
- ublk_err("ublk dev %d queue %d register files failed %d\n",
- q->dev->dev_info.dev_id, q->q_id, ret);
- goto fail;
- }
- return 0;
- fail:
- ublk_queue_deinit(q);
- ublk_err("ublk dev %d queue %d failed\n",
- dev->dev_info.dev_id, q->q_id);
- return -ENOMEM;
- }
- static int ublk_dev_prep(struct ublk_dev *dev)
- {
- int dev_id = dev->dev_info.dev_id;
- char buf[64];
- int ret = 0;
- snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
- dev->fds[0] = open(buf, O_RDWR);
- if (dev->fds[0] < 0) {
- ret = -EBADF;
- ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
- goto fail;
- }
- if (dev->tgt.ops->init_tgt)
- ret = dev->tgt.ops->init_tgt(dev);
- return ret;
- fail:
- close(dev->fds[0]);
- return ret;
- }
- static void ublk_dev_unprep(struct ublk_dev *dev)
- {
- if (dev->tgt.ops->deinit_tgt)
- dev->tgt.ops->deinit_tgt(dev);
- close(dev->fds[0]);
- }
- static int ublk_queue_io_cmd(struct ublk_queue *q,
- struct ublk_io *io, unsigned tag)
- {
- struct ublksrv_io_cmd *cmd;
- struct io_uring_sqe *sqe;
- unsigned int cmd_op = 0;
- __u64 user_data;
- /* only freed io can be issued */
- if (!(io->flags & UBLKSRV_IO_FREE))
- return 0;
- /* we issue because we need either fetching or committing */
- if (!(io->flags &
- (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
- return 0;
- if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
- cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
- else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
- cmd_op = UBLK_U_IO_FETCH_REQ;
- sqe = io_uring_get_sqe(&q->ring);
- if (!sqe) {
- ublk_err("%s: run out of sqe %d, tag %d\n",
- __func__, q->q_id, tag);
- return -1;
- }
- cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
- if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
- cmd->result = io->result;
- /* These fields should be written once, never change */
- ublk_set_sqe_cmd_op(sqe, cmd_op);
- sqe->fd = 0; /* dev->fds[0] */
- sqe->opcode = IORING_OP_URING_CMD;
- sqe->flags = IOSQE_FIXED_FILE;
- sqe->rw_flags = 0;
- cmd->tag = tag;
- cmd->addr = (__u64) (uintptr_t) io->buf_addr;
- cmd->q_id = q->q_id;
- user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
- io_uring_sqe_set_data64(sqe, user_data);
- io->flags = 0;
- q->cmd_inflight += 1;
- ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
- __func__, q->q_id, tag, cmd_op,
- io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
- return 1;
- }
- static int ublk_complete_io(struct ublk_queue *q,
- unsigned tag, int res)
- {
- struct ublk_io *io = &q->ios[tag];
- ublk_mark_io_done(io, res);
- return ublk_queue_io_cmd(q, io, tag);
- }
- static void ublk_submit_fetch_commands(struct ublk_queue *q)
- {
- int i = 0;
- for (i = 0; i < q->q_depth; i++)
- ublk_queue_io_cmd(q, &q->ios[i], i);
- }
- static int ublk_queue_is_idle(struct ublk_queue *q)
- {
- return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
- }
- static int ublk_queue_is_done(struct ublk_queue *q)
- {
- return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
- }
- static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
- struct io_uring_cqe *cqe)
- {
- unsigned tag = user_data_to_tag(cqe->user_data);
- if (cqe->res < 0 && cqe->res != -EAGAIN)
- ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
- __func__, cqe->res, q->q_id,
- user_data_to_tag(cqe->user_data),
- user_data_to_op(cqe->user_data));
- if (q->tgt_ops->tgt_io_done)
- q->tgt_ops->tgt_io_done(q, tag, cqe);
- }
- static void ublk_handle_cqe(struct io_uring *r,
- struct io_uring_cqe *cqe, void *data)
- {
- struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
- unsigned tag = user_data_to_tag(cqe->user_data);
- unsigned cmd_op = user_data_to_op(cqe->user_data);
- int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
- !(q->state & UBLKSRV_QUEUE_STOPPING);
- struct ublk_io *io;
- ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
- __func__, cqe->res, q->q_id, tag, cmd_op,
- is_target_io(cqe->user_data),
- (q->state & UBLKSRV_QUEUE_STOPPING));
- /* Don't retrieve io in case of target io */
- if (is_target_io(cqe->user_data)) {
- ublksrv_handle_tgt_cqe(q, cqe);
- return;
- }
- io = &q->ios[tag];
- q->cmd_inflight--;
- if (!fetch) {
- q->state |= UBLKSRV_QUEUE_STOPPING;
- io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
- }
- if (cqe->res == UBLK_IO_RES_OK) {
- assert(tag < q->q_depth);
- q->tgt_ops->queue_io(q, tag);
- } else {
- /*
- * COMMIT_REQ will be completed immediately since no fetching
- * piggyback is required.
- *
- * Marking IO_FREE only, then this io won't be issued since
- * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
- *
- * */
- io->flags = UBLKSRV_IO_FREE;
- }
- }
- static int ublk_reap_events_uring(struct io_uring *r)
- {
- struct io_uring_cqe *cqe;
- unsigned head;
- int count = 0;
- io_uring_for_each_cqe(r, head, cqe) {
- ublk_handle_cqe(r, cqe, NULL);
- count += 1;
- }
- io_uring_cq_advance(r, count);
- return count;
- }
- static int ublk_process_io(struct ublk_queue *q)
- {
- int ret, reapped;
- ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
- q->dev->dev_info.dev_id,
- q->q_id, io_uring_sq_ready(&q->ring),
- q->cmd_inflight,
- (q->state & UBLKSRV_QUEUE_STOPPING));
- if (ublk_queue_is_done(q))
- return -ENODEV;
- ret = io_uring_submit_and_wait(&q->ring, 1);
- reapped = ublk_reap_events_uring(&q->ring);
- ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
- ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
- (q->state & UBLKSRV_QUEUE_IDLE));
- return reapped;
- }
- static void *ublk_io_handler_fn(void *data)
- {
- struct ublk_queue *q = data;
- int dev_id = q->dev->dev_info.dev_id;
- int ret;
- ret = ublk_queue_init(q);
- if (ret) {
- ublk_err("ublk dev %d queue %d init queue failed\n",
- dev_id, q->q_id);
- return NULL;
- }
- ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
- q->tid, dev_id, q->q_id);
- /* submit all io commands to ublk driver */
- ublk_submit_fetch_commands(q);
- do {
- if (ublk_process_io(q) < 0)
- break;
- } while (1);
- ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
- ublk_queue_deinit(q);
- return NULL;
- }
- static void ublk_set_parameters(struct ublk_dev *dev)
- {
- int ret;
- ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
- if (ret)
- ublk_err("dev %d set basic parameter failed %d\n",
- dev->dev_info.dev_id, ret);
- }
- static int ublk_start_daemon(struct ublk_dev *dev)
- {
- int ret, i;
- void *thread_ret;
- const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
- if (daemon(1, 1) < 0)
- return -errno;
- ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
- ret = ublk_dev_prep(dev);
- if (ret)
- return ret;
- for (i = 0; i < dinfo->nr_hw_queues; i++) {
- dev->q[i].dev = dev;
- dev->q[i].q_id = i;
- pthread_create(&dev->q[i].thread, NULL,
- ublk_io_handler_fn,
- &dev->q[i]);
- }
- /* everything is fine now, start us */
- ublk_set_parameters(dev);
- ret = ublk_ctrl_start_dev(dev, getpid());
- if (ret < 0) {
- ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
- goto fail;
- }
- /* wait until we are terminated */
- for (i = 0; i < dinfo->nr_hw_queues; i++)
- pthread_join(dev->q[i].thread, &thread_ret);
- fail:
- ublk_dev_unprep(dev);
- ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
- return ret;
- }
- static int wait_ublk_dev(char *dev_name, int evt_mask, unsigned timeout)
- {
- #define EV_SIZE (sizeof(struct inotify_event))
- #define EV_BUF_LEN (128 * (EV_SIZE + 16))
- struct pollfd pfd;
- int fd, wd;
- int ret = -EINVAL;
- fd = inotify_init();
- if (fd < 0) {
- ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
- return fd;
- }
- wd = inotify_add_watch(fd, "/dev", evt_mask);
- if (wd == -1) {
- ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
- goto fail;
- }
- pfd.fd = fd;
- pfd.events = POLL_IN;
- while (1) {
- int i = 0;
- char buffer[EV_BUF_LEN];
- ret = poll(&pfd, 1, 1000 * timeout);
- if (ret == -1) {
- ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
- goto rm_watch;
- } else if (ret == 0) {
- ublk_err("%s: poll inotify timeout\n", __func__);
- ret = -ENOENT;
- goto rm_watch;
- }
- ret = read(fd, buffer, EV_BUF_LEN);
- if (ret < 0) {
- ublk_err("%s: read inotify fd failed\n", __func__);
- goto rm_watch;
- }
- while (i < ret) {
- struct inotify_event *event = (struct inotify_event *)&buffer[i];
- ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
- __func__, event->mask, event->name);
- if (event->mask & evt_mask) {
- if (!strcmp(event->name, dev_name)) {
- ret = 0;
- goto rm_watch;
- }
- }
- i += EV_SIZE + event->len;
- }
- }
- rm_watch:
- inotify_rm_watch(fd, wd);
- fail:
- close(fd);
- return ret;
- }
- static int ublk_stop_io_daemon(const struct ublk_dev *dev)
- {
- int daemon_pid = dev->dev_info.ublksrv_pid;
- int dev_id = dev->dev_info.dev_id;
- char ublkc[64];
- int ret;
- /*
- * Wait until ublk char device is closed, when our daemon is shutdown
- */
- snprintf(ublkc, sizeof(ublkc), "%s%d", "ublkc", dev_id);
- ret = wait_ublk_dev(ublkc, IN_CLOSE_WRITE, 10);
- waitpid(dev->dev_info.ublksrv_pid, NULL, 0);
- ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
- __func__, daemon_pid, dev_id, ret);
- return ret;
- }
- static int cmd_dev_add(char *tgt_type, int *exp_id, unsigned nr_queues,
- unsigned depth)
- {
- const struct ublk_tgt_ops *ops;
- struct ublksrv_ctrl_dev_info *info;
- struct ublk_dev *dev;
- int dev_id = *exp_id;
- char ublkb[64];
- int ret;
- ops = ublk_find_tgt(tgt_type);
- if (!ops) {
- ublk_err("%s: no such tgt type, type %s\n",
- __func__, tgt_type);
- return -ENODEV;
- }
- if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
- ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
- __func__, nr_queues, depth);
- return -EINVAL;
- }
- dev = ublk_ctrl_init();
- if (!dev) {
- ublk_err("%s: can't alloc dev id %d, type %s\n",
- __func__, dev_id, tgt_type);
- return -ENOMEM;
- }
- info = &dev->dev_info;
- info->dev_id = dev_id;
- info->nr_hw_queues = nr_queues;
- info->queue_depth = depth;
- dev->tgt.ops = ops;
- ret = ublk_ctrl_add_dev(dev);
- if (ret < 0) {
- ublk_err("%s: can't add dev id %d, type %s ret %d\n",
- __func__, dev_id, tgt_type, ret);
- goto fail;
- }
- switch (fork()) {
- case -1:
- goto fail;
- case 0:
- ublk_start_daemon(dev);
- return 0;
- }
- /*
- * Wait until ublk disk is added, when our daemon is started
- * successfully
- */
- snprintf(ublkb, sizeof(ublkb), "%s%u", "ublkb", dev->dev_info.dev_id);
- ret = wait_ublk_dev(ublkb, IN_CREATE, 3);
- if (ret < 0) {
- ublk_err("%s: can't start daemon id %d, type %s\n",
- __func__, dev_id, tgt_type);
- ublk_ctrl_del_dev(dev);
- } else {
- *exp_id = dev->dev_info.dev_id;
- }
- fail:
- ublk_ctrl_deinit(dev);
- return ret;
- }
- static int cmd_dev_del_by_kill(int number)
- {
- struct ublk_dev *dev;
- int ret;
- dev = ublk_ctrl_init();
- dev->dev_info.dev_id = number;
- ret = ublk_ctrl_get_info(dev);
- if (ret < 0)
- goto fail;
- /* simulate one ublk daemon panic */
- kill(dev->dev_info.ublksrv_pid, 9);
- ret = ublk_stop_io_daemon(dev);
- if (ret < 0)
- ublk_err("%s: can't stop daemon id %d\n", __func__, number);
- ublk_ctrl_del_dev(dev);
- fail:
- if (ret >= 0)
- ret = ublk_ctrl_get_info(dev);
- ublk_ctrl_deinit(dev);
- return (ret != 0) ? 0 : -EIO;
- }
- /****************** part 2: target implementation ********************/
- static int ublk_null_tgt_init(struct ublk_dev *dev)
- {
- const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
- unsigned long dev_size = 250UL << 30;
- dev->tgt.dev_size = dev_size;
- dev->tgt.params = (struct ublk_params) {
- .types = UBLK_PARAM_TYPE_BASIC,
- .basic = {
- .logical_bs_shift = 9,
- .physical_bs_shift = 12,
- .io_opt_shift = 12,
- .io_min_shift = 9,
- .max_sectors = info->max_io_buf_bytes >> 9,
- .dev_sectors = dev_size >> 9,
- },
- };
- return 0;
- }
- static int ublk_null_queue_io(struct ublk_queue *q, int tag)
- {
- const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
- ublk_complete_io(q, tag, iod->nr_sectors << 9);
- return 0;
- }
- static const struct ublk_tgt_ops tgt_ops_list[] = {
- {
- .name = "null",
- .init_tgt = ublk_null_tgt_init,
- .queue_io = ublk_null_queue_io,
- },
- };
- static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
- {
- const struct ublk_tgt_ops *ops;
- int i;
- if (name == NULL)
- return NULL;
- for (i = 0; sizeof(tgt_ops_list) / sizeof(*ops); i++)
- if (strcmp(tgt_ops_list[i].name, name) == 0)
- return &tgt_ops_list[i];
- return NULL;
- }
- /****************** part 3: IO test over ublk disk ********************/
- #include "helpers.h"
- #include "liburing.h"
#define BS		4096	/* bytes per io buffer */
#define BUFFERS		128	/* buffers, and ios submitted per batch */

/* Per-thread description of one IO job; see test_io(). */
struct io_ctx {
	int dev_id;		/* N of /dev/ublkbN */
	int write;		/* non-zero: write, zero: read */
	int seq;		/* non-zero: sequential offsets, zero: random */

	/* output */
	int res;

	pthread_t handle;
};
- static int __test_io(struct io_uring *ring, int fd, int write,
- int seq, struct iovec *vecs, int exp_len, off_t start)
- {
- struct io_uring_sqe *sqe;
- struct io_uring_cqe *cqe;
- int i, ret;
- off_t offset;
- offset = start;
- for (i = 0; i < BUFFERS; i++) {
- sqe = io_uring_get_sqe(ring);
- if (!sqe) {
- fprintf(stderr, "sqe get failed\n");
- goto err;
- }
- if (!seq)
- offset = start + BS * (rand() % BUFFERS);
- if (write) {
- io_uring_prep_write_fixed(sqe, fd, vecs[i].iov_base,
- vecs[i].iov_len,
- offset, i);
- } else {
- io_uring_prep_read_fixed(sqe, fd, vecs[i].iov_base,
- vecs[i].iov_len,
- offset, i);
- }
- sqe->user_data = i;
- if (seq)
- offset += BS;
- }
- ret = io_uring_submit(ring);
- if (ret != BUFFERS) {
- fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
- goto err;
- }
- for (i = 0; i < BUFFERS; i++) {
- ret = io_uring_wait_cqe(ring, &cqe);
- if (ret) {
- fprintf(stderr, "wait_cqe=%d\n", ret);
- goto err;
- }
- if (exp_len == -1) {
- int iov_len = vecs[cqe->user_data].iov_len;
- if (cqe->res != iov_len) {
- fprintf(stderr, "cqe res %d, wanted %d\n",
- cqe->res, iov_len);
- goto err;
- }
- } else if (cqe->res != exp_len) {
- fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
- goto err;
- }
- io_uring_cqe_seen(ring, cqe);
- }
- return 0;
- err:
- return 1;
- }
- /* Run IO over ublk block device */
- static int test_io(struct io_ctx *ctx)
- {
- struct io_uring ring;
- int ret, ring_flags = 0;
- char buf[256];
- int fd = -1;
- off_t offset = 0;
- unsigned long long bytes;
- int open_flags = O_DIRECT;
- struct iovec *vecs = t_create_buffers(BUFFERS, BS);
- ret = t_create_ring(BUFFERS, &ring, ring_flags);
- if (ret == T_SETUP_SKIP)
- return 0;
- if (ret != T_SETUP_OK) {
- fprintf(stderr, "ring create failed: %d\n", ret);
- return 1;
- }
- snprintf(buf, sizeof(buf), "%s%d", UBLKB_DEV, ctx->dev_id);
- if (ctx->write)
- open_flags |= O_WRONLY;
- else
- open_flags |= O_RDONLY;
- fd = open(buf, open_flags);
- if (fd < 0) {
- if (errno == EINVAL)
- return 0;
- return 1;
- }
- if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
- return 1;
- ret = t_register_buffers(&ring, vecs, BUFFERS);
- if (ret == T_SETUP_SKIP)
- return 0;
- if (ret != T_SETUP_OK) {
- fprintf(stderr, "buffer reg failed: %d\n", ret);
- return 1;
- }
- for (offset = 0; offset < bytes; offset += BS * BUFFERS) {
- ret = __test_io(&ring, fd, ctx->write, ctx->seq, vecs, BS,
- offset);
- if (ret != T_SETUP_OK) {
- fprintf(stderr, "/dev/ublkb%d read failed: offset %lu ret %d\n",
- ctx->dev_id, (unsigned long) offset, ret);
- break;
- }
- }
- close(fd);
- io_uring_unregister_buffers(&ring);
- io_uring_queue_exit(&ring);
- return ret;
- }
- static void *test_io_fn(void *data)
- {
- struct io_ctx *ctx = data;
- ctx->res = test_io(ctx);
- return data;
- }
/* Redirect stderr to /dev/null; does nothing if /dev/null can't be opened. */
static void ignore_stderr(void)
{
	int null_fd = open("/dev/null", O_WRONLY);

	if (null_fd < 0)
		return;

	dup2(null_fd, fileno(stderr));
	close(null_fd);
}
- static int test_io_worker(int dev_id)
- {
- const int nr_jobs = 4;
- struct io_ctx ctx[nr_jobs];
- int i, ret = 0;
- for (i = 0; i < nr_jobs; i++) {
- ctx[i].dev_id = dev_id;
- ctx[i].write = (i & 0x1) ? 0 : 1;
- ctx[i].seq = 1;
- pthread_create(&ctx[i].handle, NULL, test_io_fn, &ctx[i]);
- }
- for (i = 0; i < nr_jobs; i++) {
- pthread_join(ctx[i].handle, NULL);
- if (!ret && ctx[i].res)
- ret = ctx[i].res;
- }
- return ret;
- }
- /*
- * Run IO over created ublk device, meantime delete this ublk device
- *
- * Cover cancellable uring_cmd
- * */
- static int test_del_ublk_with_io(void)
- {
- const unsigned wait_ms = 200;
- char *tgt_type = "null";
- int dev_id = -1;
- int ret, pid;
- ret = cmd_dev_add(tgt_type, &dev_id, 2, BUFFERS);
- if (ret != T_SETUP_OK) {
- fprintf(stderr, "buffer reg failed: %d\n", ret);
- return T_EXIT_FAIL;
- }
- switch ((pid = fork())) {
- case -1:
- fprintf(stderr, "fork failed\n");
- return T_EXIT_FAIL;
- case 0:
- /* io error is expected since the parent is killing ublk */
- ignore_stderr();
- test_io_worker(dev_id);
- return 0;
- default:
- /*
- * Wait a little while until ublk IO pipeline is warm up,
- * then try to shutdown ublk device by `kill -9 $ublk_daemon_pid`.
- *
- * cancellable uring_cmd code path can be covered in this way.
- */
- usleep(wait_ms * 1000);
- ret = cmd_dev_del_by_kill(dev_id);
- waitpid(pid, NULL, 0);
- return ret;
- }
- }
- int main(int argc, char *argv[])
- {
- const int nr_loop = 4;
- struct ublk_dev *dev;
- __u64 features;
- int ret, i;
- if (argc > 1)
- return T_EXIT_SKIP;
- dev = ublk_ctrl_init();
- /* ublk isn't supported or the module isn't loaded */
- if (!dev)
- return T_EXIT_SKIP;
- /* kernel doesn't support get_features */
- ret = ublk_ctrl_get_features(dev, &features);
- if (ret < 0)
- return T_EXIT_SKIP;
- if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
- return T_EXIT_SKIP;
- for (i = 0; i < nr_loop; i++) {
- if (test_del_ublk_with_io())
- return T_EXIT_FAIL;
- }
- ublk_ctrl_deinit(dev);
- return T_EXIT_PASS;
- }
- #else
- int main(int argc, char *argv[])
- {
- return T_EXIT_SKIP;
- }
- #endif
|