#include "../config-host.h"
/* SPDX-License-Identifier: MIT */
/*
 * Description: uring_cmd based ublk
 *
 * Covers the cancellable uring_cmd feature.
 */
#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <limits.h>
#include <poll.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/inotify.h>
#include <sys/wait.h>
#include <linux/fs.h>
#include "liburing.h"
#include "helpers.h"

#ifdef CONFIG_HAVE_UBLK_HEADER
#include <linux/ublk_cmd.h>

/****************** part 1: libublk ********************/

#define CTRL_DEV		"/dev/ublk-control"
#define UBLKC_DEV		"/dev/ublkc"
#define UBLKB_DEV		"/dev/ublkb"
#define UBLK_CTRL_RING_DEPTH	32

/* queue idle timeout */
#define UBLKSRV_IO_IDLE_SECS	20

#define UBLK_IO_MAX_BYTES	65536
#define UBLK_MAX_QUEUES		4
#define UBLK_QUEUE_DEPTH	128

#define UBLK_DBG_DEV		(1U << 0)
#define UBLK_DBG_QUEUE		(1U << 1)
#define UBLK_DBG_IO_CMD		(1U << 2)
#define UBLK_DBG_IO		(1U << 3)
#define UBLK_DBG_CTRL_CMD	(1U << 4)
#define UBLK_LOG		(1U << 5)

struct ublk_dev;
struct ublk_queue;

struct ublk_ctrl_cmd_data {
	__u32 cmd_op;
#define CTRL_CMD_HAS_DATA	1
#define CTRL_CMD_HAS_BUF	2
	__u32 flags;

	__u64 data[2];
	__u64 addr;
	__u32 len;
};
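
/*
 * Per-tag I/O state. The flags below track where each tag is in the
 * FETCH_REQ / COMMIT_AND_FETCH_REQ cycle: a tag is only (re)submitted to
 * the driver when it is free *and* needs either a fetch or a commit.
 */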
struct ublk_io {
	char *buf_addr;

#define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
#define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKSRV_IO_FREE			(1UL << 2)
	unsigned int flags;

	unsigned int result;
};

struct ublk_tgt_ops {
	const char *name;
	int (*init_tgt)(struct ublk_dev *);
	void (*deinit_tgt)(struct ublk_dev *);

	int (*queue_io)(struct ublk_queue *, int tag);
	void (*tgt_io_done)(struct ublk_queue *,
			int tag, const struct io_uring_cqe *);
};

struct ublk_tgt {
	unsigned long dev_size;
	const struct ublk_tgt_ops *ops;
	struct ublk_params params;
};

struct ublk_queue {
	int q_id;
	int q_depth;
	unsigned int cmd_inflight;
	unsigned int io_inflight;
	struct ublk_dev *dev;
	const struct ublk_tgt_ops *tgt_ops;
	char *io_cmd_buf;
	struct io_uring ring;
	struct ublk_io ios[UBLK_QUEUE_DEPTH];
#define UBLKSRV_QUEUE_STOPPING	(1U << 0)
#define UBLKSRV_QUEUE_IDLE	(1U << 1)
	unsigned state;
	pid_t tid;
	pthread_t thread;
};

struct ublk_dev {
	struct ublk_tgt tgt;
	struct ublksrv_ctrl_dev_info dev_info;
	struct ublk_queue q[UBLK_MAX_QUEUES];

	int fds[2];	/* fds[0] points to /dev/ublkcN */
	int nr_fds;
	int ctrl_fd;
	struct io_uring ring;
};

#ifndef offsetof
#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
#endif

#ifndef container_of
#define container_of(ptr, type, member) ({			\
	unsigned long __mptr = (unsigned long)(ptr);		\
	((type *)(__mptr - offsetof(type, member))); })
#endif

#define round_up(val, rnd) \
	(((val) + ((rnd) - 1)) & ~((rnd) - 1))

static unsigned int ublk_dbg_mask = 0;

static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
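
/*
 * cqe->user_data layout used by this test: bits 0-15 hold the tag,
 * bits 16-23 the command opcode (_IOC_NR), bits 24-39 optional target
 * data, and bit 63 marks a target I/O so its completion is routed to the
 * target's tgt_io_done() handler instead of the ublk command path.
 */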
static inline int is_target_io(__u64 user_data)
{
	return (user_data & (1ULL << 63)) != 0;
}

static inline __u64 build_user_data(unsigned tag, unsigned op,
		unsigned tgt_data, unsigned is_target_io)
{
	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));

	return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
}

static inline unsigned int user_data_to_tag(__u64 user_data)
{
	return user_data & 0xffff;
}

static inline unsigned int user_data_to_op(__u64 user_data)
{
	return (user_data >> 16) & 0xff;
}

static void ublk_err(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}

static void ublk_dbg(int level, const char *fmt, ...)
{
	if (level & ublk_dbg_mask) {
		va_list ap;

		va_start(ap, fmt);
		vfprintf(stdout, fmt, ap);
		va_end(ap);
	}
}

static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
	return (void *)&sqe->cmd;
}

static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
	io->result = res;
}

static inline const struct ublksrv_io_desc *ublk_get_iod(
		const struct ublk_queue *q, int tag)
{
	return (struct ublksrv_io_desc *)
		&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
}
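
/*
 * For IORING_OP_URING_CMD the ublk command opcode is carried in the SQE
 * where sqe->off normally lives (the cmd_op member of the SQE union), so
 * the helper below writes cmd_op into the low 32 bits and zeroes the rest.
 */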
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
		__u32 cmd_op)
{
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;
	addr[1] = 0;
}

static inline int ublk_setup_ring(struct io_uring *r, int depth,
		int cq_depth, unsigned flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags | IORING_SETUP_CQSIZE;
	p.cq_entries = cq_depth;
	return io_uring_queue_init_params(depth, r, &p);
}
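
/*
 * Control-plane commands (ADD_DEV, START_DEV, GET_FEATURES, ...) are sent
 * to /dev/ublk-control as uring_cmd SQEs: the struct ublksrv_ctrl_cmd
 * payload is placed in the SQE's inline cmd area and each command is
 * submitted and waited on synchronously via the control ring.
 */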
static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
		struct io_uring_sqe *sqe,
		struct ublk_ctrl_cmd_data *data)
{
	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);

	sqe->fd = dev->ctrl_fd;
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->ioprio = 0;

	if (data->flags & CTRL_CMD_HAS_BUF) {
		cmd->addr = data->addr;
		cmd->len = data->len;
	}

	if (data->flags & CTRL_CMD_HAS_DATA)
		cmd->data[0] = data->data[0];

	cmd->dev_id = info->dev_id;
	cmd->queue_id = -1;

	ublk_set_sqe_cmd_op(sqe, data->cmd_op);

	io_uring_sqe_set_data(sqe, cmd);
}

static int __ublk_ctrl_cmd(struct ublk_dev *dev,
		struct ublk_ctrl_cmd_data *data)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret = -EINVAL;

	sqe = io_uring_get_sqe(&dev->ring);
	if (!sqe) {
		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
		return ret;
	}

	ublk_ctrl_init_cmd(dev, sqe, data);

	ret = io_uring_submit(&dev->ring);
	if (ret < 0) {
		ublk_err("uring submit ret %d\n", ret);
		return ret;
	}

	ret = io_uring_wait_cqe(&dev->ring, &cqe);
	if (ret < 0) {
		ublk_err("wait cqe: %s\n", strerror(-ret));
		return ret;
	}
	io_uring_cqe_seen(&dev->ring, cqe);

	return cqe->res;
}

static int ublk_ctrl_start_dev(struct ublk_dev *dev,
		int daemon_pid)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op	= UBLK_U_CMD_START_DEV,
		.flags	= CTRL_CMD_HAS_DATA,
	};

	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_add_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op	= UBLK_U_CMD_ADD_DEV,
		.flags	= CTRL_CMD_HAS_BUF,
		.addr	= (__u64) (uintptr_t) &dev->dev_info,
		.len	= sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_del_dev(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_DEL_DEV,
		.flags = 0,
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_info(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op	= UBLK_U_CMD_GET_DEV_INFO,
		.flags	= CTRL_CMD_HAS_BUF,
		.addr	= (__u64) (uintptr_t) &dev->dev_info,
		.len	= sizeof(struct ublksrv_ctrl_dev_info),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_set_params(struct ublk_dev *dev,
		struct ublk_params *params)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op	= UBLK_U_CMD_SET_PARAMS,
		.flags	= CTRL_CMD_HAS_BUF,
		.addr	= (__u64) (uintptr_t) params,
		.len	= sizeof(*params),
	};

	params->len = sizeof(*params);

	return __ublk_ctrl_cmd(dev, &data);
}

static int ublk_ctrl_get_features(struct ublk_dev *dev,
		__u64 *features)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op	= UBLK_U_CMD_GET_FEATURES,
		.flags	= CTRL_CMD_HAS_BUF,
		.addr	= (__u64) (uintptr_t) features,
		.len	= sizeof(*features),
	};

	return __ublk_ctrl_cmd(dev, &data);
}

static void ublk_ctrl_deinit(struct ublk_dev *dev)
{
	close(dev->ctrl_fd);
	free(dev);
}
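
/*
 * The control ring is created with IORING_SETUP_SQE128 because the
 * struct ublksrv_ctrl_cmd payload does not fit in the 16-byte inline cmd
 * area of a regular SQE; the per-queue I/O rings set up later get by
 * without it since struct ublksrv_io_cmd is only 16 bytes.
 */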
static struct ublk_dev *ublk_ctrl_init(void)
{
	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
	struct ublksrv_ctrl_dev_info *info;
	int ret;

	if (!dev)
		return NULL;

	info = &dev->dev_info;
	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
	if (dev->ctrl_fd < 0) {
		free(dev);
		return NULL;
	}

	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;

	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
	if (ret < 0) {
		ublk_err("queue_init: %s\n", strerror(-ret));
		close(dev->ctrl_fd);
		free(dev);
		return NULL;
	}

	dev->nr_fds = 1;

	return dev;
}

static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
{
	int size = q->q_depth * sizeof(struct ublksrv_io_desc);
	unsigned int page_sz = getpagesize();

	return round_up(size, page_sz);
}

static void ublk_queue_deinit(struct ublk_queue *q)
{
	int i;
	int nr_ios = q->q_depth;

	io_uring_unregister_ring_fd(&q->ring);

	if (q->ring.ring_fd > 0) {
		io_uring_unregister_files(&q->ring);
		close(q->ring.ring_fd);
		q->ring.ring_fd = -1;
	}

	if (q->io_cmd_buf)
		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));

	for (i = 0; i < nr_ios; i++)
		free(q->ios[i].buf_addr);
}
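
/*
 * Queue setup: the driver exports a read-only array of ublksrv_io_desc,
 * one per tag, which is mmap()ed from /dev/ublkcN at UBLKSRV_CMD_BUF_OFFSET
 * plus a per-queue offset. Each tag also gets a page-aligned data buffer
 * whose address is handed to the driver in the FETCH/COMMIT commands.
 */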
static int ublk_queue_init(struct ublk_queue *q)
{
	struct ublk_dev *dev = q->dev;
	int depth = dev->dev_info.queue_depth;
	int i, ret = -1;
	int cmd_buf_size, io_buf_size;
	unsigned long off;
	int ring_depth = depth, cq_depth = depth;

	q->tgt_ops = dev->tgt.ops;
	q->state = 0;
	q->q_depth = depth;
	q->cmd_inflight = 0;
	q->tid = gettid();

	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
	off = UBLKSRV_CMD_BUF_OFFSET +
		q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
	q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
	if (q->io_cmd_buf == MAP_FAILED) {
		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
				q->dev->dev_info.dev_id, q->q_id);
		goto fail;
	}

	io_buf_size = dev->dev_info.max_io_buf_bytes;
	for (i = 0; i < q->q_depth; i++) {
		q->ios[i].buf_addr = NULL;

		if (posix_memalign((void **)&q->ios[i].buf_addr,
					getpagesize(), io_buf_size)) {
			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
					dev->dev_info.dev_id, q->q_id, i);
			goto fail;
		}
		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
	}

	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
			IORING_SETUP_COOP_TASKRUN);
	if (ret < 0) {
		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
				q->dev->dev_info.dev_id, q->q_id, ret);
		goto fail;
	}

	io_uring_register_ring_fd(&q->ring);

	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
	if (ret) {
		ublk_err("ublk dev %d queue %d register files failed %d\n",
				q->dev->dev_info.dev_id, q->q_id, ret);
		goto fail;
	}

	return 0;
fail:
	ublk_queue_deinit(q);
	ublk_err("ublk dev %d queue %d failed\n",
			dev->dev_info.dev_id, q->q_id);
	return -ENOMEM;
}

static int ublk_dev_prep(struct ublk_dev *dev)
{
	int dev_id = dev->dev_info.dev_id;
	char buf[64];
	int ret = 0;

	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
	dev->fds[0] = open(buf, O_RDWR);
	if (dev->fds[0] < 0) {
		ret = -EBADF;
		ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
		goto fail;
	}

	if (dev->tgt.ops->init_tgt)
		ret = dev->tgt.ops->init_tgt(dev);

	return ret;
fail:
	close(dev->fds[0]);
	return ret;
}

static void ublk_dev_unprep(struct ublk_dev *dev)
{
	if (dev->tgt.ops->deinit_tgt)
		dev->tgt.ops->deinit_tgt(dev);
	close(dev->fds[0]);
}
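
/*
 * Data-plane protocol: each tag starts with a FETCH_REQ uring_cmd handing
 * its buffer to the driver; once the target finishes a request, the result
 * is returned with COMMIT_AND_FETCH_REQ, which both completes the block
 * request and re-arms the tag for the next one. A command is only queued
 * for a tag that is free and flagged as needing a fetch or a commit.
 */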
static int ublk_queue_io_cmd(struct ublk_queue *q,
		struct ublk_io *io, unsigned tag)
{
	struct ublksrv_io_cmd *cmd;
	struct io_uring_sqe *sqe;
	unsigned int cmd_op = 0;
	__u64 user_data;

	/* only freed io can be issued */
	if (!(io->flags & UBLKSRV_IO_FREE))
		return 0;

	/* we issue because we need either fetching or committing */
	if (!(io->flags &
		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
		return 0;

	if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
		cmd_op = UBLK_U_IO_FETCH_REQ;

	sqe = io_uring_get_sqe(&q->ring);
	if (!sqe) {
		ublk_err("%s: run out of sqe %d, tag %d\n",
				__func__, q->q_id, tag);
		return -1;
	}

	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);

	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
		cmd->result = io->result;

	/* These fields should be written once, never change */
	ublk_set_sqe_cmd_op(sqe, cmd_op);
	sqe->fd = 0;	/* dev->fds[0] */
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->flags = IOSQE_FIXED_FILE;
	sqe->rw_flags = 0;

	cmd->tag = tag;
	cmd->addr = (__u64) (uintptr_t) io->buf_addr;
	cmd->q_id = q->q_id;

	user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
	io_uring_sqe_set_data64(sqe, user_data);

	io->flags = 0;

	q->cmd_inflight += 1;

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
			__func__, q->q_id, tag, cmd_op,
			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
	return 1;
}

static int ublk_complete_io(struct ublk_queue *q,
		unsigned tag, int res)
{
	struct ublk_io *io = &q->ios[tag];

	ublk_mark_io_done(io, res);

	return ublk_queue_io_cmd(q, io, tag);
}

static void ublk_submit_fetch_commands(struct ublk_queue *q)
{
	int i = 0;

	for (i = 0; i < q->q_depth; i++)
		ublk_queue_io_cmd(q, &q->ios[i], i);
}

static int ublk_queue_is_idle(struct ublk_queue *q)
{
	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
}

static int ublk_queue_is_done(struct ublk_queue *q)
{
	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
}

static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
		struct io_uring_cqe *cqe)
{
	unsigned tag = user_data_to_tag(cqe->user_data);

	if (cqe->res < 0 && cqe->res != -EAGAIN)
		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
			__func__, cqe->res, q->q_id,
			user_data_to_tag(cqe->user_data),
			user_data_to_op(cqe->user_data));

	if (q->tgt_ops->tgt_io_done)
		q->tgt_ops->tgt_io_done(q, tag, cqe);
}
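
/*
 * CQE dispatch: target I/O completions (bit 63 set in user_data) are routed
 * to the target, while ublk command completions either carry a new request
 * (UBLK_IO_RES_OK, handed to queue_io()) or signal shutdown/abort, in which
 * case the tag is left free and the queue is marked as stopping.
 */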
static void ublk_handle_cqe(struct io_uring *r,
		struct io_uring_cqe *cqe, void *data)
{
	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
	unsigned tag = user_data_to_tag(cqe->user_data);
	unsigned cmd_op = user_data_to_op(cqe->user_data);
	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
		!(q->state & UBLKSRV_QUEUE_STOPPING);
	struct ublk_io *io;

	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
			__func__, cqe->res, q->q_id, tag, cmd_op,
			is_target_io(cqe->user_data),
			(q->state & UBLKSRV_QUEUE_STOPPING));

	/* Don't retrieve io in case of target io */
	if (is_target_io(cqe->user_data)) {
		ublksrv_handle_tgt_cqe(q, cqe);
		return;
	}

	io = &q->ios[tag];
	q->cmd_inflight--;

	if (!fetch) {
		q->state |= UBLKSRV_QUEUE_STOPPING;
		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
	}

	if (cqe->res == UBLK_IO_RES_OK) {
		assert(tag < q->q_depth);
		q->tgt_ops->queue_io(q, tag);
	} else {
		/*
		 * COMMIT_REQ will be completed immediately since no fetching
		 * piggyback is required.
		 *
		 * Marking IO_FREE only, then this io won't be issued since
		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
		 */
		io->flags = UBLKSRV_IO_FREE;
	}
}

static int ublk_reap_events_uring(struct io_uring *r)
{
	struct io_uring_cqe *cqe;
	unsigned head;
	int count = 0;

	io_uring_for_each_cqe(r, head, cqe) {
		ublk_handle_cqe(r, cqe, NULL);
		count += 1;
	}
	io_uring_cq_advance(r, count);

	return count;
}

static int ublk_process_io(struct ublk_queue *q)
{
	int ret, reapped;

	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
				q->dev->dev_info.dev_id,
				q->q_id, io_uring_sq_ready(&q->ring),
				q->cmd_inflight,
				(q->state & UBLKSRV_QUEUE_STOPPING));

	if (ublk_queue_is_done(q))
		return -ENODEV;

	ret = io_uring_submit_and_wait(&q->ring, 1);
	reapped = ublk_reap_events_uring(&q->ring);

	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
			(q->state & UBLKSRV_QUEUE_IDLE));

	return reapped;
}

static void *ublk_io_handler_fn(void *data)
{
	struct ublk_queue *q = data;
	int dev_id = q->dev->dev_info.dev_id;
	int ret;

	ret = ublk_queue_init(q);
	if (ret) {
		ublk_err("ublk dev %d queue %d init queue failed\n",
				dev_id, q->q_id);
		return NULL;
	}
	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
			q->tid, dev_id, q->q_id);

	/* submit all io commands to ublk driver */
	ublk_submit_fetch_commands(q);
	do {
		if (ublk_process_io(q) < 0)
			break;
	} while (1);

	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
	ublk_queue_deinit(q);
	return NULL;
}

static void ublk_set_parameters(struct ublk_dev *dev)
{
	int ret;

	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
	if (ret)
		ublk_err("dev %d set basic parameter failed %d\n",
				dev->dev_info.dev_id, ret);
}
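
/*
 * Daemon side of the device: detach via daemon(), open /dev/ublkcN, start
 * one pthread per hardware queue to run the fetch/commit loop, push the
 * target parameters and issue START_DEV, then simply wait for the queue
 * threads to exit (which happens once the queues observe STOPPING/abort).
 */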
static int ublk_start_daemon(struct ublk_dev *dev)
{
	int ret, i;
	void *thread_ret;
	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;

	if (daemon(1, 1) < 0)
		return -errno;

	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);

	ret = ublk_dev_prep(dev);
	if (ret)
		return ret;

	for (i = 0; i < dinfo->nr_hw_queues; i++) {
		dev->q[i].dev = dev;
		dev->q[i].q_id = i;
		pthread_create(&dev->q[i].thread, NULL,
				ublk_io_handler_fn,
				&dev->q[i]);
	}

	/* everything is fine now, start us */
	ublk_set_parameters(dev);
	ret = ublk_ctrl_start_dev(dev, getpid());
	if (ret < 0) {
		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
		goto fail;
	}

	/* wait until we are terminated */
	for (i = 0; i < dinfo->nr_hw_queues; i++)
		pthread_join(dev->q[i].thread, &thread_ret);
fail:
	ublk_dev_unprep(dev);
	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
	return ret;
}
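
/*
 * Helper for the parent/test side: use inotify on /dev to wait (with a
 * timeout) for the ublk char/block node to appear or be closed, which is
 * how the test detects that the daemon has started or shut down.
 */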
static int wait_ublk_dev(char *dev_name, int evt_mask, unsigned timeout)
{
#define EV_SIZE (sizeof(struct inotify_event))
#define EV_BUF_LEN (128 * (EV_SIZE + 16))
	struct pollfd pfd;
	int fd, wd;
	int ret = -EINVAL;

	fd = inotify_init();
	if (fd < 0) {
		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
		return fd;
	}

	wd = inotify_add_watch(fd, "/dev", evt_mask);
	if (wd == -1) {
		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
		goto fail;
	}

	pfd.fd = fd;
	pfd.events = POLLIN;
	while (1) {
		int i = 0;
		char buffer[EV_BUF_LEN];
		ret = poll(&pfd, 1, 1000 * timeout);

		if (ret == -1) {
			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
			goto rm_watch;
		} else if (ret == 0) {
			ublk_err("%s: poll inotify timeout\n", __func__);
			ret = -ENOENT;
			goto rm_watch;
		}

		ret = read(fd, buffer, EV_BUF_LEN);
		if (ret < 0) {
			ublk_err("%s: read inotify fd failed\n", __func__);
			goto rm_watch;
		}

		while (i < ret) {
			struct inotify_event *event = (struct inotify_event *)&buffer[i];

			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
					__func__, event->mask, event->name);
			if (event->mask & evt_mask) {
				if (!strcmp(event->name, dev_name)) {
					ret = 0;
					goto rm_watch;
				}
			}
			i += EV_SIZE + event->len;
		}
	}
rm_watch:
	inotify_rm_watch(fd, wd);
fail:
	close(fd);
	return ret;
}

static int ublk_stop_io_daemon(const struct ublk_dev *dev)
{
	int daemon_pid = dev->dev_info.ublksrv_pid;
	int dev_id = dev->dev_info.dev_id;
	char ublkc[64];
	int ret;

	/*
	 * Wait until the ublk char device is closed, i.e. our daemon has
	 * shut down
	 */
	snprintf(ublkc, sizeof(ublkc), "%s%d", "ublkc", dev_id);
	ret = wait_ublk_dev(ublkc, IN_CLOSE_WRITE, 10);

	waitpid(dev->dev_info.ublksrv_pid, NULL, 0);
	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
			__func__, daemon_pid, dev_id, ret);

	return ret;
}

static int cmd_dev_add(char *tgt_type, int *exp_id, unsigned nr_queues,
		unsigned depth)
{
	const struct ublk_tgt_ops *ops;
	struct ublksrv_ctrl_dev_info *info;
	struct ublk_dev *dev;
	int dev_id = *exp_id;
	char ublkb[64];
	int ret;

	ops = ublk_find_tgt(tgt_type);
	if (!ops) {
		ublk_err("%s: no such tgt type, type %s\n",
				__func__, tgt_type);
		return -ENODEV;
	}

	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
				__func__, nr_queues, depth);
		return -EINVAL;
	}

	dev = ublk_ctrl_init();
	if (!dev) {
		ublk_err("%s: can't alloc dev id %d, type %s\n",
				__func__, dev_id, tgt_type);
		return -ENOMEM;
	}

	info = &dev->dev_info;
	info->dev_id = dev_id;
	info->nr_hw_queues = nr_queues;
	info->queue_depth = depth;
	dev->tgt.ops = ops;

	ret = ublk_ctrl_add_dev(dev);
	if (ret < 0) {
		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
				__func__, dev_id, tgt_type, ret);
		goto fail;
	}

	switch (fork()) {
	case -1:
		ret = -errno;
		goto fail;
	case 0:
		ublk_start_daemon(dev);
		return 0;
	}

	/*
	 * Wait until the ublk disk is added, i.e. our daemon has started
	 * successfully
	 */
	snprintf(ublkb, sizeof(ublkb), "%s%u", "ublkb", dev->dev_info.dev_id);
	ret = wait_ublk_dev(ublkb, IN_CREATE, 3);
	if (ret < 0) {
		ublk_err("%s: can't start daemon id %d, type %s\n",
				__func__, dev_id, tgt_type);
		ublk_ctrl_del_dev(dev);
	} else {
		*exp_id = dev->dev_info.dev_id;
	}
fail:
	ublk_ctrl_deinit(dev);
	return ret;
}

static int cmd_dev_del_by_kill(int number)
{
	struct ublk_dev *dev;
	int ret;

	dev = ublk_ctrl_init();
	dev->dev_info.dev_id = number;

	ret = ublk_ctrl_get_info(dev);
	if (ret < 0)
		goto fail;

	/* simulate one ublk daemon panic */
	kill(dev->dev_info.ublksrv_pid, 9);

	ret = ublk_stop_io_daemon(dev);
	if (ret < 0)
		ublk_err("%s: can't stop daemon id %d\n", __func__, number);
	ublk_ctrl_del_dev(dev);
fail:
	/* the device should be gone now, so GET_DEV_INFO is expected to fail */
	if (ret >= 0)
		ret = ublk_ctrl_get_info(dev);
	ublk_ctrl_deinit(dev);
	return (ret != 0) ? 0 : -EIO;
}

/****************** part 2: target implementation ********************/

static int ublk_null_tgt_init(struct ublk_dev *dev)
{
	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	unsigned long dev_size = 250UL << 30;

	dev->tgt.dev_size = dev_size;
	dev->tgt.params = (struct ublk_params) {
		.types = UBLK_PARAM_TYPE_BASIC,
		.basic = {
			.logical_bs_shift	= 9,
			.physical_bs_shift	= 12,
			.io_opt_shift		= 12,
			.io_min_shift		= 9,
			.max_sectors		= info->max_io_buf_bytes >> 9,
			.dev_sectors		= dev_size >> 9,
		},
	};

	return 0;
}

static int ublk_null_queue_io(struct ublk_queue *q, int tag)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);

	ublk_complete_io(q, tag, iod->nr_sectors << 9);
	return 0;
}

static const struct ublk_tgt_ops tgt_ops_list[] = {
	{
		.name = "null",
		.init_tgt = ublk_null_tgt_init,
		.queue_io = ublk_null_queue_io,
	},
};

static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
{
	const struct ublk_tgt_ops *ops;
	int i;

	if (name == NULL)
		return NULL;

	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(*ops); i++)
		if (strcmp(tgt_ops_list[i].name, name) == 0)
			return &tgt_ops_list[i];
	return NULL;
}

/****************** part 3: IO test over ublk disk ********************/

#include "helpers.h"
#include "liburing.h"

#define BS		4096
#define BUFFERS		128

struct io_ctx {
	int dev_id;
	int write;
	int seq;

	/* output */
	int res;
	pthread_t handle;
};
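
/*
 * The I/O side of the test: each worker opens /dev/ublkbN with O_DIRECT,
 * registers a set of fixed buffers, and walks the whole device issuing
 * BUFFERS read or write requests per batch via read_fixed/write_fixed.
 */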
static int __test_io(struct io_uring *ring, int fd, int write,
		int seq, struct iovec *vecs, int exp_len, off_t start)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret;
	off_t offset;

	offset = start;
	for (i = 0; i < BUFFERS; i++) {
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		if (!seq)
			offset = start + BS * (rand() % BUFFERS);
		if (write) {
			io_uring_prep_write_fixed(sqe, fd, vecs[i].iov_base,
							vecs[i].iov_len,
							offset, i);
		} else {
			io_uring_prep_read_fixed(sqe, fd, vecs[i].iov_base,
							vecs[i].iov_len,
							offset, i);
		}
		sqe->user_data = i;
		if (seq)
			offset += BS;
	}

	ret = io_uring_submit(ring);
	if (ret != BUFFERS) {
		fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS);
		goto err;
	}

	for (i = 0; i < BUFFERS; i++) {
		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret) {
			fprintf(stderr, "wait_cqe=%d\n", ret);
			goto err;
		}
		if (exp_len == -1) {
			int iov_len = vecs[cqe->user_data].iov_len;

			if (cqe->res != iov_len) {
				fprintf(stderr, "cqe res %d, wanted %d\n",
						cqe->res, iov_len);
				goto err;
			}
		} else if (cqe->res != exp_len) {
			fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
			goto err;
		}
		io_uring_cqe_seen(ring, cqe);
	}

	return 0;
err:
	return 1;
}

/* Run IO over ublk block device */
static int test_io(struct io_ctx *ctx)
{
	struct io_uring ring;
	int ret, ring_flags = 0;
	char buf[256];
	int fd = -1;
	off_t offset = 0;
	unsigned long long bytes;
	int open_flags = O_DIRECT;
	struct iovec *vecs = t_create_buffers(BUFFERS, BS);

	ret = t_create_ring(BUFFERS, &ring, ring_flags);
	if (ret == T_SETUP_SKIP)
		return 0;
	if (ret != T_SETUP_OK) {
		fprintf(stderr, "ring create failed: %d\n", ret);
		return 1;
	}

	snprintf(buf, sizeof(buf), "%s%d", UBLKB_DEV, ctx->dev_id);
	if (ctx->write)
		open_flags |= O_WRONLY;
	else
		open_flags |= O_RDONLY;
	fd = open(buf, open_flags);
	if (fd < 0) {
		if (errno == EINVAL)
			return 0;
		return 1;
	}
	if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
		return 1;

	ret = t_register_buffers(&ring, vecs, BUFFERS);
	if (ret == T_SETUP_SKIP)
		return 0;
	if (ret != T_SETUP_OK) {
		fprintf(stderr, "buffer reg failed: %d\n", ret);
		return 1;
	}

	for (offset = 0; offset < bytes; offset += BS * BUFFERS) {
		ret = __test_io(&ring, fd, ctx->write, ctx->seq, vecs, BS,
				offset);
		if (ret != T_SETUP_OK) {
			fprintf(stderr, "/dev/ublkb%d read failed: offset %lu ret %d\n",
					ctx->dev_id, (unsigned long) offset, ret);
			break;
		}
	}

	close(fd);
	io_uring_unregister_buffers(&ring);
	io_uring_queue_exit(&ring);
	return ret;
}

static void *test_io_fn(void *data)
{
	struct io_ctx *ctx = data;

	ctx->res = test_io(ctx);
	return data;
}

static void ignore_stderr(void)
{
	int devnull = open("/dev/null", O_WRONLY);

	if (devnull >= 0) {
		dup2(devnull, fileno(stderr));
		close(devnull);
	}
}

static int test_io_worker(int dev_id)
{
	const int nr_jobs = 4;
	struct io_ctx ctx[nr_jobs];
	int i, ret = 0;

	for (i = 0; i < nr_jobs; i++) {
		ctx[i].dev_id = dev_id;
		ctx[i].write = (i & 0x1) ? 0 : 1;
		ctx[i].seq = 1;

		pthread_create(&ctx[i].handle, NULL, test_io_fn, &ctx[i]);
	}

	for (i = 0; i < nr_jobs; i++) {
		pthread_join(ctx[i].handle, NULL);

		if (!ret && ctx[i].res)
			ret = ctx[i].res;
	}

	return ret;
}

/*
 * Run IO over the created ublk device, and meanwhile delete this ublk device
 *
 * Covers the cancellable uring_cmd code path.
 */
static int test_del_ublk_with_io(void)
{
	const unsigned wait_ms = 200;
	char *tgt_type = "null";
	int dev_id = -1;
	int ret, pid;

	ret = cmd_dev_add(tgt_type, &dev_id, 2, BUFFERS);
	if (ret != T_SETUP_OK) {
		fprintf(stderr, "add ublk dev failed: %d\n", ret);
		return T_EXIT_FAIL;
	}

	switch ((pid = fork())) {
	case -1:
		fprintf(stderr, "fork failed\n");
		return T_EXIT_FAIL;
	case 0:
		/* io error is expected since the parent is killing ublk */
		ignore_stderr();
		test_io_worker(dev_id);
		return 0;
	default:
		/*
		 * Wait a little while until the ublk IO pipeline is warmed up,
		 * then try to shut down the ublk device by
		 * `kill -9 $ublk_daemon_pid`.
		 *
		 * The cancellable uring_cmd code path is covered this way.
		 */
		usleep(wait_ms * 1000);
		ret = cmd_dev_del_by_kill(dev_id);
		waitpid(pid, NULL, 0);
		return ret;
	}
}

int main(int argc, char *argv[])
{
	const int nr_loop = 4;
	struct ublk_dev *dev;
	__u64 features;
	int ret, i;

	if (argc > 1)
		return T_EXIT_SKIP;

	dev = ublk_ctrl_init();
	/* ublk isn't supported or the module isn't loaded */
	if (!dev)
		return T_EXIT_SKIP;

	/* kernel doesn't support get_features */
	ret = ublk_ctrl_get_features(dev, &features);
	if (ret < 0)
		return T_EXIT_SKIP;

	if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
		return T_EXIT_SKIP;

	for (i = 0; i < nr_loop; i++) {
		if (test_del_ublk_with_io())
			return T_EXIT_FAIL;
	}
	ublk_ctrl_deinit(dev);
	return T_EXIT_PASS;
}
#else
int main(int argc, char *argv[])
{
	return T_EXIT_SKIP;
}
#endif