1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Description: uring_cmd based ublk
4  */
5 
6 #include "kublk.h"
7 
8 #define MAX_NR_TGT_ARG 	64
9 
10 unsigned int ublk_dbg_mask = UBLK_LOG;
11 static const struct ublk_tgt_ops *tgt_ops_list[] = {
12 	&null_tgt_ops,
13 	&loop_tgt_ops,
14 	&stripe_tgt_ops,
15 	&fault_inject_tgt_ops,
16 };
17 
18 static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
19 {
20 	int i;
21 
22 	if (name == NULL)
23 		return NULL;
24 
25 	for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
26 		if (strcmp(tgt_ops_list[i]->name, name) == 0)
27 			return tgt_ops_list[i];
28 	return NULL;
29 }
30 
31 static inline int ublk_setup_ring(struct io_uring *r, int depth,
32 		int cq_depth, unsigned flags)
33 {
34 	struct io_uring_params p;
35 
36 	memset(&p, 0, sizeof(p));
37 	p.flags = flags | IORING_SETUP_CQSIZE;
38 	p.cq_entries = cq_depth;
39 
40 	return io_uring_queue_init_params(depth, r, &p);
41 }
42 
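/*
 * Encode one control command into a 128-byte SQE: the ublksrv_ctrl_cmd
 * payload lives in the SQE's uring_cmd area and targets the ublk control
 * device fd; an optional user buffer (addr/len) and inline data word are
 * copied in when the caller sets the corresponding flags.
 */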
43 static void ublk_ctrl_init_cmd(struct ublk_dev *dev,
44 		struct io_uring_sqe *sqe,
45 		struct ublk_ctrl_cmd_data *data)
46 {
47 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
48 	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
49 
50 	sqe->fd = dev->ctrl_fd;
51 	sqe->opcode = IORING_OP_URING_CMD;
52 	sqe->ioprio = 0;
53 
54 	if (data->flags & CTRL_CMD_HAS_BUF) {
55 		cmd->addr = data->addr;
56 		cmd->len = data->len;
57 	}
58 
59 	if (data->flags & CTRL_CMD_HAS_DATA)
60 		cmd->data[0] = data->data[0];
61 
62 	cmd->dev_id = info->dev_id;
63 	cmd->queue_id = -1;
64 
65 	ublk_set_sqe_cmd_op(sqe, data->cmd_op);
66 
67 	io_uring_sqe_set_data(sqe, cmd);
68 }
69 
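/*
 * Issue a single control command synchronously: grab one SQE, submit it on
 * the control ring, wait for its CQE and return cqe->res (a ublk status or
 * negative errno).
 */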
70 static int __ublk_ctrl_cmd(struct ublk_dev *dev,
71 		struct ublk_ctrl_cmd_data *data)
72 {
73 	struct io_uring_sqe *sqe;
74 	struct io_uring_cqe *cqe;
75 	int ret = -EINVAL;
76 
77 	sqe = io_uring_get_sqe(&dev->ring);
78 	if (!sqe) {
79 		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
80 		return ret;
81 	}
82 
83 	ublk_ctrl_init_cmd(dev, sqe, data);
84 
85 	ret = io_uring_submit(&dev->ring);
86 	if (ret < 0) {
87 		ublk_err("uring submit ret %d\n", ret);
88 		return ret;
89 	}
90 
91 	ret = io_uring_wait_cqe(&dev->ring, &cqe);
92 	if (ret < 0) {
93 		ublk_err("wait cqe: %s\n", strerror(-ret));
94 		return ret;
95 	}
96 	io_uring_cqe_seen(&dev->ring, cqe);
97 
98 	return cqe->res;
99 }
100 
101 static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
102 {
103 	struct ublk_ctrl_cmd_data data = {
104 		.cmd_op	= UBLK_U_CMD_STOP_DEV,
105 	};
106 
107 	return __ublk_ctrl_cmd(dev, &data);
108 }
109 
110 static int ublk_ctrl_start_dev(struct ublk_dev *dev,
111 		int daemon_pid)
112 {
113 	struct ublk_ctrl_cmd_data data = {
114 		.cmd_op	= UBLK_U_CMD_START_DEV,
115 		.flags	= CTRL_CMD_HAS_DATA,
116 	};
117 
118 	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
119 
120 	return __ublk_ctrl_cmd(dev, &data);
121 }
122 
123 static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
124 {
125 	struct ublk_ctrl_cmd_data data = {
126 		.cmd_op	= UBLK_U_CMD_START_USER_RECOVERY,
127 	};
128 
129 	return __ublk_ctrl_cmd(dev, &data);
130 }
131 
132 static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
133 {
134 	struct ublk_ctrl_cmd_data data = {
135 		.cmd_op	= UBLK_U_CMD_END_USER_RECOVERY,
136 		.flags	= CTRL_CMD_HAS_DATA,
137 	};
138 
139 	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
140 
141 	return __ublk_ctrl_cmd(dev, &data);
142 }
143 
144 static int ublk_ctrl_add_dev(struct ublk_dev *dev)
145 {
146 	struct ublk_ctrl_cmd_data data = {
147 		.cmd_op	= UBLK_U_CMD_ADD_DEV,
148 		.flags	= CTRL_CMD_HAS_BUF,
149 		.addr = (__u64) (uintptr_t) &dev->dev_info,
150 		.len = sizeof(struct ublksrv_ctrl_dev_info),
151 	};
152 
153 	return __ublk_ctrl_cmd(dev, &data);
154 }
155 
156 static int ublk_ctrl_del_dev(struct ublk_dev *dev)
157 {
158 	struct ublk_ctrl_cmd_data data = {
159 		.cmd_op = UBLK_U_CMD_DEL_DEV,
160 		.flags = 0,
161 	};
162 
163 	return __ublk_ctrl_cmd(dev, &data);
164 }
165 
166 static int ublk_ctrl_get_info(struct ublk_dev *dev)
167 {
168 	struct ublk_ctrl_cmd_data data = {
169 		.cmd_op	= UBLK_U_CMD_GET_DEV_INFO,
170 		.flags	= CTRL_CMD_HAS_BUF,
171 		.addr = (__u64) (uintptr_t) &dev->dev_info,
172 		.len = sizeof(struct ublksrv_ctrl_dev_info),
173 	};
174 
175 	return __ublk_ctrl_cmd(dev, &data);
176 }
177 
178 static int ublk_ctrl_set_params(struct ublk_dev *dev,
179 		struct ublk_params *params)
180 {
181 	struct ublk_ctrl_cmd_data data = {
182 		.cmd_op	= UBLK_U_CMD_SET_PARAMS,
183 		.flags	= CTRL_CMD_HAS_BUF,
184 		.addr = (__u64) (uintptr_t) params,
185 		.len = sizeof(*params),
186 	};
187 	params->len = sizeof(*params);
188 	return __ublk_ctrl_cmd(dev, &data);
189 }
190 
191 static int ublk_ctrl_get_params(struct ublk_dev *dev,
192 		struct ublk_params *params)
193 {
194 	struct ublk_ctrl_cmd_data data = {
195 		.cmd_op	= UBLK_U_CMD_GET_PARAMS,
196 		.flags	= CTRL_CMD_HAS_BUF,
197 		.addr = (__u64) (uintptr_t) params,
198 		.len = sizeof(*params),
199 	};
200 
201 	params->len = sizeof(*params);
202 
203 	return __ublk_ctrl_cmd(dev, &data);
204 }
205 
206 static int ublk_ctrl_get_features(struct ublk_dev *dev,
207 		__u64 *features)
208 {
209 	struct ublk_ctrl_cmd_data data = {
210 		.cmd_op	= UBLK_U_CMD_GET_FEATURES,
211 		.flags	= CTRL_CMD_HAS_BUF,
212 		.addr = (__u64) (uintptr_t) features,
213 		.len = sizeof(*features),
214 	};
215 
216 	return __ublk_ctrl_cmd(dev, &data);
217 }
218 
219 static const char *ublk_dev_state_desc(struct ublk_dev *dev)
220 {
221 	switch (dev->dev_info.state) {
222 	case UBLK_S_DEV_DEAD:
223 		return "DEAD";
224 	case UBLK_S_DEV_LIVE:
225 		return "LIVE";
226 	case UBLK_S_DEV_QUIESCED:
227 		return "QUIESCED";
228 	default:
229 		return "UNKNOWN";
230 	}
231 }
232 
233 static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
234 {
235 	unsigned done = 0;
236 	int i;
237 
238 	for (i = 0; i < CPU_SETSIZE; i++) {
239 		if (CPU_ISSET(i, set))
240 			done += snprintf(&buf[done], len - done, "%d ", i);
241 	}
242 }
243 
244 static void ublk_adjust_affinity(cpu_set_t *set)
245 {
246 	int j, updated = 0;
247 
248 	/*
249 	 * Just keep the 1st CPU now.
250 	 *
251 	 * In future, auto affinity selection can be tried.
252 	 */
253 	for (j = 0; j < CPU_SETSIZE; j++) {
254 		if (CPU_ISSET(j, set)) {
255 			if (!updated) {
256 				updated = 1;
257 				continue;
258 			}
259 			CPU_CLR(j, set);
260 		}
261 	}
262 }
263 
264 /* Caller must free the allocated buffer */
265 static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
266 {
267 	struct ublk_ctrl_cmd_data data = {
268 		.cmd_op	= UBLK_U_CMD_GET_QUEUE_AFFINITY,
269 		.flags	= CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
270 	};
271 	cpu_set_t *buf;
272 	int i, ret;
273 
274 	buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
275 	if (!buf)
276 		return -ENOMEM;
277 
278 	for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
279 		data.data[0] = i;
280 		data.len = sizeof(cpu_set_t);
281 		data.addr = (__u64) (uintptr_t) &buf[i];
282 
283 		ret = __ublk_ctrl_cmd(ctrl_dev, &data);
284 		if (ret < 0) {
285 			free(buf);
286 			return ret;
287 		}
288 		ublk_adjust_affinity(&buf[i]);
289 	}
290 
291 	*ptr_buf = buf;
292 	return 0;
293 }
294 
295 static void ublk_ctrl_dump(struct ublk_dev *dev)
296 {
297 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
298 	struct ublk_params p;
299 	cpu_set_t *affinity;
300 	int ret;
301 
302 	ret = ublk_ctrl_get_params(dev, &p);
303 	if (ret < 0) {
304 		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
305 		return;
306 	}
307 
308 	ret = ublk_ctrl_get_affinity(dev, &affinity);
309 	if (ret < 0) {
310 		ublk_err("failed to get affinity %d %s\n", ret, strerror(-ret));
311 		return;
312 	}
313 
314 	ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
315 			info->dev_id, info->nr_hw_queues, info->queue_depth,
316 			1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
317 	ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
318 			info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
319 			ublk_dev_state_desc(dev));
320 
321 	if (affinity) {
322 		char buf[512];
323 		int i;
324 
325 		for (i = 0; i < info->nr_hw_queues; i++) {
326 			ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
327 			printf("\tqueue %u: tid %d affinity(%s)\n",
328 					i, dev->q[i].tid, buf);
329 		}
330 		free(affinity);
331 	}
332 
333 	fflush(stdout);
334 }
335 
336 static void ublk_ctrl_deinit(struct ublk_dev *dev)
337 {
338 	close(dev->ctrl_fd);
339 	free(dev);
340 }
341 
342 static struct ublk_dev *ublk_ctrl_init(void)
343 {
344 	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
345 	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
346 	int ret;
347 
348 	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
349 	if (dev->ctrl_fd < 0) {
350 		free(dev);
351 		return NULL;
352 	}
353 
354 	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
355 
356 	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
357 			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
358 	if (ret < 0) {
359 		ublk_err("queue_init: %s\n", strerror(-ret));
360 		free(dev);
361 		return NULL;
362 	}
363 	dev->nr_fds = 1;
364 
365 	return dev;
366 }
367 
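/*
 * The per-queue command buffer holds one ublksrv_io_desc per I/O slot and is
 * mmap()ed from the ublk char device, so its size is rounded up to a page.
 */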
368 static int __ublk_queue_cmd_buf_sz(unsigned depth)
369 {
370 	int size =  depth * sizeof(struct ublksrv_io_desc);
371 	unsigned int page_sz = getpagesize();
372 
373 	return round_up(size, page_sz);
374 }
375 
376 static int ublk_queue_max_cmd_buf_sz(void)
377 {
378 	return __ublk_queue_cmd_buf_sz(UBLK_MAX_QUEUE_DEPTH);
379 }
380 
381 static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
382 {
383 	return __ublk_queue_cmd_buf_sz(q->q_depth);
384 }
385 
386 static void ublk_queue_deinit(struct ublk_queue *q)
387 {
388 	int i;
389 	int nr_ios = q->q_depth;
390 
391 	io_uring_unregister_buffers(&q->ring);
392 
393 	io_uring_unregister_ring_fd(&q->ring);
394 
395 	if (q->ring.ring_fd > 0) {
396 		io_uring_unregister_files(&q->ring);
397 		close(q->ring.ring_fd);
398 		q->ring.ring_fd = -1;
399 	}
400 
401 	if (q->io_cmd_buf)
402 		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
403 
404 	for (i = 0; i < nr_ios; i++)
405 		free(q->ios[i].buf_addr);
406 }
407 
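/*
 * Bring up one queue: mmap the read-only I/O descriptor buffer from the ublk
 * char device, allocate a page-aligned data buffer per tag (skipped for
 * zero-copy), create the per-queue io_uring with COOP_TASKRUN /
 * SINGLE_ISSUER / DEFER_TASKRUN, register sparse fixed buffers when zero-copy
 * is enabled, and register the device fd(s) as fixed files.
 */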
408 static int ublk_queue_init(struct ublk_queue *q)
409 {
410 	struct ublk_dev *dev = q->dev;
411 	int depth = dev->dev_info.queue_depth;
412 	int i, ret = -1;
413 	int cmd_buf_size, io_buf_size;
414 	unsigned long off;
415 	int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
416 
417 	q->tgt_ops = dev->tgt.ops;
418 	q->state = 0;
419 	q->q_depth = depth;
420 	q->cmd_inflight = 0;
421 	q->tid = gettid();
422 
423 	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
424 		q->state |= UBLKSRV_NO_BUF;
425 		q->state |= UBLKSRV_ZC;
426 	}
427 
428 	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
429 	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
430 	q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
431 			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
432 	if (q->io_cmd_buf == MAP_FAILED) {
433 		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
434 				q->dev->dev_info.dev_id, q->q_id);
435 		goto fail;
436 	}
437 
438 	io_buf_size = dev->dev_info.max_io_buf_bytes;
439 	for (i = 0; i < q->q_depth; i++) {
440 		q->ios[i].buf_addr = NULL;
441 		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
442 
443 		if (q->state & UBLKSRV_NO_BUF)
444 			continue;
445 
446 		if (posix_memalign((void **)&q->ios[i].buf_addr,
447 					getpagesize(), io_buf_size)) {
448 			ublk_err("ublk dev %d queue %d io %d posix_memalign failed %m\n",
449 					dev->dev_info.dev_id, q->q_id, i);
450 			goto fail;
451 		}
452 	}
453 
454 	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
455 			IORING_SETUP_COOP_TASKRUN |
456 			IORING_SETUP_SINGLE_ISSUER |
457 			IORING_SETUP_DEFER_TASKRUN);
458 	if (ret < 0) {
459 		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
460 				q->dev->dev_info.dev_id, q->q_id, ret);
461 		goto fail;
462 	}
463 
464 	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
465 		ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
466 		if (ret) {
467 			ublk_err("ublk dev %d queue %d register sparse buffers failed %d\n",
468 					dev->dev_info.dev_id, q->q_id, ret);
469 			goto fail;
470 		}
471 	}
472 
473 	io_uring_register_ring_fd(&q->ring);
474 
475 	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
476 	if (ret) {
477 		ublk_err("ublk dev %d queue %d register files failed %d\n",
478 				q->dev->dev_info.dev_id, q->q_id, ret);
479 		goto fail;
480 	}
481 
482 	return 0;
483  fail:
484 	ublk_queue_deinit(q);
485 	ublk_err("ublk dev %d queue %d failed\n",
486 			dev->dev_info.dev_id, q->q_id);
487 	return -ENOMEM;
488 }
489 
490 #define WAIT_USEC 	100000
491 #define MAX_WAIT_USEC 	(3 * 1000000)
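/*
 * Wait (up to 3 seconds) for the ublk char device node to show up, open it
 * as fds[0] and run the target's init_tgt() callback.
 */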
492 static int ublk_dev_prep(const struct dev_ctx *ctx, struct ublk_dev *dev)
493 {
494 	int dev_id = dev->dev_info.dev_id;
495 	unsigned int wait_usec = 0;
496 	int ret = 0, fd = -1;
497 	char buf[64];
498 
499 	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
500 
501 	while (wait_usec < MAX_WAIT_USEC) {
502 		fd = open(buf, O_RDWR);
503 		if (fd >= 0)
504 			break;
505 		usleep(WAIT_USEC);
506 		wait_usec += WAIT_USEC;
507 	}
508 	if (fd < 0) {
509 		ublk_err("can't open %s %s\n", buf, strerror(errno));
510 		return -1;
511 	}
512 
513 	dev->fds[0] = fd;
514 	if (dev->tgt.ops->init_tgt)
515 		ret = dev->tgt.ops->init_tgt(ctx, dev);
516 	if (ret)
517 		close(dev->fds[0]);
518 	return ret;
519 }
520 
521 static void ublk_dev_unprep(struct ublk_dev *dev)
522 {
523 	if (dev->tgt.ops->deinit_tgt)
524 		dev->tgt.ops->deinit_tgt(dev);
525 	close(dev->fds[0]);
526 }
527 
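/*
 * Queue the next ublk command for one tag based on its flags: FETCH_REQ,
 * COMMIT_AND_FETCH_REQ or NEED_GET_DATA.  Returns 1 if a command was queued,
 * 0 if this io isn't ready to be issued, and -1 when no SQE is available.
 */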
528 int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
529 {
530 	struct ublksrv_io_cmd *cmd;
531 	struct io_uring_sqe *sqe[1];
532 	unsigned int cmd_op = 0;
533 	__u64 user_data;
534 
535 	/* only freed io can be issued */
536 	if (!(io->flags & UBLKSRV_IO_FREE))
537 		return 0;
538 
539 	/*
540 	 * Only issue a command when this io needs to fetch a request,
541 	 * commit a completion, or get data for the current request.
542 	 */
543 	if (!(io->flags &
544 		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA)))
545 		return 0;
546 
547 	if (io->flags & UBLKSRV_NEED_GET_DATA)
548 		cmd_op = UBLK_U_IO_NEED_GET_DATA;
549 	else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
550 		cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
551 	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
552 		cmd_op = UBLK_U_IO_FETCH_REQ;
553 
554 	if (io_uring_sq_space_left(&q->ring) < 1)
555 		io_uring_submit(&q->ring);
556 
557 	ublk_queue_alloc_sqes(q, sqe, 1);
558 	if (!sqe[0]) {
559 		ublk_err("%s: run out of sqe %d, tag %d\n",
560 				__func__, q->q_id, tag);
561 		return -1;
562 	}
563 
564 	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe[0]);
565 
566 	if (cmd_op == UBLK_U_IO_COMMIT_AND_FETCH_REQ)
567 		cmd->result = io->result;
568 
569 	/* These fields should be written once, never change */
570 	ublk_set_sqe_cmd_op(sqe[0], cmd_op);
571 	sqe[0]->fd		= 0;	/* dev->fds[0] */
572 	sqe[0]->opcode	= IORING_OP_URING_CMD;
573 	sqe[0]->flags	= IOSQE_FIXED_FILE;
574 	sqe[0]->rw_flags	= 0;
575 	cmd->tag	= tag;
576 	cmd->q_id	= q->q_id;
577 	if (!(q->state & UBLKSRV_NO_BUF))
578 		cmd->addr	= (__u64) (uintptr_t) io->buf_addr;
579 	else
580 		cmd->addr	= 0;
581 
582 	user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
583 	io_uring_sqe_set_data64(sqe[0], user_data);
584 
585 	io->flags = 0;
586 
587 	q->cmd_inflight += 1;
588 
589 	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
590 			__func__, q->q_id, tag, cmd_op,
591 			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
592 	return 1;
593 }
594 
595 static void ublk_submit_fetch_commands(struct ublk_queue *q)
596 {
597 	int i = 0;
598 
599 	for (i = 0; i < q->q_depth; i++)
600 		ublk_queue_io_cmd(q, &q->ios[i], i);
601 }
602 
603 static int ublk_queue_is_idle(struct ublk_queue *q)
604 {
605 	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
606 }
607 
608 static int ublk_queue_is_done(struct ublk_queue *q)
609 {
610 	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
611 }
612 
613 static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
614 		struct io_uring_cqe *cqe)
615 {
616 	unsigned tag = user_data_to_tag(cqe->user_data);
617 
618 	if (cqe->res < 0 && cqe->res != -EAGAIN)
619 		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
620 			__func__, cqe->res, q->q_id,
621 			user_data_to_tag(cqe->user_data),
622 			user_data_to_op(cqe->user_data));
623 
624 	if (q->tgt_ops->tgt_io_done)
625 		q->tgt_ops->tgt_io_done(q, tag, cqe);
626 }
627 
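/*
 * Per-CQE dispatch: target I/O completions are handed to the target's
 * tgt_io_done(); for ublk command completions, UBLK_IO_RES_OK queues the
 * request to the target, UBLK_IO_RES_NEED_GET_DATA re-issues the command
 * with NEED_GET_DATA set, and anything else just marks the io free.  An
 * abort result or a stopping queue disables further fetching.
 */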
628 static void ublk_handle_cqe(struct io_uring *r,
629 		struct io_uring_cqe *cqe, void *data)
630 {
631 	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
632 	unsigned tag = user_data_to_tag(cqe->user_data);
633 	unsigned cmd_op = user_data_to_op(cqe->user_data);
634 	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
635 		!(q->state & UBLKSRV_QUEUE_STOPPING);
636 	struct ublk_io *io;
637 
638 	if (cqe->res < 0 && cqe->res != -ENODEV)
639 		ublk_err("%s: res %d userdata %llx queue state %x\n", __func__,
640 				cqe->res, cqe->user_data, q->state);
641 
642 	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n",
643 			__func__, cqe->res, q->q_id, tag, cmd_op,
644 			is_target_io(cqe->user_data),
645 			user_data_to_tgt_data(cqe->user_data),
646 			(q->state & UBLKSRV_QUEUE_STOPPING));
647 
648 	/* Don't retrieve io in case of target io */
649 	if (is_target_io(cqe->user_data)) {
650 		ublksrv_handle_tgt_cqe(q, cqe);
651 		return;
652 	}
653 
654 	io = &q->ios[tag];
655 	q->cmd_inflight--;
656 
657 	if (!fetch) {
658 		q->state |= UBLKSRV_QUEUE_STOPPING;
659 		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
660 	}
661 
662 	if (cqe->res == UBLK_IO_RES_OK) {
663 		assert(tag < q->q_depth);
664 		if (q->tgt_ops->queue_io)
665 			q->tgt_ops->queue_io(q, tag);
666 	} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
667 		io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
668 		ublk_queue_io_cmd(q, io, tag);
669 	} else {
670 		/*
671 		 * COMMIT_REQ will be completed immediately since no fetching
672 		 * piggyback is required.
673 		 *
674 		 * Mark IO_FREE only; this io then won't be reissued, since we
675 		 * only issue an io that has both UBLKSRV_IO_FREE and one of the
676 		 * UBLKSRV_NEED_* flags set.
677 		 */
678 		io->flags = UBLKSRV_IO_FREE;
679 	}
680 }
681 
682 static int ublk_reap_events_uring(struct io_uring *r)
683 {
684 	struct io_uring_cqe *cqe;
685 	unsigned head;
686 	int count = 0;
687 
688 	io_uring_for_each_cqe(r, head, cqe) {
689 		ublk_handle_cqe(r, cqe, NULL);
690 		count += 1;
691 	}
692 	io_uring_cq_advance(r, count);
693 
694 	return count;
695 }
696 
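/*
 * One iteration of the queue's event loop: submit pending SQEs, wait for at
 * least one completion, then reap every available CQE.  Returns -ENODEV once
 * the queue is stopping and fully idle so the caller can exit.
 */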
697 static int ublk_process_io(struct ublk_queue *q)
698 {
699 	int ret, reapped;
700 
701 	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
702 				q->dev->dev_info.dev_id,
703 				q->q_id, io_uring_sq_ready(&q->ring),
704 				q->cmd_inflight,
705 				(q->state & UBLKSRV_QUEUE_STOPPING));
706 
707 	if (ublk_queue_is_done(q))
708 		return -ENODEV;
709 
710 	ret = io_uring_submit_and_wait(&q->ring, 1);
711 	reapped = ublk_reap_events_uring(&q->ring);
712 
713 	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
714 			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
715 			(q->state & UBLKSRV_QUEUE_IDLE));
716 
717 	return reapped;
718 }
719 
720 static void ublk_queue_set_sched_affinity(const struct ublk_queue *q,
721 		cpu_set_t *cpuset)
722 {
723 	if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
724 		ublk_err("ublk dev %u queue %u set affinity failed\n",
725 				q->dev->dev_info.dev_id, q->q_id);
726 }
727 
728 struct ublk_queue_info {
729 	struct ublk_queue 	*q;
730 	sem_t 			*queue_sem;
731 	cpu_set_t 		*affinity;
732 };
733 
734 static void *ublk_io_handler_fn(void *data)
735 {
736 	struct ublk_queue_info *info = data;
737 	struct ublk_queue *q = info->q;
738 	int dev_id = q->dev->dev_info.dev_id;
739 	int ret;
740 
741 	ret = ublk_queue_init(q);
742 	if (ret) {
743 		ublk_err("ublk dev %d queue %d init queue failed\n",
744 				dev_id, q->q_id);
745 		return NULL;
746 	}
747 	/* IO perf is sensitive to the queue pthread's affinity on NUMA machines */
748 	ublk_queue_set_sched_affinity(q, info->affinity);
749 	sem_post(info->queue_sem);
750 
751 	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
752 			q->tid, dev_id, q->q_id);
753 
754 	/* submit all io commands to ublk driver */
755 	ublk_submit_fetch_commands(q);
756 	do {
757 		if (ublk_process_io(q) < 0)
758 			break;
759 	} while (1);
760 
761 	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
762 	ublk_queue_deinit(q);
763 	return NULL;
764 }
765 
766 static void ublk_set_parameters(struct ublk_dev *dev)
767 {
768 	int ret;
769 
770 	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
771 	if (ret)
772 		ublk_err("dev %d set basic parameter failed %d\n",
773 				dev->dev_info.dev_id, ret);
774 }
775 
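/*
 * Notify the foreground parent over the eventfd: dev_id + 1 on success, or
 * ERROR_EVTFD_DEVID on failure.  Queue state is copied into the shared-memory
 * shadow device first so the parent can dump it.
 */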
776 static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
777 {
778 	uint64_t id;
779 	int evtfd = ctx->_evtfd;
780 
781 	if (evtfd < 0)
782 		return -EBADF;
783 
784 	if (dev_id >= 0)
785 		id = dev_id + 1;
786 	else
787 		id = ERROR_EVTFD_DEVID;
788 
789 	if (dev && ctx->shadow_dev)
790 		memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));
791 
792 	if (write(evtfd, &id, sizeof(id)) != sizeof(id))
793 		return -EINVAL;
794 
795 	close(evtfd);
796 	shmdt(ctx->shadow_dev);
797 
798 	return 0;
799 }
800 
801 
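/*
 * Daemon main path: open the char device, fetch the per-queue affinity,
 * spawn one pthread per hardware queue and wait (via a semaphore) until all
 * of them finish queue setup, then start the device (or end user recovery),
 * report back, and finally join the queue threads.
 */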
802 static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
803 {
804 	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
805 	struct ublk_queue_info *qinfo;
806 	cpu_set_t *affinity_buf;
807 	void *thread_ret;
808 	sem_t queue_sem;
809 	int ret, i;
810 
811 	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
812 
813 	qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info),
814 			dinfo->nr_hw_queues);
815 	if (!qinfo)
816 		return -ENOMEM;
817 
818 	sem_init(&queue_sem, 0, 0);
819 	ret = ublk_dev_prep(ctx, dev);
820 	if (ret)
821 		return ret;
822 
823 	ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
824 	if (ret)
825 		return ret;
826 
827 	for (i = 0; i < dinfo->nr_hw_queues; i++) {
828 		dev->q[i].dev = dev;
829 		dev->q[i].q_id = i;
830 
831 		qinfo[i].q = &dev->q[i];
832 		qinfo[i].queue_sem = &queue_sem;
833 		qinfo[i].affinity = &affinity_buf[i];
834 		pthread_create(&dev->q[i].thread, NULL,
835 				ublk_io_handler_fn,
836 				&qinfo[i]);
837 	}
838 
839 	for (i = 0; i < dinfo->nr_hw_queues; i++)
840 		sem_wait(&queue_sem);
841 	free(qinfo);
842 	free(affinity_buf);
843 
844 	/* everything is fine now, start us */
845 	if (ctx->recovery)
846 		ret = ublk_ctrl_end_user_recovery(dev, getpid());
847 	else {
848 		ublk_set_parameters(dev);
849 		ret = ublk_ctrl_start_dev(dev, getpid());
850 	}
851 	if (ret < 0) {
852 		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
853 		goto fail;
854 	}
855 
856 	ublk_ctrl_get_info(dev);
857 	if (ctx->fg)
858 		ublk_ctrl_dump(dev);
859 	else
860 		ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
861 
862 	/* wait until we are terminated */
863 	for (i = 0; i < dinfo->nr_hw_queues; i++)
864 		pthread_join(dev->q[i].thread, &thread_ret);
865  fail:
866 	ublk_dev_unprep(dev);
867 	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
868 
869 	return ret;
870 }
871 
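/*
 * Watch /dev with inotify for the given event mask and return 0 once an
 * event names the expected device node, or a negative error / -ETIMEDOUT
 * after 'timeout' seconds without a match.
 */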
872 static int wait_ublk_dev(const char *path, int evt_mask, unsigned timeout)
873 {
874 #define EV_SIZE (sizeof(struct inotify_event))
875 #define EV_BUF_LEN (128 * (EV_SIZE + 16))
876 	struct pollfd pfd;
877 	int fd, wd;
878 	int ret = -EINVAL;
879 	const char *dev_name = basename(path);
880 
881 	fd = inotify_init();
882 	if (fd < 0) {
883 		ublk_dbg(UBLK_DBG_DEV, "%s: inotify init failed\n", __func__);
884 		return fd;
885 	}
886 
887 	wd = inotify_add_watch(fd, "/dev", evt_mask);
888 	if (wd == -1) {
889 		ublk_dbg(UBLK_DBG_DEV, "%s: add watch for /dev failed\n", __func__);
890 		goto fail;
891 	}
892 
893 	pfd.fd = fd;
894 	pfd.events = POLLIN;
895 	while (1) {
896 		int i = 0;
897 		char buffer[EV_BUF_LEN];
898 		ret = poll(&pfd, 1, 1000 * timeout);
899 
900 		if (ret == -1) {
901 			ublk_err("%s: poll inotify failed: %d\n", __func__, ret);
902 			goto rm_watch;
903 		} else if (ret == 0) {
904 			ublk_err("%s: poll inotify timeout\n", __func__);
905 			ret = -ETIMEDOUT;
906 			goto rm_watch;
907 		}
908 
909 		ret = read(fd, buffer, EV_BUF_LEN);
910 		if (ret < 0) {
911 			ublk_err("%s: read inotify fd failed\n", __func__);
912 			goto rm_watch;
913 		}
914 
915 		while (i < ret) {
916 			struct inotify_event *event = (struct inotify_event *)&buffer[i];
917 
918 			ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
919 					__func__, event->mask, event->name);
920 			if (event->mask & evt_mask) {
921 				if (!strcmp(event->name, dev_name)) {
922 					ret = 0;
923 					goto rm_watch;
924 				}
925 			}
926 			i += EV_SIZE + event->len;
927 		}
928 	}
929 rm_watch:
930 	inotify_rm_watch(fd, wd);
931 fail:
932 	close(fd);
933 	return ret;
934 }
935 
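/*
 * Wait for the ublk daemon to shut down: if the process and its ublkc node
 * still exist, wait for the char device to be closed (with a kill(pid, 0)
 * re-check on timeout), then reap the daemon with waitpid().
 */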
936 static int ublk_stop_io_daemon(const struct ublk_dev *dev)
937 {
938 	int daemon_pid = dev->dev_info.ublksrv_pid;
939 	int dev_id = dev->dev_info.dev_id;
940 	char ublkc[64];
941 	int ret = 0;
942 
943 	if (daemon_pid < 0)
944 		return 0;
945 
946 	/* daemon may be dead already */
947 	if (kill(daemon_pid, 0) < 0)
948 		goto wait;
949 
950 	snprintf(ublkc, sizeof(ublkc), "/dev/%s%d", "ublkc", dev_id);
951 
952 	/* ublk char device may be gone already */
953 	if (access(ublkc, F_OK) != 0)
954 		goto wait;
955 
956 	/* Wait until the ublk char device is closed, which happens when the daemon shuts down */
957 	ret = wait_ublk_dev(ublkc, IN_CLOSE, 10);
958 	/* double check, since it may have been closed before inotify started watching */
959 	if (ret == -ETIMEDOUT)
960 		ret = kill(daemon_pid, 0) < 0;
961 wait:
962 	waitpid(daemon_pid, NULL, 0);
963 	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
964 			__func__, daemon_pid, dev_id, ret);
965 
966 	return ret;
967 }
968 
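/*
 * Add (or recover) one device: look up the target type, sanity-check queue
 * count and depth, create the control device, require the CMD_IOCTL_ENCODE
 * feature, fill in dev_info and target settings, then ADD_DEV (or
 * START_USER_RECOVERY) and run the daemon.
 */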
969 static int __cmd_dev_add(const struct dev_ctx *ctx)
970 {
971 	unsigned nr_queues = ctx->nr_hw_queues;
972 	const char *tgt_type = ctx->tgt_type;
973 	unsigned depth = ctx->queue_depth;
974 	__u64 features;
975 	const struct ublk_tgt_ops *ops;
976 	struct ublksrv_ctrl_dev_info *info;
977 	struct ublk_dev *dev;
978 	int dev_id = ctx->dev_id;
979 	int ret, i;
980 
981 	ops = ublk_find_tgt(tgt_type);
982 	if (!ops) {
983 		ublk_err("%s: no such tgt type, type %s\n",
984 				__func__, tgt_type);
985 		return -ENODEV;
986 	}
987 
988 	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
989 		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
990 				__func__, nr_queues, depth);
991 		return -EINVAL;
992 	}
993 
994 	dev = ublk_ctrl_init();
995 	if (!dev) {
996 		ublk_err("%s: can't alloc dev id %d, type %s\n",
997 				__func__, dev_id, tgt_type);
998 		return -ENOMEM;
999 	}
1000 
1001 	/* a failure here means the kernel doesn't support GET_FEATURES */
1002 	ret = ublk_ctrl_get_features(dev, &features);
1003 	if (ret < 0)
1004 		return -EINVAL;
1005 
1006 	if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
1007 		return -ENOTSUP;
1008 
1009 	info = &dev->dev_info;
1010 	info->dev_id = ctx->dev_id;
1011 	info->nr_hw_queues = nr_queues;
1012 	info->queue_depth = depth;
1013 	info->flags = ctx->flags;
1014 	dev->tgt.ops = ops;
1015 	dev->tgt.sq_depth = depth;
1016 	dev->tgt.cq_depth = depth;
1017 
1018 	for (i = 0; i < MAX_BACK_FILES; i++) {
1019 		if (ctx->files[i]) {
1020 			strcpy(dev->tgt.backing_file[i], ctx->files[i]);
1021 			dev->tgt.nr_backing_files++;
1022 		}
1023 	}
1024 
1025 	if (ctx->recovery)
1026 		ret = ublk_ctrl_start_user_recovery(dev);
1027 	else
1028 		ret = ublk_ctrl_add_dev(dev);
1029 	if (ret < 0) {
1030 		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
1031 				__func__, dev_id, tgt_type, ret);
1032 		goto fail;
1033 	}
1034 
1035 	ret = ublk_start_daemon(ctx, dev);
1036 	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
1037 	if (ret < 0)
1038 		ublk_ctrl_del_dev(dev);
1039 
1040 fail:
1041 	if (ret < 0)
1042 		ublk_send_dev_event(ctx, dev, -1);
1043 	ublk_ctrl_deinit(dev);
1044 	return ret;
1045 }
1046 
1047 static int __cmd_dev_list(struct dev_ctx *ctx);
1048 
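/*
 * Unless --foreground is given, daemonize with a double fork()/setsid(): the
 * grandchild runs __cmd_dev_add() while the original process waits on an
 * eventfd for the new device id and dumps it from the SysV shared-memory
 * shadow device before exiting.
 */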
1049 static int cmd_dev_add(struct dev_ctx *ctx)
1050 {
1051 	int res;
1052 
1053 	if (ctx->fg)
1054 		goto run;
1055 
1056 	ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
1057 	if (ctx->_shmid < 0) {
1058 		ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
1059 		exit(-1);
1060 	}
1061 	ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
1062 	if (ctx->shadow_dev == (struct ublk_dev *)-1) {
1063 		ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
1064 		exit(-1);
1065 	}
1066 	ctx->_evtfd = eventfd(0, 0);
1067 	if (ctx->_evtfd < 0) {
1068 		ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
1069 		exit(-1);
1070 	}
1071 
1072 	res = fork();
1073 	if (res == 0) {
1074 		int res2;
1075 
1076 		setsid();
1077 		res2 = fork();
1078 		if (res2 == 0) {
1079 			/* prepare for detaching */
1080 			close(STDIN_FILENO);
1081 			close(STDOUT_FILENO);
1082 			close(STDERR_FILENO);
1083 run:
1084 			res = __cmd_dev_add(ctx);
1085 			return res;
1086 		} else {
1087 			/* detached from the foreground task */
1088 			exit(EXIT_SUCCESS);
1089 		}
1090 	} else if (res > 0) {
1091 		uint64_t id;
1092 		int exit_code = EXIT_FAILURE;
1093 
1094 		res = read(ctx->_evtfd, &id, sizeof(id));
1095 		close(ctx->_evtfd);
1096 		if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
1097 			ctx->dev_id = id - 1;
1098 			if (__cmd_dev_list(ctx) >= 0)
1099 				exit_code = EXIT_SUCCESS;
1100 		}
1101 		shmdt(ctx->shadow_dev);
1102 		shmctl(ctx->_shmid, IPC_RMID, NULL);
1103 		/* wait for child and detach from it */
1104 		wait(NULL);
1105 		exit(exit_code);
1106 	} else {
1107 		exit(EXIT_FAILURE);
1108 	}
1109 }
1110 
1111 static int __cmd_dev_del(struct dev_ctx *ctx)
1112 {
1113 	int number = ctx->dev_id;
1114 	struct ublk_dev *dev;
1115 	int ret;
1116 
1117 	dev = ublk_ctrl_init();
	if (!dev)
		return -ENODEV;
1118 	dev->dev_info.dev_id = number;
1119 
1120 	ret = ublk_ctrl_get_info(dev);
1121 	if (ret < 0)
1122 		goto fail;
1123 
1124 	ret = ublk_ctrl_stop_dev(dev);
1125 	if (ret < 0)
1126 		ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);
1127 
1128 	ret = ublk_stop_io_daemon(dev);
1129 	if (ret < 0)
1130 		ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
1131 				__func__, dev->dev_info.ublksrv_pid, number, ret);
1132 	ublk_ctrl_del_dev(dev);
1133 fail:
1134 	ublk_ctrl_deinit(dev);
1135 
1136 	return (ret >= 0) ? 0 : ret;
1137 }
1138 
1139 static int cmd_dev_del(struct dev_ctx *ctx)
1140 {
1141 	int i;
1142 
1143 	if (ctx->dev_id >= 0 || !ctx->all)
1144 		return __cmd_dev_del(ctx);
1145 
1146 	for (i = 0; i < 255; i++) {
1147 		ctx->dev_id = i;
1148 		__cmd_dev_del(ctx);
1149 	}
1150 	return 0;
1151 }
1152 
1153 static int __cmd_dev_list(struct dev_ctx *ctx)
1154 {
1155 	struct ublk_dev *dev = ublk_ctrl_init();
1156 	int ret;
1157 
1158 	if (!dev)
1159 		return -ENODEV;
1160 
1161 	dev->dev_info.dev_id = ctx->dev_id;
1162 
1163 	ret = ublk_ctrl_get_info(dev);
1164 	if (ret < 0) {
1165 		if (ctx->logging)
1166 			ublk_err("%s: can't get dev info from %d: %d\n",
1167 					__func__, ctx->dev_id, ret);
1168 	} else {
1169 		if (ctx->shadow_dev)
1170 			memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));
1171 
1172 		ublk_ctrl_dump(dev);
1173 	}
1174 
1175 	ublk_ctrl_deinit(dev);
1176 
1177 	return ret;
1178 }
1179 
1180 static int cmd_dev_list(struct dev_ctx *ctx)
1181 {
1182 	int i;
1183 
1184 	if (ctx->dev_id >= 0 || !ctx->all)
1185 		return __cmd_dev_list(ctx);
1186 
1187 	ctx->logging = false;
1188 	for (i = 0; i < 255; i++) {
1189 		ctx->dev_id = i;
1190 		__cmd_dev_list(ctx);
1191 	}
1192 	return 0;
1193 }
1194 
1195 static int cmd_dev_get_features(void)
1196 {
1197 #define const_ilog2(x) (63 - __builtin_clzll(x))
1198 	static const char *feat_map[] = {
1199 		[const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY",
1200 		[const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK",
1201 		[const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA",
1202 		[const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY",
1203 		[const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE",
1204 		[const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV",
1205 		[const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE",
1206 		[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
1207 		[const_ilog2(UBLK_F_ZONED)] = "ZONED",
1208 		[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
1209 	};
1210 	struct ublk_dev *dev;
1211 	__u64 features = 0;
1212 	int ret;
1213 
1214 	dev = ublk_ctrl_init();
1215 	if (!dev) {
1216 		fprintf(stderr, "ublk_ctrl_init failed\n");
1217 		return -EOPNOTSUPP;
1218 	}
1219 
1220 	ret = ublk_ctrl_get_features(dev, &features);
1221 	if (!ret) {
1222 		int i;
1223 
1224 		printf("ublk_drv features: 0x%llx\n", features);
1225 
1226 		for (i = 0; i < sizeof(features) * 8; i++) {
1227 			const char *feat;
1228 
1229 			if (!((1ULL << i)  & features))
1230 				continue;
1231 			if (i < sizeof(feat_map) / sizeof(feat_map[0]))
1232 				feat = feat_map[i];
1233 			else
1234 				feat = "unknown";
1235 			printf("\t%-20s: 0x%llx\n", feat, 1ULL << i);
1236 		}
1237 	}
1238 
1239 	ublk_ctrl_deinit(dev);
	return ret;
1240 }
1241 
1242 static void __cmd_create_help(char *exe, bool recovery)
1243 {
1244 	int i;
1245 
1246 	printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
1247 			exe, recovery ? "recover" : "add");
1248 	printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n");
1249 	printf("\t[-e 0|1 ] [-i 0|1]\n");
1250 	printf("\t[target options] [backfile1] [backfile2] ...\n");
1251 	printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
1252 
1253 	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
1254 		const struct ublk_tgt_ops *ops = tgt_ops_list[i];
1255 
1256 		if (ops->usage)
1257 			ops->usage(ops);
1258 	}
1259 }
1260 
1261 static void cmd_add_help(char *exe)
1262 {
1263 	__cmd_create_help(exe, false);
1264 	printf("\n");
1265 }
1266 
1267 static void cmd_recover_help(char *exe)
1268 {
1269 	__cmd_create_help(exe, true);
1270 	printf("\tPlease provide exact command line for creating this device with real dev_id\n");
1271 	printf("\n");
1272 }
1273 
1274 static int cmd_dev_help(char *exe)
1275 {
1276 	cmd_add_help(exe);
1277 	cmd_recover_help(exe);
1278 
1279 	printf("%s del [-n dev_id] -a \n", exe);
1280 	printf("\t -a delete all devices, -n delete specified device\n\n");
1281 	printf("%s list [-n dev_id] -a \n", exe);
1282 	printf("\t -a list all devices, -n list specified device, default -a \n\n");
1283 	printf("%s features\n", exe);
1284 	return 0;
1285 }
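
/*
 * Example invocations (the binary name and device ids below are
 * illustrative):
 *
 *	kublk add -t null -q 2 -d 128
 *	kublk add -t loop -q 2 -d 128 /path/to/backing.img
 *	kublk recover -t null -q 2 -d 128 -r 1 -n 0
 *	kublk del -n 0
 *	kublk list -a
 *	kublk features
 */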
1286 
1287 int main(int argc, char *argv[])
1288 {
1289 	static const struct option longopts[] = {
1290 		{ "all",		0,	NULL, 'a' },
1291 		{ "type",		1,	NULL, 't' },
1292 		{ "number",		1,	NULL, 'n' },
1293 		{ "queues",		1,	NULL, 'q' },
1294 		{ "depth",		1,	NULL, 'd' },
1295 		{ "debug_mask",		1,	NULL,  0  },
1296 		{ "quiet",		0,	NULL,  0  },
1297 		{ "zero_copy",          0,      NULL, 'z' },
1298 		{ "foreground",		0,	NULL,  0  },
1299 		{ "recovery", 		1,      NULL, 'r' },
1300 		{ "recovery_fail_io",	1,	NULL, 'e'},
1301 		{ "recovery_reissue",	1,	NULL, 'i'},
1302 		{ "get_data",		1,	NULL, 'g'},
1303 		{ 0, 0, 0, 0 }
1304 	};
1305 	const struct ublk_tgt_ops *ops = NULL;
1306 	int option_idx, opt;
1307 	const char *cmd = argv[1];
1308 	struct dev_ctx ctx = {
1309 		.queue_depth	=	128,
1310 		.nr_hw_queues	=	2,
1311 		.dev_id		=	-1,
1312 		.tgt_type	=	"unknown",
1313 	};
1314 	int ret = -EINVAL, i;
1315 	int tgt_argc = 1;
1316 	char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
1317 	int value;
1318 
1319 	if (argc == 1)
1320 		return ret;
1321 
1322 	opterr = 0;
1323 	optind = 2;
1324 	while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz",
1325 				  longopts, &option_idx)) != -1) {
1326 		switch (opt) {
1327 		case 'a':
1328 			ctx.all = 1;
1329 			break;
1330 		case 'n':
1331 			ctx.dev_id = strtol(optarg, NULL, 10);
1332 			break;
1333 		case 't':
1334 			if (strlen(optarg) < sizeof(ctx.tgt_type))
1335 				strcpy(ctx.tgt_type, optarg);
1336 			break;
1337 		case 'q':
1338 			ctx.nr_hw_queues = strtol(optarg, NULL, 10);
1339 			break;
1340 		case 'd':
1341 			ctx.queue_depth = strtol(optarg, NULL, 10);
1342 			break;
1343 		case 'z':
1344 			ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
1345 			break;
1346 		case 'r':
1347 			value = strtol(optarg, NULL, 10);
1348 			if (value)
1349 				ctx.flags |= UBLK_F_USER_RECOVERY;
1350 			break;
1351 		case 'e':
1352 			value = strtol(optarg, NULL, 10);
1353 			if (value)
1354 				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
1355 			break;
1356 		case 'i':
1357 			value = strtol(optarg, NULL, 10);
1358 			if (value)
1359 				ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
1360 			break;
1361 		case 'g':
1362 			ctx.flags |= UBLK_F_NEED_GET_DATA;
1363 			break;
1364 		case 0:
1365 			if (!strcmp(longopts[option_idx].name, "debug_mask"))
1366 				ublk_dbg_mask = strtol(optarg, NULL, 16);
1367 			if (!strcmp(longopts[option_idx].name, "quiet"))
1368 				ublk_dbg_mask = 0;
1369 			if (!strcmp(longopts[option_idx].name, "foreground"))
1370 				ctx.fg = 1;
1371 			break;
1372 		case '?':
1373 			/*
1374 			 * every target option must take an argument
1375 			 */
1376 			if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
1377 				fprintf(stderr, "every target option requires argument: %s %s\n",
1378 						argv[optind - 1], argv[optind]);
1379 				exit(EXIT_FAILURE);
1380 			}
1381 
1382 			if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
1383 				tgt_argv[tgt_argc++] = argv[optind - 1];
1384 				tgt_argv[tgt_argc++] = argv[optind];
1385 			} else {
1386 				fprintf(stderr, "too many target options\n");
1387 				exit(EXIT_FAILURE);
1388 			}
1389 			optind += 1;
1390 			break;
1391 		}
1392 	}
1393 
1394 	i = optind;
1395 	while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
1396 		ctx.files[ctx.nr_files++] = argv[i++];
1397 	}
1398 
1399 	ops = ublk_find_tgt(ctx.tgt_type);
1400 	if (ops && ops->parse_cmd_line) {
1401 		optind = 0;
1402 
1403 		tgt_argv[0] = ctx.tgt_type;
1404 		ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
1405 	}
1406 
1407 	if (!strcmp(cmd, "add"))
1408 		ret = cmd_dev_add(&ctx);
1409 	else if (!strcmp(cmd, "recover")) {
1410 		if (ctx.dev_id < 0) {
1411 			fprintf(stderr, "device id isn't provided for recovering\n");
1412 			ret = -EINVAL;
1413 		} else {
1414 			ctx.recovery = 1;
1415 			ret = cmd_dev_add(&ctx);
1416 		}
1417 	} else if (!strcmp(cmd, "del"))
1418 		ret = cmd_dev_del(&ctx);
1419 	else if (!strcmp(cmd, "list")) {
1420 		ctx.all = 1;
1421 		ret = cmd_dev_list(&ctx);
1422 	} else if (!strcmp(cmd, "help"))
1423 		ret = cmd_dev_help(argv[0]);
1424 	else if (!strcmp(cmd, "features"))
1425 		ret = cmd_dev_get_features();
1426 	else
1427 		cmd_dev_help(argv[0]);
1428 
1429 	return ret;
1430 }
1431