// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2025, Christoph Hellwig.
 * Copyright (c) 2025, Western Digital Corporation or its affiliates.
 *
 * Zoned Loop Device driver - exports a zoned block device using one file per
 * zone as backing storage.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/blkzoned.h>
#include <linux/pagemap.h>
#include <linux/miscdevice.h>
#include <linux/falloc.h>
#include <linux/mutex.h>
#include <linux/parser.h>
#include <linux/seq_file.h>

/*
 * Options for adding (and removing) a device.
 */
enum {
	ZLOOP_OPT_ERR			= 0,
	ZLOOP_OPT_ID			= (1 << 0),
	ZLOOP_OPT_CAPACITY		= (1 << 1),
	ZLOOP_OPT_ZONE_SIZE		= (1 << 2),
	ZLOOP_OPT_ZONE_CAPACITY		= (1 << 3),
	ZLOOP_OPT_NR_CONV_ZONES		= (1 << 4),
	ZLOOP_OPT_BASE_DIR		= (1 << 5),
	ZLOOP_OPT_NR_QUEUES		= (1 << 6),
	ZLOOP_OPT_QUEUE_DEPTH		= (1 << 7),
	ZLOOP_OPT_BUFFERED_IO		= (1 << 8),
};

static const match_table_t zloop_opt_tokens = {
	{ ZLOOP_OPT_ID,			"id=%d"	},
	{ ZLOOP_OPT_CAPACITY,		"capacity_mb=%u"	},
	{ ZLOOP_OPT_ZONE_SIZE,		"zone_size_mb=%u"	},
	{ ZLOOP_OPT_ZONE_CAPACITY,	"zone_capacity_mb=%u"	},
	{ ZLOOP_OPT_NR_CONV_ZONES,	"conv_zones=%u"		},
	{ ZLOOP_OPT_BASE_DIR,		"base_dir=%s"		},
	{ ZLOOP_OPT_NR_QUEUES,		"nr_queues=%u"		},
	{ ZLOOP_OPT_QUEUE_DEPTH,	"queue_depth=%u"	},
	{ ZLOOP_OPT_BUFFERED_IO,	"buffered_io"		},
	{ ZLOOP_OPT_ERR,		NULL			}
};
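
/*
 * Example control interface usage, assuming the default base directory
 * (the per-device backing directory must exist before an "add"; see
 * zloop_ctl_add() and zloop_ctl_write() below):
 *
 *   modprobe zloop
 *   mkdir -p /var/local/zloop/0
 *   echo "add id=0,capacity_mb=16384,zone_size_mb=256,conv_zones=8" \
 *       > /dev/zloop-control
 *   ...
 *   echo "remove id=0" > /dev/zloop-control
 */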

/* Default values for the "add" operation. */
#define ZLOOP_DEF_ID			-1
#define ZLOOP_DEF_ZONE_SIZE		((256ULL * SZ_1M) >> SECTOR_SHIFT)
#define ZLOOP_DEF_NR_ZONES		64
#define ZLOOP_DEF_NR_CONV_ZONES		8
#define ZLOOP_DEF_BASE_DIR		"/var/local/zloop"
#define ZLOOP_DEF_NR_QUEUES		1
#define ZLOOP_DEF_QUEUE_DEPTH		128
#define ZLOOP_DEF_BUFFERED_IO		false

/* Arbitrary limit on the zone size (16GB). */
#define ZLOOP_MAX_ZONE_SIZE_MB		16384

struct zloop_options {
	unsigned int		mask;
	int			id;
	sector_t		capacity;
	sector_t		zone_size;
	sector_t		zone_capacity;
	unsigned int		nr_conv_zones;
	char			*base_dir;
	unsigned int		nr_queues;
	unsigned int		queue_depth;
	bool			buffered_io;
};

/*
 * Device states.
 */
enum {
	Zlo_creating = 0,
	Zlo_live,
	Zlo_deleting,
};

enum zloop_zone_flags {
	ZLOOP_ZONE_CONV = 0,
	ZLOOP_ZONE_SEQ_ERROR,
};

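/*
 * Per-zone state. Each zone is backed by one regular file under
 * <base_dir>/<id>/: "cnv-NNNNNN" for conventional zones and "seq-NNNNNN"
 * for sequential zones (see zloop_init_zone()). For sequential zones, the
 * size of the backing file encodes the zone condition and write pointer
 * (see zloop_update_seq_zone()).
 */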
struct zloop_zone {
	struct file		*file;

	unsigned long		flags;
	struct mutex		lock;
	enum blk_zone_cond	cond;
	sector_t		start;
	sector_t		wp;

	gfp_t			old_gfp_mask;
};

struct zloop_device {
	unsigned int		id;
	unsigned int		state;

	struct blk_mq_tag_set	tag_set;
	struct gendisk		*disk;

	struct workqueue_struct *workqueue;
	bool			buffered_io;

	const char		*base_dir;
	struct file		*data_dir;

	unsigned int		zone_shift;
	sector_t		zone_size;
	sector_t		zone_capacity;
	unsigned int		nr_zones;
	unsigned int		nr_conv_zones;
	unsigned int		block_size;

	struct zloop_zone	zones[] __counted_by(nr_zones);
};

struct zloop_cmd {
	struct work_struct	work;
	atomic_t		ref;
	sector_t		sector;
	sector_t		nr_sectors;
	long			ret;
	struct kiocb		iocb;
	struct bio_vec		*bvec;
};

static DEFINE_IDR(zloop_index_idr);
static DEFINE_MUTEX(zloop_ctl_mutex);

static unsigned int rq_zone_no(struct request *rq)
{
	struct zloop_device *zlo = rq->q->queuedata;

	return blk_rq_pos(rq) >> zlo->zone_shift;
}

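/*
 * Recover the condition and write pointer of a sequential zone from the
 * size of its backing file: an empty file means BLK_ZONE_COND_EMPTY, a file
 * of exactly zone_capacity sectors means BLK_ZONE_COND_FULL, and any size
 * in between means BLK_ZONE_COND_CLOSED with the write pointer at
 * zone->start + file size. Must be called with the zone lock held.
 */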
static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	struct kstat stat;
	sector_t file_sectors;
	int ret;

	lockdep_assert_held(&zone->lock);

	ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
	if (ret < 0) {
		pr_err("Failed to get zone %u file stat (err=%d)\n",
		       zone_no, ret);
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		return ret;
	}

	file_sectors = stat.size >> SECTOR_SHIFT;
	if (file_sectors > zlo->zone_capacity) {
		pr_err("Zone %u file too large (%llu sectors > %llu)\n",
		       zone_no, file_sectors, zlo->zone_capacity);
		return -EINVAL;
	}

	if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
		pr_err("Zone %u file size not aligned to block size %u\n",
		       zone_no, zlo->block_size);
		return -EINVAL;
	}

	if (!file_sectors) {
		zone->cond = BLK_ZONE_COND_EMPTY;
		zone->wp = zone->start;
	} else if (file_sectors == zlo->zone_capacity) {
		zone->cond = BLK_ZONE_COND_FULL;
		zone->wp = zone->start + zlo->zone_size;
	} else {
		zone->cond = BLK_ZONE_COND_CLOSED;
		zone->wp = zone->start + file_sectors;
	}

	return 0;
}

static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		ret = zloop_update_seq_zone(zlo, zone_no);
		if (ret)
			goto unlock;
	}

	switch (zone->cond) {
	case BLK_ZONE_COND_EXP_OPEN:
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_CLOSED:
	case BLK_ZONE_COND_IMP_OPEN:
		zone->cond = BLK_ZONE_COND_EXP_OPEN;
		break;
	case BLK_ZONE_COND_FULL:
	default:
		ret = -EIO;
		break;
	}

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		ret = zloop_update_seq_zone(zlo, zone_no);
		if (ret)
			goto unlock;
	}

	switch (zone->cond) {
	case BLK_ZONE_COND_CLOSED:
		break;
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
		if (zone->wp == zone->start)
			zone->cond = BLK_ZONE_COND_EMPTY;
		else
			zone->cond = BLK_ZONE_COND_CLOSED;
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_FULL:
	default:
		ret = -EIO;
		break;
	}

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_EMPTY)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, 0)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	zone->cond = BLK_ZONE_COND_EMPTY;
	zone->wp = zone->start;
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static int zloop_reset_all_zones(struct zloop_device *zlo)
{
	unsigned int i;
	int ret;

	for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) {
		ret = zloop_reset_zone(zlo, i);
		if (ret)
			return ret;
	}

	return 0;
}

static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int ret = 0;

	if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
		return -EIO;

	mutex_lock(&zone->lock);

	if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
	    zone->cond == BLK_ZONE_COND_FULL)
		goto unlock;

	if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) {
		set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
		ret = -EIO;
		goto unlock;
	}

	zone->cond = BLK_ZONE_COND_FULL;
	zone->wp = zone->start + zlo->zone_size;
	clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);

unlock:
	mutex_unlock(&zone->lock);

	return ret;
}

static void zloop_put_cmd(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);

	if (!atomic_dec_and_test(&cmd->ref))
		return;
	kfree(cmd->bvec);
	cmd->bvec = NULL;
	if (likely(!blk_should_fake_timeout(rq->q)))
		blk_mq_complete_request(rq);
}

static void zloop_rw_complete(struct kiocb *iocb, long ret)
{
	struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb);

	cmd->ret = ret;
	zloop_put_cmd(cmd);
}

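/*
 * Handle a read, write or zone append request by issuing asynchronous
 * kiocb-based I/O against the zone backing file. For zone append, the
 * target sector is replaced with the current write pointer, which is
 * reported back through the request on completion. The write pointer of a
 * sequential zone is advanced optimistically at submission time: if the
 * write fails, ZLOOP_ZONE_SEQ_ERROR is set and the zone state is recovered
 * from the backing file before the next I/O.
 */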
static void zloop_rw(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = rq_zone_no(rq);
	sector_t sector = blk_rq_pos(rq);
	sector_t nr_sectors = blk_rq_sectors(rq);
	bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
	bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
	int rw = is_write ? ITER_SOURCE : ITER_DEST;
	struct req_iterator rq_iter;
	struct zloop_zone *zone;
	struct iov_iter iter;
	struct bio_vec tmp;
	sector_t zone_end;
	int nr_bvec = 0;
	int ret;

	atomic_set(&cmd->ref, 2);
	cmd->sector = sector;
	cmd->nr_sectors = nr_sectors;
	cmd->ret = 0;

	/* We should never get an I/O beyond the device capacity. */
	if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
		ret = -EIO;
		goto out;
	}
	zone = &zlo->zones[zone_no];
	zone_end = zone->start + zlo->zone_capacity;

	/*
	 * The block layer should never send requests that are not fully
	 * contained within the zone.
	 */
	if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
		ret = -EIO;
		goto out;
	}

	if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
		mutex_lock(&zone->lock);
		ret = zloop_update_seq_zone(zlo, zone_no);
		mutex_unlock(&zone->lock);
		if (ret)
			goto out;
	}

	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
		mutex_lock(&zone->lock);

		if (is_append) {
			sector = zone->wp;
			cmd->sector = sector;
		}

		/*
		 * Write operations must be aligned to the write pointer and
		 * fully contained within the zone capacity.
		 */
		if (sector != zone->wp || zone->wp + nr_sectors > zone_end) {
			pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
			       zone_no, sector, zone->wp);
			ret = -EIO;
			goto unlock;
		}

		/* Implicitly open the target zone. */
		if (zone->cond == BLK_ZONE_COND_CLOSED ||
		    zone->cond == BLK_ZONE_COND_EMPTY)
			zone->cond = BLK_ZONE_COND_IMP_OPEN;

		/*
		 * Advance the write pointer of sequential zones. If the write
		 * fails, the wp position will be corrected when the next I/O
		 * is submitted.
		 */
		zone->wp += nr_sectors;
		if (zone->wp == zone_end)
			zone->cond = BLK_ZONE_COND_FULL;
	}

	rq_for_each_bvec(tmp, rq, rq_iter)
		nr_bvec++;

	if (rq->bio != rq->biotail) {
		struct bio_vec *bvec;

		cmd->bvec = kmalloc_array(nr_bvec, sizeof(*cmd->bvec), GFP_NOIO);
		if (!cmd->bvec) {
			ret = -EIO;
			goto unlock;
		}

		/*
		 * The bios of the request may be started from the middle of
		 * the 'bvec' because of bio splitting, so we can't directly
		 * copy bio->bi_io_vec to the new bvec. The rq_for_each_bvec
		 * API will take care of all details for us.
		 */
		bvec = cmd->bvec;
		rq_for_each_bvec(tmp, rq, rq_iter) {
			*bvec = tmp;
			bvec++;
		}
		iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq));
	} else {
		/*
		 * Same here, this bio may be started from the middle of the
		 * 'bvec' because of bio splitting, so the offset from the
		 * bvec must be passed to the iov iterator.
		 */
		iov_iter_bvec(&iter, rw,
			__bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
					nr_bvec, blk_rq_bytes(rq));
		iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
	}

	cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
	cmd->iocb.ki_filp = zone->file;
	cmd->iocb.ki_complete = zloop_rw_complete;
	if (!zlo->buffered_io)
		cmd->iocb.ki_flags = IOCB_DIRECT;
	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);

	if (rw == ITER_SOURCE)
		ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
	else
		ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
unlock:
	if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
		mutex_unlock(&zone->lock);
out:
	if (ret != -EIOCBQUEUED)
		zloop_rw_complete(&cmd->iocb, ret);
	zloop_put_cmd(cmd);
}

static void zloop_handle_cmd(struct zloop_cmd *cmd)
{
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	struct zloop_device *zlo = rq->q->queuedata;

	switch (req_op(rq)) {
	case REQ_OP_READ:
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_APPEND:
		/*
		 * zloop_rw() always executes asynchronously or completes
		 * directly.
		 */
		zloop_rw(cmd);
		return;
	case REQ_OP_FLUSH:
		/*
		 * Sync the entire FS containing the zone files instead of
		 * walking all files.
		 */
		cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb);
		break;
	case REQ_OP_ZONE_RESET:
		cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_RESET_ALL:
		cmd->ret = zloop_reset_all_zones(zlo);
		break;
	case REQ_OP_ZONE_FINISH:
		cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_OPEN:
		cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq));
		break;
	case REQ_OP_ZONE_CLOSE:
		cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq));
		break;
	default:
		WARN_ON_ONCE(1);
		pr_err("Unsupported operation %d\n", req_op(rq));
		cmd->ret = -EOPNOTSUPP;
		break;
	}

	blk_mq_complete_request(rq);
}

static void zloop_cmd_workfn(struct work_struct *work)
{
	struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work);
	int orig_flags = current->flags;

	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
	zloop_handle_cmd(cmd);
	current->flags = orig_flags;
}

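/*
 * Request completion handler, called through blk_mq_ops->complete. Short
 * reads are not failures: the unread part of the request is zero-filled.
 * Short writes are turned into -EIO, and any failed write to a sequential
 * zone marks the zone with ZLOOP_ZONE_SEQ_ERROR so that its write pointer
 * is recovered from the backing file before the next I/O is processed.
 */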
static void zloop_complete_rq(struct request *rq)
{
	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct zloop_device *zlo = rq->q->queuedata;
	unsigned int zone_no = cmd->sector >> zlo->zone_shift;
	struct zloop_zone *zone = &zlo->zones[zone_no];
	blk_status_t sts = BLK_STS_OK;

	switch (req_op(rq)) {
	case REQ_OP_READ:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed read sector %llu, %llu sectors\n",
			       zone_no, cmd->sector, cmd->nr_sectors);

		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			/* short read */
			struct bio *bio;

			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
		}
		break;
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_APPEND:
		if (cmd->ret < 0)
			pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n",
			       zone_no,
			       req_op(rq) == REQ_OP_WRITE ? "" : "append ",
			       cmd->sector, cmd->nr_sectors);

		if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
			pr_err("Zone %u: partial write %ld/%u B\n",
			       zone_no, cmd->ret, blk_rq_bytes(rq));
			cmd->ret = -EIO;
		}

		if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			/*
			 * A write to a sequential zone file failed: mark the
			 * zone as having an error. This will be corrected and
			 * cleared when the next IO is submitted.
			 */
			set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
			break;
		}
		if (req_op(rq) == REQ_OP_ZONE_APPEND)
			rq->__sector = cmd->sector;

		break;
	default:
		break;
	}

	if (cmd->ret < 0)
		sts = errno_to_blk_status(cmd->ret);
	blk_mq_end_request(rq, sts);
}

static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct zloop_device *zlo = rq->q->queuedata;

	if (zlo->state == Zlo_deleting)
		return BLK_STS_IOERR;

	blk_mq_start_request(rq);

	INIT_WORK(&cmd->work, zloop_cmd_workfn);
	queue_work(zlo->workqueue, &cmd->work);

	return BLK_STS_OK;
}

static const struct blk_mq_ops zloop_mq_ops = {
	.queue_rq       = zloop_queue_rq,
	.complete	= zloop_complete_rq,
};

static int zloop_open(struct gendisk *disk, blk_mode_t mode)
{
	struct zloop_device *zlo = disk->private_data;
	int ret;

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		return ret;

	if (zlo->state != Zlo_live)
		ret = -ENXIO;
	mutex_unlock(&zloop_ctl_mutex);
	return ret;
}

static int zloop_report_zones(struct gendisk *disk, sector_t sector,
		unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct zloop_device *zlo = disk->private_data;
	struct blk_zone blkz = {};
	unsigned int first, i;
	int ret;

	first = disk_zone_no(disk, sector);
	if (first >= zlo->nr_zones)
		return 0;
	nr_zones = min(nr_zones, zlo->nr_zones - first);

	for (i = 0; i < nr_zones; i++) {
		unsigned int zone_no = first + i;
		struct zloop_zone *zone = &zlo->zones[zone_no];

		mutex_lock(&zone->lock);

		if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
			ret = zloop_update_seq_zone(zlo, zone_no);
			if (ret) {
				mutex_unlock(&zone->lock);
				return ret;
			}
		}

		blkz.start = zone->start;
		blkz.len = zlo->zone_size;
		blkz.wp = zone->wp;
		blkz.cond = zone->cond;
		if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
			blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
			blkz.capacity = zlo->zone_size;
		} else {
			blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
			blkz.capacity = zlo->zone_capacity;
		}

		mutex_unlock(&zone->lock);

		ret = cb(&blkz, i, data);
		if (ret)
			return ret;
	}

	return nr_zones;
}

static void zloop_free_disk(struct gendisk *disk)
{
	struct zloop_device *zlo = disk->private_data;
	unsigned int i;

	blk_mq_free_tag_set(&zlo->tag_set);

	for (i = 0; i < zlo->nr_zones; i++) {
		struct zloop_zone *zone = &zlo->zones[i];

		mapping_set_gfp_mask(zone->file->f_mapping,
				zone->old_gfp_mask);
		fput(zone->file);
	}

	fput(zlo->data_dir);
	destroy_workqueue(zlo->workqueue);
	kfree(zlo->base_dir);
	kvfree(zlo);
}

static const struct block_device_operations zloop_fops = {
	.owner			= THIS_MODULE,
	.open			= zloop_open,
	.report_zones		= zloop_report_zones,
	.free_disk		= zloop_free_disk,
};

__printf(3, 4)
static struct file *zloop_filp_open_fmt(int oflags, umode_t mode,
		const char *fmt, ...)
{
	struct file *file;
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);

	if (!p)
		return ERR_PTR(-ENOMEM);
	file = filp_open(p, oflags, mode);
	kfree(p);
	return file;
}

static int zloop_get_block_size(struct zloop_device *zlo,
				struct zloop_zone *zone)
{
	struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev;
	struct kstat st;

	/*
	 * If the FS block size is lower than or equal to 4K, use that as the
	 * device block size. Otherwise, fall back to the FS direct IO
	 * alignment constraint if that is provided, and to the FS underlying
	 * device physical block size if the direct IO alignment is unknown.
	 */
	if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
		zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize;
	else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
		 (st.result_mask & STATX_DIOALIGN))
		zlo->block_size = st.dio_offset_align;
	else if (sb_bdev)
		zlo->block_size = bdev_physical_block_size(sb_bdev);
	else
		zlo->block_size = SECTOR_SIZE;

	if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
		pr_err("Zone capacity is not aligned to block size %u\n",
		       zlo->block_size);
		return -EINVAL;
	}

	return 0;
}

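/*
 * Open (and create, unless an existing device is being restored) the
 * backing file of a zone. Conventional zone files are preallocated to the
 * full zone size using vfs_truncate(), while the state of sequential zones
 * is recovered from the current backing file size. The first file opened
 * also determines the device block size (see zloop_get_block_size()).
 */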
static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
			   unsigned int zone_no, bool restore)
{
	struct zloop_zone *zone = &zlo->zones[zone_no];
	int oflags = O_RDWR;
	struct kstat stat;
	sector_t file_sectors;
	int ret;

	mutex_init(&zone->lock);
	zone->start = (sector_t)zone_no << zlo->zone_shift;

	if (!restore)
		oflags |= O_CREAT;

	if (!opts->buffered_io)
		oflags |= O_DIRECT;

	if (zone_no < zlo->nr_conv_zones) {
		/* Conventional zone file. */
		set_bit(ZLOOP_ZONE_CONV, &zone->flags);
		zone->cond = BLK_ZONE_COND_NOT_WP;
		zone->wp = U64_MAX;

		zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u",
					zlo->base_dir, zlo->id, zone_no);
		if (IS_ERR(zone->file)) {
			pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)\n",
			       zone_no, zlo->base_dir, zlo->id, zone_no,
			       PTR_ERR(zone->file));
			return PTR_ERR(zone->file);
		}

		if (!zlo->block_size) {
			ret = zloop_get_block_size(zlo, zone);
			if (ret)
				return ret;
		}

		ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
		if (ret < 0) {
			pr_err("Failed to get zone %u file stat\n", zone_no);
			return ret;
		}
		file_sectors = stat.size >> SECTOR_SHIFT;

		if (restore && file_sectors != zlo->zone_size) {
			pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n",
			       zone_no, file_sectors, zlo->zone_size);
			return -EINVAL;
		}

		ret = vfs_truncate(&zone->file->f_path,
				   zlo->zone_size << SECTOR_SHIFT);
		if (ret < 0) {
			pr_err("Failed to truncate zone %u file (err=%d)\n",
			       zone_no, ret);
			return ret;
		}

		return 0;
	}

	/* Sequential zone file. */
	zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u",
					 zlo->base_dir, zlo->id, zone_no);
	if (IS_ERR(zone->file)) {
		pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)\n",
		       zone_no, zlo->base_dir, zlo->id, zone_no,
		       PTR_ERR(zone->file));
		return PTR_ERR(zone->file);
	}

	if (!zlo->block_size) {
		ret = zloop_get_block_size(zlo, zone);
		if (ret)
			return ret;
	}
	mutex_lock(&zone->lock);
	ret = zloop_update_seq_zone(zlo, zone_no);
	mutex_unlock(&zone->lock);

	return ret;
}

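/*
 * A device already has backing files (and thus must be restored rather
 * than created from scratch) if the file of its first conventional or
 * first sequential zone can be opened.
 */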
static bool zloop_dev_exists(struct zloop_device *zlo)
{
	struct file *cnv, *seq;
	bool exists;

	cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u",
				  zlo->base_dir, zlo->id, 0);
	seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u",
				  zlo->base_dir, zlo->id, 0);
	exists = !IS_ERR(cnv) || !IS_ERR(seq);

	if (!IS_ERR(cnv))
		fput(cnv);
	if (!IS_ERR(seq))
		fput(seq);

	return exists;
}

static int zloop_ctl_add(struct zloop_options *opts)
{
	struct queue_limits lim = {
		.max_hw_sectors		= SZ_1M >> SECTOR_SHIFT,
		.max_hw_zone_append_sectors = SZ_1M >> SECTOR_SHIFT,
		.chunk_sectors		= opts->zone_size,
		.features		= BLK_FEAT_ZONED,
	};
	unsigned int nr_zones, i, j;
	struct zloop_device *zlo;
	int ret = -EINVAL;
	bool restore;

	__module_get(THIS_MODULE);

	nr_zones = opts->capacity >> ilog2(opts->zone_size);
	if (opts->nr_conv_zones >= nr_zones) {
		pr_err("Invalid number of conventional zones %u\n",
		       opts->nr_conv_zones);
		goto out;
	}

	zlo = kvzalloc(struct_size(zlo, zones, nr_zones), GFP_KERNEL);
	if (!zlo) {
		ret = -ENOMEM;
		goto out;
	}
	zlo->state = Zlo_creating;

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		goto out_free_dev;

	/* Allocate an id. If @opts->id >= 0, we're requesting that specific id. */
	if (opts->id >= 0) {
		ret = idr_alloc(&zloop_index_idr, zlo,
				  opts->id, opts->id + 1, GFP_KERNEL);
		if (ret == -ENOSPC)
			ret = -EEXIST;
	} else {
		ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL);
	}
	mutex_unlock(&zloop_ctl_mutex);
	if (ret < 0)
		goto out_free_dev;

	zlo->id = ret;
	zlo->zone_shift = ilog2(opts->zone_size);
	zlo->zone_size = opts->zone_size;
	if (opts->zone_capacity)
		zlo->zone_capacity = opts->zone_capacity;
	else
		zlo->zone_capacity = zlo->zone_size;
	zlo->nr_zones = nr_zones;
	zlo->nr_conv_zones = opts->nr_conv_zones;
	zlo->buffered_io = opts->buffered_io;

	zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
				opts->nr_queues * opts->queue_depth, zlo->id);
	if (!zlo->workqueue) {
		ret = -ENOMEM;
		goto out_free_idr;
	}

	if (opts->base_dir)
		zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL);
	else
		zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL);
	if (!zlo->base_dir) {
		ret = -ENOMEM;
		goto out_destroy_workqueue;
	}

	zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u",
					    zlo->base_dir, zlo->id);
	if (IS_ERR(zlo->data_dir)) {
		ret = PTR_ERR(zlo->data_dir);
		pr_warn("Failed to open directory %s/%u (err=%d)\n",
			zlo->base_dir, zlo->id, ret);
		goto out_free_base_dir;
	}

	/*
	 * If we already have zone files, we are restoring a device created by a
	 * previous add operation. In this case, zloop_init_zone() will check
	 * that the zone files are consistent with the zone configuration given.
	 */
	restore = zloop_dev_exists(zlo);
	for (i = 0; i < nr_zones; i++) {
		ret = zloop_init_zone(zlo, opts, i, restore);
		if (ret)
			goto out_close_files;
	}

	lim.physical_block_size = zlo->block_size;
	lim.logical_block_size = zlo->block_size;

	zlo->tag_set.ops = &zloop_mq_ops;
	zlo->tag_set.nr_hw_queues = opts->nr_queues;
	zlo->tag_set.queue_depth = opts->queue_depth;
	zlo->tag_set.numa_node = NUMA_NO_NODE;
	zlo->tag_set.cmd_size = sizeof(struct zloop_cmd);
	zlo->tag_set.driver_data = zlo;

	ret = blk_mq_alloc_tag_set(&zlo->tag_set);
	if (ret) {
		pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret);
		goto out_close_files;
	}

	zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo);
	if (IS_ERR(zlo->disk)) {
		ret = PTR_ERR(zlo->disk);
		pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret);
		goto out_cleanup_tags;
	}
	zlo->disk->flags = GENHD_FL_NO_PART;
	zlo->disk->fops = &zloop_fops;
	zlo->disk->private_data = zlo;
	sprintf(zlo->disk->disk_name, "zloop%d", zlo->id);
	set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones);

	ret = blk_revalidate_disk_zones(zlo->disk);
	if (ret)
		goto out_cleanup_disk;

	ret = add_disk(zlo->disk);
	if (ret) {
		pr_err("add_disk failed (err=%d)\n", ret);
		goto out_cleanup_disk;
	}

	mutex_lock(&zloop_ctl_mutex);
	zlo->state = Zlo_live;
	mutex_unlock(&zloop_ctl_mutex);

	pr_info("Added device %d: %u zones of %llu MB, %u B block size\n",
		zlo->id, zlo->nr_zones,
		((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
		zlo->block_size);

	return 0;

out_cleanup_disk:
	put_disk(zlo->disk);
out_cleanup_tags:
	blk_mq_free_tag_set(&zlo->tag_set);
out_close_files:
	for (j = 0; j < i; j++) {
		struct zloop_zone *zone = &zlo->zones[j];

		if (!IS_ERR_OR_NULL(zone->file))
			fput(zone->file);
	}
	fput(zlo->data_dir);
out_free_base_dir:
	kfree(zlo->base_dir);
out_destroy_workqueue:
	destroy_workqueue(zlo->workqueue);
out_free_idr:
	mutex_lock(&zloop_ctl_mutex);
	idr_remove(&zloop_index_idr, zlo->id);
	mutex_unlock(&zloop_ctl_mutex);
out_free_dev:
	kvfree(zlo);
out:
	module_put(THIS_MODULE);
	if (ret == -ENOENT)
		ret = -EINVAL;
	return ret;
}

static int zloop_ctl_remove(struct zloop_options *opts)
{
	struct zloop_device *zlo;
	int ret;

	if (!(opts->mask & ZLOOP_OPT_ID)) {
		pr_err("No ID specified\n");
		return -EINVAL;
	}

	ret = mutex_lock_killable(&zloop_ctl_mutex);
	if (ret)
		return ret;

	zlo = idr_find(&zloop_index_idr, opts->id);
	if (!zlo || zlo->state == Zlo_creating) {
		ret = -ENODEV;
	} else if (zlo->state == Zlo_deleting) {
		ret = -EINVAL;
	} else {
		idr_remove(&zloop_index_idr, zlo->id);
		zlo->state = Zlo_deleting;
	}

	mutex_unlock(&zloop_ctl_mutex);
	if (ret)
		return ret;

	del_gendisk(zlo->disk);
	put_disk(zlo->disk);

	pr_info("Removed device %d\n", opts->id);

	module_put(THIS_MODULE);

	return 0;
}

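/*
 * Parse a comma-separated option string into @opts, starting from the
 * default values, e.g. (hypothetical values):
 *
 *   "id=1,capacity_mb=4096,zone_size_mb=64,conv_zones=0,buffered_io"
 */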
static int zloop_parse_options(struct zloop_options *opts, const char *buf)
{
	substring_t args[MAX_OPT_ARGS];
	char *options, *o, *p;
	unsigned int token;
	int ret = 0;

	/* Set defaults. */
	opts->mask = 0;
	opts->id = ZLOOP_DEF_ID;
	opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES;
	opts->zone_size = ZLOOP_DEF_ZONE_SIZE;
	opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES;
	opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
	opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
	opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;

	if (!buf)
		return 0;

	/* Skip leading spaces before the options. */
	while (isspace(*buf))
		buf++;

	options = o = kstrdup(buf, GFP_KERNEL);
	if (!options)
		return -ENOMEM;

	/* Parse the options, doing only some light invalid value checks. */
	while ((p = strsep(&o, ",\n")) != NULL) {
		if (!*p)
			continue;

		token = match_token(p, zloop_opt_tokens, args);
		opts->mask |= token;
		switch (token) {
		case ZLOOP_OPT_ID:
			if (match_int(args, &opts->id)) {
				ret = -EINVAL;
				goto out;
			}
			break;
		case ZLOOP_OPT_CAPACITY:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid capacity\n");
				ret = -EINVAL;
				goto out;
			}
			opts->capacity =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_ZONE_SIZE:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB ||
			    !is_power_of_2(token)) {
				pr_err("Invalid zone size %u\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->zone_size =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_ZONE_CAPACITY:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid zone capacity\n");
				ret = -EINVAL;
				goto out;
			}
			opts->zone_capacity =
				((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
			break;
		case ZLOOP_OPT_NR_CONV_ZONES:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			opts->nr_conv_zones = token;
			break;
		case ZLOOP_OPT_BASE_DIR:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->base_dir);
			opts->base_dir = p;
			break;
		case ZLOOP_OPT_NR_QUEUES:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid number of queues\n");
				ret = -EINVAL;
				goto out;
			}
			opts->nr_queues = min(token, num_online_cpus());
			break;
		case ZLOOP_OPT_QUEUE_DEPTH:
			if (match_uint(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (!token) {
				pr_err("Invalid queue depth\n");
				ret = -EINVAL;
				goto out;
			}
			opts->queue_depth = token;
			break;
		case ZLOOP_OPT_BUFFERED_IO:
			opts->buffered_io = true;
			break;
		case ZLOOP_OPT_ERR:
		default:
			pr_warn("unknown parameter or missing value '%s'\n", p);
			ret = -EINVAL;
			goto out;
		}
	}

	ret = -EINVAL;
	if (opts->capacity <= opts->zone_size) {
		pr_err("Invalid capacity\n");
		goto out;
	}

	if (opts->zone_capacity > opts->zone_size) {
		pr_err("Invalid zone capacity\n");
		goto out;
	}

	ret = 0;
out:
	kfree(options);
	return ret;
}

enum {
	ZLOOP_CTL_ADD,
	ZLOOP_CTL_REMOVE,
};

static struct zloop_ctl_op {
	int		code;
	const char	*name;
} zloop_ctl_ops[] = {
	{ ZLOOP_CTL_ADD,	"add" },
	{ ZLOOP_CTL_REMOVE,	"remove" },
	{ -1,	NULL },
};

static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf,
			       size_t count, loff_t *pos)
{
	struct zloop_options opts = { };
	struct zloop_ctl_op *op;
	const char *buf, *opts_buf;
	int i, ret;

	if (count > PAGE_SIZE)
		return -ENOMEM;

	buf = memdup_user_nul(ubuf, count);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) {
		op = &zloop_ctl_ops[i];
		if (!op->name) {
			pr_err("Invalid operation\n");
			ret = -EINVAL;
			goto out;
		}
		if (!strncmp(buf, op->name, strlen(op->name)))
			break;
	}

	if (count <= strlen(op->name))
		opts_buf = NULL;
	else
		opts_buf = buf + strlen(op->name);

	ret = zloop_parse_options(&opts, opts_buf);
	if (ret) {
		pr_err("Failed to parse options\n");
		goto out;
	}

	switch (op->code) {
	case ZLOOP_CTL_ADD:
		ret = zloop_ctl_add(&opts);
		break;
	case ZLOOP_CTL_REMOVE:
		ret = zloop_ctl_remove(&opts);
		break;
	default:
		pr_err("Invalid operation\n");
		ret = -EINVAL;
		goto out;
	}

out:
	kfree(opts.base_dir);
	kfree(buf);
	return ret ? ret : count;
}

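/*
 * Reading /dev/zloop-control reports the supported command syntax, i.e.
 * something like:
 *
 *   add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,...
 *   remove id=%d
 */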
static int zloop_ctl_show(struct seq_file *seq_file, void *private)
{
	const struct match_token *tok;
	int i;

	/* Add operation */
	seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name);
	for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) {
		tok = &zloop_opt_tokens[i];
		if (!tok->pattern)
			break;
		if (i)
			seq_putc(seq_file, ',');
		seq_puts(seq_file, tok->pattern);
	}
	seq_putc(seq_file, '\n');

	/* Remove operation */
	seq_puts(seq_file, zloop_ctl_ops[1].name);
	seq_puts(seq_file, " id=%d\n");

	return 0;
}

static int zloop_ctl_open(struct inode *inode, struct file *file)
{
	file->private_data = NULL;
	return single_open(file, zloop_ctl_show, NULL);
}

static int zloop_ctl_release(struct inode *inode, struct file *file)
{
	return single_release(inode, file);
}

static const struct file_operations zloop_ctl_fops = {
	.owner		= THIS_MODULE,
	.open		= zloop_ctl_open,
	.release	= zloop_ctl_release,
	.write		= zloop_ctl_write,
	.read		= seq_read,
};

static struct miscdevice zloop_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "zloop-control",
	.fops		= &zloop_ctl_fops,
};

static int __init zloop_init(void)
{
	int ret;

	ret = misc_register(&zloop_misc);
	if (ret) {
		pr_err("Failed to register misc device: %d\n", ret);
		return ret;
	}
	pr_info("Module loaded\n");

	return 0;
}

static void __exit zloop_exit(void)
{
	misc_deregister(&zloop_misc);
	idr_destroy(&zloop_index_idr);
}

module_init(zloop_init);
module_exit(zloop_exit);

MODULE_DESCRIPTION("Zoned loopback device");
MODULE_LICENSE("GPL");