xref: /linux/tools/testing/selftests/ublk/stripe.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "kublk.h"
4 
5 #define NR_STRIPE  MAX_BACK_FILES
6 
/* Per-device stripe geometry, stashed in dev->private_data. */
struct stripe_conf {
	unsigned nr_files;	/* number of backing files striped over */
	unsigned shift;		/* log2(chunk size in bytes) */
};
11 
/* The slice of one ublk I/O that lands on a single backing file. */
struct stripe {
	loff_t 		start;		/* start sector within the backing file */
	unsigned 	nr_sects;	/* total sectors mapped to this file */
	int 		seq;		/* backing file index this stripe targets */

	struct iovec 	*vec;		/* iovec array, one entry per chunk */
	unsigned 	nr_vec;		/* entries of vec in use */
	unsigned 	cap;		/* capacity of vec */
};
21 
/* One stripe slot per backing file plus shared trailing iovec storage. */
struct stripe_array {
	struct stripe 	s[NR_STRIPE];
	unsigned 	nr;		/* stripe slots actually populated */
	struct iovec 	_vec[];		/* backing storage carved up among s[].vec */
};
27 
get_chunk_shift(const struct ublk_queue * q)28 static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q)
29 {
30 	return (struct stripe_conf *)q->dev->private_data;
31 }
32 
calculate_nr_vec(const struct stripe_conf * conf,const struct ublksrv_io_desc * iod)33 static inline unsigned calculate_nr_vec(const struct stripe_conf *conf,
34 		const struct ublksrv_io_desc *iod)
35 {
36 	const unsigned shift = conf->shift - 9;
37 	const unsigned unit_sects = conf->nr_files << shift;
38 	loff_t start = iod->start_sector;
39 	loff_t end = start + iod->nr_sectors;
40 
41 	return (end / unit_sects) - (start / unit_sects) + 1;
42 }
43 
alloc_stripe_array(const struct stripe_conf * conf,const struct ublksrv_io_desc * iod)44 static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf,
45 		const struct ublksrv_io_desc *iod)
46 {
47 	unsigned nr_vecs = calculate_nr_vec(conf, iod);
48 	unsigned total = nr_vecs * conf->nr_files;
49 	struct stripe_array *s;
50 	int i;
51 
52 	s = malloc(sizeof(*s) + total * sizeof(struct iovec));
53 
54 	s->nr = 0;
55 	for (i = 0; i < conf->nr_files; i++) {
56 		struct stripe *t = &s->s[i];
57 
58 		t->nr_vec = 0;
59 		t->vec = &s->_vec[i * nr_vecs];
60 		t->nr_sects = 0;
61 		t->cap = nr_vecs;
62 	}
63 
64 	return s;
65 }
66 
/* Release a stripe_array obtained from alloc_stripe_array(). */
static void free_stripe_array(struct stripe_array *s)
{
	free(s);
}
71 
/*
 * Split the I/O described by @iod into per-backing-file stripes.
 *
 * Walks the request chunk by chunk in round-robin order (@idx): each
 * chunk maps to one backing file, consecutive chunks hitting the same
 * file are merged into that file's single growing stripe, and every
 * chunk appends one iovec entry covering its slice of the request
 * buffer at @base (or the buffer offset when @base is NULL).
 */
static void calculate_stripe_array(const struct stripe_conf *conf,
		const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base)
{
	const unsigned shift = conf->shift - 9;	/* chunk size in sectors, log2 */
	const unsigned chunk_sects = 1 << shift;
	const unsigned unit_sects = conf->nr_files << shift; /* full stripe width */
	off64_t start = iod->start_sector;
	off64_t end = start + iod->nr_sectors;
	unsigned long done = 0;	/* bytes of the request buffer consumed so far */
	unsigned idx = 0;

	while (start < end) {
		/* sectors remaining in the chunk containing @start */
		unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1));
		/* first sector of the stripe unit containing @start */
		loff_t unit_off = (start / unit_sects) * unit_sects;
		/* which backing file this chunk belongs to */
		unsigned seq = (start - unit_off) >> shift;
		struct stripe *this = &s->s[idx];
		/* corresponding sector offset within that backing file */
		loff_t stripe_off = (unit_off / conf->nr_files) +
			(start & (chunk_sects - 1));

		if (nr_sects > end - start)
			nr_sects = end - start;
		if (this->nr_sects == 0) {
			/* first chunk landing in this stripe slot */
			this->nr_sects = nr_sects;
			this->start = stripe_off;
			this->seq = seq;
			s->nr += 1;
		} else {
			/* later chunks for the same file must be contiguous */
			assert(seq == this->seq);
			assert(this->start + this->nr_sects == stripe_off);
			this->nr_sects += nr_sects;
		}

		assert(this->nr_vec < this->cap);
		this->vec[this->nr_vec].iov_base = (void *)(base + done);
		this->vec[this->nr_vec++].iov_len = nr_sects << 9;

		start += nr_sects;
		done += nr_sects << 9;
		idx = (idx + 1) % conf->nr_files;
	}
}
113 
stripe_to_uring_op(const struct ublksrv_io_desc * iod,int zc)114 static inline enum io_uring_op stripe_to_uring_op(
115 		const struct ublksrv_io_desc *iod, int zc)
116 {
117 	unsigned ublk_op = ublksrv_get_op(iod);
118 
119 	if (ublk_op == UBLK_IO_OP_READ)
120 		return zc ? IORING_OP_READV_FIXED : IORING_OP_READV;
121 	else if (ublk_op == UBLK_IO_OP_WRITE)
122 		return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV;
123 	assert(0);
124 }
125 
/*
 * Queue one striped READ/WRITE: for zero-copy, a leading buffer-register
 * SQE and a trailing buffer-unregister SQE bracket the data transfers;
 * in between, one readv/writev SQE is issued per populated stripe.
 * Returns the number of target I/O completions the caller should expect
 * (the register SQE is CQE_SKIP_SUCCESS, hence s->nr + zc, not + extra).
 */
static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
				  const struct ublksrv_io_desc *iod, int tag)
{
	const struct stripe_conf *conf = get_chunk_shift(q);
	unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0);
	unsigned zc = (ublk_queue_use_zc(q) != 0);
	enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc);
	struct io_uring_sqe *sqe[NR_STRIPE];
	struct stripe_array *s = alloc_stripe_array(conf, iod);
	struct ublk_io *io = ublk_get_io(q, tag);
	int i, extra = zc ? 2 : 0;	/* register + unregister SQEs for zc */
	/* with (auto) zero-copy the iovec bases are buffer offsets, not addresses */
	void *base = (zc | auto_zc) ? NULL : (void *)iod->addr;

	io->private_data = s;	/* freed in ublk_stripe_io_done() */
	calculate_stripe_array(conf, iod, s, base);

	ublk_io_alloc_sqes(t, sqe, s->nr + extra);

	if (zc) {
		/* sqe[0]: register the ublk request buffer for fixed I/O */
		io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index);
		sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
		sqe[0]->user_data = build_user_data(tag,
			ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
	}

	/* one vectored I/O per populated stripe; sqe index is shifted by zc */
	for (i = zc; i < s->nr + extra - zc; i++) {
		struct stripe *t = &s->s[i - zc];

		io_uring_prep_rw(op, sqe[i],
				t->seq + 1,	/* fixed-file index; slot 0 is the ublk char dev */
				(void *)t->vec,
				t->nr_vec,
				t->start << 9);
		io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
		if (auto_zc || zc) {
			sqe[i]->buf_index = tag;
			if (zc)
				sqe[i]->flags |= IOSQE_IO_HARDLINK;
		}
		/* bit63 marks us as tgt io */
		sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, q->q_id, 1);
	}
	if (zc) {
		/* last SQE: drop the fixed-buffer registration again */
		struct io_uring_sqe *unreg = sqe[s->nr + 1];

		io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, io->buf_index);
		unreg->user_data = build_user_data(
			tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1);
	}

	/* register buffer is skip_success */
	return s->nr + zc;
}
179 
handle_flush(struct ublk_thread * t,struct ublk_queue * q,const struct ublksrv_io_desc * iod,int tag)180 static int handle_flush(struct ublk_thread *t, struct ublk_queue *q,
181 			const struct ublksrv_io_desc *iod, int tag)
182 {
183 	const struct stripe_conf *conf = get_chunk_shift(q);
184 	struct io_uring_sqe *sqe[NR_STRIPE];
185 	int i;
186 
187 	ublk_io_alloc_sqes(t, sqe, conf->nr_files);
188 	for (i = 0; i < conf->nr_files; i++) {
189 		io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
190 		io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
191 		sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, q->q_id, 1);
192 	}
193 	return conf->nr_files;
194 }
195 
stripe_queue_tgt_io(struct ublk_thread * t,struct ublk_queue * q,int tag)196 static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
197 			       int tag)
198 {
199 	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
200 	unsigned ublk_op = ublksrv_get_op(iod);
201 	int ret = 0;
202 
203 	switch (ublk_op) {
204 	case UBLK_IO_OP_FLUSH:
205 		ret = handle_flush(t, q, iod, tag);
206 		break;
207 	case UBLK_IO_OP_WRITE_ZEROES:
208 	case UBLK_IO_OP_DISCARD:
209 		ret = -ENOTSUP;
210 		break;
211 	case UBLK_IO_OP_READ:
212 	case UBLK_IO_OP_WRITE:
213 		ret = stripe_queue_tgt_rw_io(t, q, iod, tag);
214 		break;
215 	default:
216 		ret = -EINVAL;
217 		break;
218 	}
219 	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag,
220 			iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret);
221 	return ret;
222 }
223 
/* queue_io entry point: submit the target SQEs and record how many were queued. */
static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q,
				int tag)
{
	ublk_queued_tgt_io(t, q, tag, stripe_queue_tgt_io(t, q, tag));
	return 0;
}
232 
/*
 * Completion handler for each target CQE of one ublk I/O: records the
 * first failure, flags short reads/writes as -EIO, and once every piece
 * has completed, finishes the ublk request and frees the stripe array.
 */
static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q,
				const struct io_uring_cqe *cqe)
{
	unsigned tag = user_data_to_tag(cqe->user_data);
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
	unsigned op = user_data_to_op(cqe->user_data);
	struct ublk_io *io = ublk_get_io(q, tag);
	int res = cqe->res;

	/* keep the first recorded result; unregister success never overrides it */
	if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
		if (!io->result)
			io->result = res;
		if (res < 0)
			ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
	}

	/* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
	if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
		io->tgt_ios += 1;

	/* fail short READ/WRITE simply */
	if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {
		/* tgt_data encodes the stripe index (set at SQE prep time) */
		unsigned seq = user_data_to_tgt_data(cqe->user_data);
		struct stripe_array *s = io->private_data;

		if (res < s->s[seq].nr_sects << 9) {
			io->result = -EIO;
			ublk_err("%s: short rw op %u res %d exp %u tag %u\n",
					__func__, op, res, s->s[seq].vec->iov_len, tag);
		}
	}

	if (ublk_completed_tgt_io(t, q, tag)) {
		/* all target I/Os done: report transferred bytes, or the error */
		int res = io->result;

		if (!res)
			res = iod->nr_sectors << 9;

		ublk_complete_io(t, q, tag, res);

		free_stripe_array(io->private_data);
		io->private_data = NULL;
	}
}
277 
ublk_stripe_tgt_init(const struct dev_ctx * ctx,struct ublk_dev * dev)278 static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
279 {
280 	struct ublk_params p = {
281 		.types = UBLK_PARAM_TYPE_BASIC,
282 		.basic = {
283 			.attrs = UBLK_ATTR_VOLATILE_CACHE,
284 			.logical_bs_shift	= 9,
285 			.physical_bs_shift	= 12,
286 			.io_opt_shift	= 12,
287 			.io_min_shift	= 9,
288 			.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
289 		},
290 	};
291 	unsigned chunk_size = ctx->stripe.chunk_size;
292 	struct stripe_conf *conf;
293 	unsigned chunk_shift;
294 	loff_t bytes = 0;
295 	int ret, i, mul = 1;
296 
297 	if (ctx->auto_zc_fallback) {
298 		ublk_err("%s: not support auto_zc_fallback\n", __func__);
299 		return -EINVAL;
300 	}
301 
302 	if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
303 		ublk_err("invalid chunk size %u\n", chunk_size);
304 		return -EINVAL;
305 	}
306 
307 	if (chunk_size < 4096 || chunk_size > 512 * 1024) {
308 		ublk_err("invalid chunk size %u\n", chunk_size);
309 		return -EINVAL;
310 	}
311 
312 	chunk_shift = ilog2(chunk_size);
313 
314 	ret = backing_file_tgt_init(dev);
315 	if (ret)
316 		return ret;
317 
318 	if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE)
319 		return -EINVAL;
320 
321 	assert(dev->nr_fds == dev->tgt.nr_backing_files + 1);
322 
323 	for (i = 0; i < dev->tgt.nr_backing_files; i++)
324 		dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1);
325 
326 	for (i = 0; i < dev->tgt.nr_backing_files; i++) {
327 		unsigned long size = dev->tgt.backing_file_size[i];
328 
329 		if (size != dev->tgt.backing_file_size[0])
330 			return -EINVAL;
331 		bytes += size;
332 	}
333 
334 	conf = malloc(sizeof(*conf));
335 	conf->shift = chunk_shift;
336 	conf->nr_files = dev->tgt.nr_backing_files;
337 
338 	dev->private_data = conf;
339 	dev->tgt.dev_size = bytes;
340 	p.basic.dev_sectors = bytes >> 9;
341 	dev->tgt.params = p;
342 
343 	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
344 		mul = 2;
345 	dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
346 	dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
347 
348 	printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files);
349 
350 	return 0;
351 }
352 
/* Tear down the stripe target: free the config and close the backing files. */
static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
{
	free(dev->private_data);
	backing_file_tgt_deinit(dev);
}
358 
ublk_stripe_cmd_line(struct dev_ctx * ctx,int argc,char * argv[])359 static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
360 {
361 	static const struct option longopts[] = {
362 		{ "chunk_size", 	1,	NULL,  0  },
363 		{ 0, 0, 0, 0 }
364 	};
365 	int option_idx, opt;
366 
367 	ctx->stripe.chunk_size = 65536;
368 	while ((opt = getopt_long(argc, argv, "",
369 				  longopts, &option_idx)) != -1) {
370 		switch (opt) {
371 		case 0:
372 			if (!strcmp(longopts[option_idx].name, "chunk_size"))
373 				ctx->stripe.chunk_size = strtol(optarg, NULL, 10);
374 		}
375 	}
376 }
377 
/* Print the usage line for the stripe target's extra options. */
static void ublk_stripe_usage(const struct ublk_tgt_ops *ops)
{
	fputs("\tstripe: [--chunk_size chunk_size (default 65536)]\n", stdout);
}
382 
/* Target ops table for "stripe": RAID0-style striping over backing files. */
const struct ublk_tgt_ops stripe_tgt_ops = {
	.name = "stripe",
	.init_tgt = ublk_stripe_tgt_init,
	.deinit_tgt = ublk_stripe_tgt_deinit,
	.queue_io = ublk_stripe_queue_io,
	.tgt_io_done = ublk_stripe_io_done,
	.parse_cmd_line = ublk_stripe_cmd_line,
	.usage = ublk_stripe_usage,
};
392