xref: /linux/include/uapi/linux/ublk_cmd.h (revision 7fe6ac157b7e15c8976bd62ad7cb98e248884e83)
1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 #ifndef USER_BLK_DRV_CMD_INC_H
3 #define USER_BLK_DRV_CMD_INC_H
4 
5 #include <linux/types.h>
6 
7 /* ublk server command definition */
8 
9 /*
10  * Admin commands, issued by ublk server, and handled by ublk driver.
11  *
12  * Legacy command definition, don't use in new application, and don't
13  * add new such definition any more
14  */
15 #define	UBLK_CMD_GET_QUEUE_AFFINITY	0x01
16 #define	UBLK_CMD_GET_DEV_INFO	0x02
17 #define	UBLK_CMD_ADD_DEV		0x04
18 #define	UBLK_CMD_DEL_DEV		0x05
19 #define	UBLK_CMD_START_DEV	0x06
20 #define	UBLK_CMD_STOP_DEV	0x07
21 #define	UBLK_CMD_SET_PARAMS	0x08
22 #define	UBLK_CMD_GET_PARAMS	0x09
23 #define	UBLK_CMD_START_USER_RECOVERY	0x10
24 #define	UBLK_CMD_END_USER_RECOVERY	0x11
25 #define	UBLK_CMD_GET_DEV_INFO2		0x12
26 
27 /* Any new ctrl command should encode by __IO*() */
28 #define UBLK_U_CMD_GET_QUEUE_AFFINITY	\
29 	_IOR('u', UBLK_CMD_GET_QUEUE_AFFINITY, struct ublksrv_ctrl_cmd)
30 #define UBLK_U_CMD_GET_DEV_INFO		\
31 	_IOR('u', UBLK_CMD_GET_DEV_INFO, struct ublksrv_ctrl_cmd)
32 #define UBLK_U_CMD_ADD_DEV		\
33 	_IOWR('u', UBLK_CMD_ADD_DEV, struct ublksrv_ctrl_cmd)
34 #define UBLK_U_CMD_DEL_DEV		\
35 	_IOWR('u', UBLK_CMD_DEL_DEV, struct ublksrv_ctrl_cmd)
36 #define UBLK_U_CMD_START_DEV		\
37 	_IOWR('u', UBLK_CMD_START_DEV, struct ublksrv_ctrl_cmd)
38 #define UBLK_U_CMD_STOP_DEV		\
39 	_IOWR('u', UBLK_CMD_STOP_DEV, struct ublksrv_ctrl_cmd)
40 #define UBLK_U_CMD_SET_PARAMS		\
41 	_IOWR('u', UBLK_CMD_SET_PARAMS, struct ublksrv_ctrl_cmd)
42 #define UBLK_U_CMD_GET_PARAMS		\
43 	_IOR('u', UBLK_CMD_GET_PARAMS, struct ublksrv_ctrl_cmd)
44 #define UBLK_U_CMD_START_USER_RECOVERY	\
45 	_IOWR('u', UBLK_CMD_START_USER_RECOVERY, struct ublksrv_ctrl_cmd)
46 #define UBLK_U_CMD_END_USER_RECOVERY	\
47 	_IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd)
48 #define UBLK_U_CMD_GET_DEV_INFO2	\
49 	_IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd)
50 #define UBLK_U_CMD_GET_FEATURES	\
51 	_IOR('u', 0x13, struct ublksrv_ctrl_cmd)
52 #define UBLK_U_CMD_DEL_DEV_ASYNC	\
53 	_IOR('u', 0x14, struct ublksrv_ctrl_cmd)
54 #define UBLK_U_CMD_UPDATE_SIZE		\
55 	_IOWR('u', 0x15, struct ublksrv_ctrl_cmd)
56 #define UBLK_U_CMD_QUIESCE_DEV		\
57 	_IOWR('u', 0x16, struct ublksrv_ctrl_cmd)
58 #define UBLK_U_CMD_TRY_STOP_DEV		\
59 	_IOWR('u', 0x17, struct ublksrv_ctrl_cmd)
60 /*
61  * Register a shared memory buffer for zero-copy I/O.
62  * Input:  ctrl_cmd.addr points to struct ublk_shmem_buf_reg (buffer VA + size)
63  *         ctrl_cmd.len  = sizeof(struct ublk_shmem_buf_reg)
64  * Result: >= 0 is the assigned buffer index, < 0 is error
65  *
66  * The kernel pins pages from the calling process's address space
67  * and inserts PFN ranges into a per-device maple tree. When a block
68  * request's pages match registered pages, the driver sets
69  * UBLK_IO_F_SHMEM_ZC and encodes the buffer index + offset in addr,
70  * allowing the server to access the data via its own mapping of the
71  * same shared memory — true zero copy.
72  *
73  * The memory can be backed by memfd, hugetlbfs, or any GUP-compatible
74  * shared mapping. Queue freeze is handled internally.
75  *
76  * The buffer VA and size are passed via a user buffer (not inline in
77  * ctrl_cmd) so that unprivileged devices can prepend the device path
78  * to ctrl_cmd.addr without corrupting the VA.
79  */
80 #define UBLK_U_CMD_REG_BUF		\
81 	_IOWR('u', 0x18, struct ublksrv_ctrl_cmd)
82 /*
83  * Unregister a shared memory buffer.
84  * Input:  ctrl_cmd.data[0] = buffer index
85  */
86 #define UBLK_U_CMD_UNREG_BUF		\
87 	_IOWR('u', 0x19, struct ublksrv_ctrl_cmd)
88 
/* Parameter buffer for UBLK_U_CMD_REG_BUF, pointed to by ctrl_cmd.addr */
struct ublk_shmem_buf_reg {
	__u64	addr;	/* userspace virtual address of shared memory */
	__u64	len;	/* buffer size in bytes, page-aligned, default max 4GB */
	__u32	flags;	/* UBLK_SHMEM_BUF_* flags (e.g. UBLK_SHMEM_BUF_READ_ONLY) */
	__u32	reserved;	/* reserved for future use */
};
96 
97 /* Pin pages without FOLL_WRITE; usable with write-sealed memfd */
98 #define UBLK_SHMEM_BUF_READ_ONLY	(1U << 0)
99 /*
100  * 64bits are enough now, and it should be easy to extend in case of
101  * running out of feature flags
102  */
103 #define UBLK_FEATURES_LEN  8
104 
105 /*
106  * IO commands, issued by ublk server, and handled by ublk driver.
107  *
108  * FETCH_REQ: issued via sqe(URING_CMD) beforehand for fetching IO request
109  *      from ublk driver, should be issued only when starting device. After
110  *      the associated cqe is returned, request's tag can be retrieved via
111  *      cqe->userdata.
112  *
113  * COMMIT_AND_FETCH_REQ: issued via sqe(URING_CMD) after ublkserver handled
114  *      this IO request, request's handling result is committed to ublk
115  *      driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be
116  *      handled before completing io request.
117  *
118  * NEED_GET_DATA: only used for write requests to set io addr and copy data
119  *      When NEED_GET_DATA is set, ublksrv has to issue UBLK_IO_NEED_GET_DATA
120  *      command after ublk driver returns UBLK_IO_RES_NEED_GET_DATA.
121  *
122  *      It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag
123  *      while starting a ublk device.
124  */
125 
126 /*
127  * Legacy IO command definition, don't use in new application, and don't
128  * add new such definition any more
129  */
130 #define	UBLK_IO_FETCH_REQ		0x20
131 #define	UBLK_IO_COMMIT_AND_FETCH_REQ	0x21
132 #define	UBLK_IO_NEED_GET_DATA	0x22
133 
134 /* Any new IO command should encode by __IOWR() */
135 #define	UBLK_U_IO_FETCH_REQ		\
136 	_IOWR('u', UBLK_IO_FETCH_REQ, struct ublksrv_io_cmd)
137 #define	UBLK_U_IO_COMMIT_AND_FETCH_REQ	\
138 	_IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd)
139 #define	UBLK_U_IO_NEED_GET_DATA		\
140 	_IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd)
141 #define	UBLK_U_IO_REGISTER_IO_BUF	\
142 	_IOWR('u', 0x23, struct ublksrv_io_cmd)
143 #define	UBLK_U_IO_UNREGISTER_IO_BUF	\
144 	_IOWR('u', 0x24, struct ublksrv_io_cmd)
145 
146 /*
147  * return 0 if the command is run successfully, otherwise failure code
148  * is returned
149  */
150 #define	UBLK_U_IO_PREP_IO_CMDS	\
151 	_IOWR('u', 0x25, struct ublk_batch_io)
152 /*
153  * If failure code is returned, nothing in the command buffer is handled.
154  * Otherwise, the returned value means how many bytes in command buffer
155  * are handled actually, then number of handled IOs can be calculated with
156  * `elem_bytes` for each IO. IOs in the remained bytes are not committed,
157  * userspace has to check return value for dealing with partial committing
158  * correctly.
159  */
160 #define	UBLK_U_IO_COMMIT_IO_CMDS	\
161 	_IOWR('u', 0x26, struct ublk_batch_io)
162 
163 /*
164  * Fetch io commands to provided buffer in multishot style,
165  * `IORING_URING_CMD_MULTISHOT` is required for this command.
166  */
167 #define	UBLK_U_IO_FETCH_IO_CMDS 	\
168 	_IOWR('u', 0x27, struct ublk_batch_io)
169 
170 /* only ABORT means that no re-fetch */
171 #define UBLK_IO_RES_OK			0
172 #define UBLK_IO_RES_NEED_GET_DATA	1
173 #define UBLK_IO_RES_ABORT		(-ENODEV)
174 
#define UBLKSRV_CMD_BUF_OFFSET	0
#define UBLKSRV_IO_BUF_OFFSET	0x80000000

/* tag bit is 16bit, so far limit at most 4096 IOs for each queue */
#define UBLK_MAX_QUEUE_DEPTH	4096

/*
 * An IO buffer offset packs three fields, from LSB upward:
 * in-buffer byte offset (UBLK_IO_BUF_BITS), then tag (UBLK_TAG_BITS),
 * then queue id (UBLK_QID_BITS).
 */

/* single IO buffer max size is 32MB */
#define UBLK_IO_BUF_OFF		0
#define UBLK_IO_BUF_BITS	25
#define UBLK_IO_BUF_BITS_MASK	((1ULL << UBLK_IO_BUF_BITS) - 1)

/* so at most 64K IOs for each queue */
#define UBLK_TAG_OFF		UBLK_IO_BUF_BITS
#define UBLK_TAG_BITS		16
#define UBLK_TAG_BITS_MASK	((1ULL << UBLK_TAG_BITS) - 1)

/* max 4096 queues */
#define UBLK_QID_OFF		(UBLK_TAG_OFF + UBLK_TAG_BITS)
#define UBLK_QID_BITS		12
#define UBLK_QID_BITS_MASK	((1ULL << UBLK_QID_BITS) - 1)

#define UBLK_MAX_NR_QUEUES	(1U << UBLK_QID_BITS)

#define UBLKSRV_IO_BUF_TOTAL_BITS	(UBLK_QID_OFF + UBLK_QID_BITS)
#define UBLKSRV_IO_BUF_TOTAL_SIZE	(1ULL << UBLKSRV_IO_BUF_TOTAL_BITS)

/* Copy to/from request integrity buffer instead of data buffer */
#define UBLK_INTEGRITY_FLAG_OFF 62
#define UBLKSRV_IO_INTEGRITY_FLAG (1ULL << UBLK_INTEGRITY_FLAG_OFF)
204 
205 /*
206  * ublk server can register data buffers for incoming I/O requests with a sparse
207  * io_uring buffer table. The request buffer can then be used as the data buffer
208  * for io_uring operations via the fixed buffer index.
209  * Note that the ublk server can never directly access the request data memory.
210  *
211  * To use this feature, the ublk server must first register a sparse buffer
212  * table on an io_uring instance.
213  * When an incoming ublk request is received, the ublk server submits a
214  * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The
215  * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register
216  * and addr is the index in the io_uring's buffer table to install the buffer.
217  * SQEs can now be submitted to the io_uring to read/write the request's buffer
218  * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or
219  * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index.
220  * Once the last io_uring operation using the request's buffer has completed,
221  * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag,
222  * and addr again specifying the request buffer to unregister.
223  * The ublk request is completed when its buffer is unregistered from all
224  * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ.
225  *
226  * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak
227  * uninitialized kernel memory by not reading into the full request buffer.
228  */
229 #define UBLK_F_SUPPORT_ZERO_COPY	(1ULL << 0)
230 
231 /*
232  * Force to complete io cmd via io_uring_cmd_complete_in_task so that
233  * performance comparison is done easily with using task_work_add
234  */
235 #define UBLK_F_URING_CMD_COMP_IN_TASK	(1ULL << 1)
236 
237 /*
238  * User should issue io cmd again for write requests to
239  * set io buffer address and copy data from bio vectors
240  * to the userspace io buffer.
241  *
242  * In this mode, task_work is not used.
243  */
244 #define UBLK_F_NEED_GET_DATA (1UL << 2)
245 
246 /*
247  * - Block devices are recoverable if ublk server exits and restarts
248  * - Outstanding I/O when ublk server exits is met with errors
 * - I/O issued while there is no ublk server is queued
250  */
251 #define UBLK_F_USER_RECOVERY	(1UL << 3)
252 
253 /*
254  * - Block devices are recoverable if ublk server exits and restarts
255  * - Outstanding I/O when ublk server exits is reissued
 * - I/O issued while there is no ublk server is queued
257  */
258 #define UBLK_F_USER_RECOVERY_REISSUE	(1UL << 4)
259 
260 /*
261  * Unprivileged user can create /dev/ublkcN and /dev/ublkbN.
262  *
263  * /dev/ublk-control needs to be available for unprivileged user, and it
264  * can be done via udev rule to make all control commands available to
265  * unprivileged user. Except for the command of UBLK_CMD_ADD_DEV, all
266  * other commands are only allowed for the owner of the specified device.
267  *
268  * When userspace sends UBLK_CMD_ADD_DEV, the device pair's owner_uid and
269  * owner_gid are stored to ublksrv_ctrl_dev_info by kernel, so far only
270  * the current user's uid/gid is stored, that said owner of the created
271  * device is always the current user.
272  *
273  * We still need udev rule to apply OWNER/GROUP with the stored owner_uid
274  * and owner_gid.
275  *
276  * Then ublk server can be run as unprivileged user, and /dev/ublkbN can
277  * be accessed and managed by its owner represented by owner_uid/owner_gid.
278  */
279 #define UBLK_F_UNPRIVILEGED_DEV	(1UL << 5)
280 
281 /* use ioctl encoding for uring command */
282 #define UBLK_F_CMD_IOCTL_ENCODE	(1UL << 6)
283 
284 /*
285  *  Copy between request and user buffer by pread()/pwrite()
286  *
287  *  Not available for UBLK_F_UNPRIVILEGED_DEV, otherwise userspace may
288  *  deceive us by not filling request buffer, then kernel uninitialized
289  *  data may be leaked.
290  */
291 #define UBLK_F_USER_COPY	(1UL << 7)
292 
293 /*
294  * User space sets this flag when setting up the device to request zoned storage support. Kernel may
295  * deny the request by returning an error.
296  */
297 #define UBLK_F_ZONED (1ULL << 8)
298 
299 /*
300  * - Block devices are recoverable if ublk server exits and restarts
301  * - Outstanding I/O when ublk server exits is met with errors
302  * - I/O issued while there is no ublk server is met with errors
303  */
304 #define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)
305 
306 /*
307  * Resizing a block device is possible with UBLK_U_CMD_UPDATE_SIZE
308  * New size is passed in cmd->data[0] and is in units of sectors
309  */
310 #define UBLK_F_UPDATE_SIZE		 (1ULL << 10)
311 
312 /*
313  * request buffer is registered automatically to uring_cmd's io_uring
314  * context before delivering this io command to ublk server, meantime
315  * it is un-registered automatically when completing this io command.
316  *
317  * For using this feature:
318  *
319  * - ublk server has to create sparse buffer table on the same `io_ring_ctx`
320  *   for issuing `UBLK_IO_FETCH_REQ` and `UBLK_IO_COMMIT_AND_FETCH_REQ`.
321  *   If uring_cmd isn't issued on same `io_ring_ctx`, it is ublk server's
322  *   responsibility to unregister the buffer by issuing `IO_UNREGISTER_IO_BUF`
323  *   manually, otherwise this ublk request won't complete.
324  *
325  * - ublk server passes auto buf register data via uring_cmd's sqe->addr,
326  *   `struct ublk_auto_buf_reg` is populated from sqe->addr, please see
327  *   the definition of ublk_sqe_addr_to_auto_buf_reg()
328  *
329  * - pass buffer index from `ublk_auto_buf_reg.index`
330  *
331  * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
332  *
333  * - pass flags from `ublk_auto_buf_reg.flags` if needed
334  *
335  * This way avoids extra cost from two uring_cmd, but also simplifies backend
336  * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
337  * IO_UNREGISTER_IO_BUF becomes not necessary.
338  *
339  * If wrong data or flags are provided, both IO_FETCH_REQ and
340  * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request
341  * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued
342  * successfully
343  */
344 #define UBLK_F_AUTO_BUF_REG 	(1ULL << 11)
345 
346 /*
347  * Control command `UBLK_U_CMD_QUIESCE_DEV` is added for quiescing device,
348  * which state can be transitioned to `UBLK_S_DEV_QUIESCED` or
349  * `UBLK_S_DEV_FAIL_IO` finally, and it needs ublk server cooperation for
350  * handling `UBLK_IO_RES_ABORT` correctly.
351  *
352  * Typical use case is for supporting to upgrade ublk server application,
353  * meantime keep ublk block device persistent during the period.
354  *
355  * This feature is only available when UBLK_F_USER_RECOVERY is enabled.
356  *
357  * Note, this command returns -EBUSY in case that all IO commands are being
358  * handled by ublk server and not completed in specified time period which
359  * is passed from the control command parameter.
360  */
361 #define UBLK_F_QUIESCE		(1ULL << 12)
362 
363 /*
364  * If this feature is set, ublk_drv supports each (qid,tag) pair having
365  * its own independent daemon task that is responsible for handling it.
366  * If it is not set, daemons are per-queue instead, so for two pairs
367  * (qid1,tag1) and (qid2,tag2), if qid1 == qid2, then the same task must
368  * be responsible for handling (qid1,tag1) and (qid2,tag2).
369  */
370 #define UBLK_F_PER_IO_DAEMON (1ULL << 13)
371 
372 /*
373  * If this feature is set, UBLK_U_IO_REGISTER_IO_BUF/UBLK_U_IO_UNREGISTER_IO_BUF
374  * can be issued for an I/O on any task. q_id and tag are also ignored in
375  * UBLK_U_IO_UNREGISTER_IO_BUF's ublksrv_io_cmd.
376  * If it is unset, zero-copy buffers can only be registered and unregistered by
377  * the I/O's daemon task. The q_id and tag of the registered buffer are required
378  * in UBLK_U_IO_UNREGISTER_IO_BUF's ublksrv_io_cmd.
379  */
380 #define UBLK_F_BUF_REG_OFF_DAEMON (1ULL << 14)
381 
382 /*
383  * Support the following commands for delivering & committing io command
384  * in batch.
385  *
386  * 	- UBLK_U_IO_PREP_IO_CMDS
387  * 	- UBLK_U_IO_COMMIT_IO_CMDS
388  * 	- UBLK_U_IO_FETCH_IO_CMDS
389  * 	- UBLK_U_IO_REGISTER_IO_BUF
390  * 	- UBLK_U_IO_UNREGISTER_IO_BUF
391  *
392  * The existing UBLK_U_IO_FETCH_REQ, UBLK_U_IO_COMMIT_AND_FETCH_REQ and
393  * UBLK_U_IO_NEED_GET_DATA uring_cmd are not supported for this feature.
394  */
395 #define UBLK_F_BATCH_IO		(1ULL << 15)
396 
397 /*
398  * ublk device supports requests with integrity/metadata buffer.
399  * Requires UBLK_F_USER_COPY.
400  */
401 #define UBLK_F_INTEGRITY (1ULL << 16)
402 
403 /*
404  * The device supports the UBLK_CMD_TRY_STOP_DEV command, which
405  * allows stopping the device only if there are no openers.
406  */
407 #define UBLK_F_SAFE_STOP_DEV	(1ULL << 17)
408 
409 /* Disable automatic partition scanning when device is started */
410 #define UBLK_F_NO_AUTO_PART_SCAN (1ULL << 18)
411 
412 /*
413  * Enable shared memory zero copy. When enabled, the server can register
414  * shared memory buffers via UBLK_U_CMD_REG_BUF. If a block request's
415  * pages match a registered buffer, UBLK_IO_F_SHMEM_ZC is set and addr
416  * encodes the buffer index + offset instead of a userspace buffer address.
417  */
418 #define UBLK_F_SHMEM_ZC	(1ULL << 19)
419 
/*
 * device state; UBLK_S_DEV_QUIESCED and UBLK_S_DEV_FAIL_IO are reached
 * via the quiesce flow (see UBLK_F_QUIESCE)
 */
#define UBLK_S_DEV_DEAD	0
#define UBLK_S_DEV_LIVE	1
#define UBLK_S_DEV_QUIESCED	2
#define UBLK_S_DEV_FAIL_IO 	3
425 
/* shipped via sqe->cmd of io_uring command */
struct ublksrv_ctrl_cmd {
	/* sent to which device, must be valid */
	__u32	dev_id;

	/* sent to which queue, must be -1 if the cmd isn't for queue */
	__u16	queue_id;
	/*
	 * cmd specific buffer, can be IN or OUT.
	 *
	 * 'len' is the byte length of the buffer that 'addr' points at,
	 * e.g. UBLK_U_CMD_REG_BUF passes sizeof(struct ublk_shmem_buf_reg).
	 */
	__u16	len;
	__u64	addr;

	/* inline data */
	__u64	data[1];

	/*
	 * Used for UBLK_F_UNPRIVILEGED_DEV and UBLK_CMD_GET_DEV_INFO2
	 * only, include null char
	 */
	__u16	dev_path_len;
	__u16	pad;
	__u32	reserved;
};
450 
struct ublksrv_ctrl_dev_info {
	__u16	nr_hw_queues;	/* number of queues */
	__u16	queue_depth;	/* max in-flight IOs per queue */
	__u16	state;		/* device state; presumably UBLK_S_DEV_* — confirm */
	__u16	pad0;

	__u32	max_io_buf_bytes;	/* max size of a single IO buffer */
	__u32	dev_id;

	__s32	ublksrv_pid;	/* pid of the ublk server process */
	__u32	pad1;

	__u64	flags;		/* UBLK_F_* feature flags */

	/* For ublksrv internal use, invisible to ublk driver */
	__u64	ublksrv_flags;

	__u32	owner_uid;	/* store by kernel */
	__u32	owner_gid;	/* store by kernel */
	__u64	reserved1;
	__u64   reserved2;
};
473 
474 #define		UBLK_IO_OP_READ		0
475 #define		UBLK_IO_OP_WRITE		1
476 #define		UBLK_IO_OP_FLUSH		2
477 #define		UBLK_IO_OP_DISCARD		3
478 #define		UBLK_IO_OP_WRITE_SAME		4
479 #define		UBLK_IO_OP_WRITE_ZEROES		5
480 #define		UBLK_IO_OP_ZONE_OPEN		10
481 #define		UBLK_IO_OP_ZONE_CLOSE		11
482 #define		UBLK_IO_OP_ZONE_FINISH		12
483 #define		UBLK_IO_OP_ZONE_APPEND		13
484 #define		UBLK_IO_OP_ZONE_RESET_ALL	14
485 #define		UBLK_IO_OP_ZONE_RESET		15
486 /*
487  * Construct a zone report. The report request is carried in `struct
488  * ublksrv_io_desc`. The `start_sector` field must be the first sector of a zone
489  * and shall indicate the first zone of the report. The `nr_zones` shall
490  * indicate how many zones should be reported at most. The report shall be
491  * delivered as a `struct blk_zone` array. To report fewer zones than requested,
492  * zero the last entry of the returned array.
493  *
494  * Related definitions(blk_zone, blk_zone_cond, blk_zone_type, ...) in
495  * include/uapi/linux/blkzoned.h are part of ublk UAPI.
496  */
497 #define		UBLK_IO_OP_REPORT_ZONES		18
498 
499 #define		UBLK_IO_F_FAILFAST_DEV		(1U << 8)
500 #define		UBLK_IO_F_FAILFAST_TRANSPORT	(1U << 9)
501 #define		UBLK_IO_F_FAILFAST_DRIVER	(1U << 10)
502 #define		UBLK_IO_F_META			(1U << 11)
503 #define		UBLK_IO_F_FUA			(1U << 13)
504 #define		UBLK_IO_F_NOUNMAP		(1U << 15)
505 #define		UBLK_IO_F_SWAP			(1U << 16)
506 /*
507  * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only.
508  *
509  * This flag is set if auto buffer register is failed & ublk server passes
510  * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer
511  * manually for handling the delivered IO command if this flag is observed
512  *
513  * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is
514  * passed in.
515  */
516 #define		UBLK_IO_F_NEED_REG_BUF		(1U << 17)
517 /* Request has an integrity data buffer */
518 #define		UBLK_IO_F_INTEGRITY		(1UL << 18)
519 /*
520  * I/O buffer is in a registered shared memory buffer. When set, the addr
521  * field in ublksrv_io_desc encodes buffer index and byte offset instead
522  * of a userspace virtual address.
523  */
524 #define		UBLK_IO_F_SHMEM_ZC		(1U << 19)
525 
/*
 * io cmd is described by this structure, and stored in share memory, indexed
 * by request tag.
 *
 * The data is stored by ublk driver, and read by ublksrv after one fetch command
 * returns.
 */
struct ublksrv_io_desc {
	/* op: bit 0-7, flags: bit 8-31 */
	__u32		op_flags;

	union {
		__u32		nr_sectors;
		__u32		nr_zones; /* for UBLK_IO_OP_REPORT_ZONES */
	};

	/* start sector for this io */
	__u64		start_sector;

	/* buffer address in ublksrv daemon vm space, from ublk driver */
	__u64		addr;
};

/* Extract the UBLK_IO_OP_* opcode from the low 8 bits of op_flags */
static inline __u8 ublksrv_get_op(const struct ublksrv_io_desc *iod)
{
	return iod->op_flags & 0xff;
}

/*
 * Return the flag portion of op_flags (bits 8-31), shifted down by 8.
 * Note: UBLK_IO_F_* masks are defined against op_flags directly.
 */
static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
{
	return iod->op_flags >> 8;
}
558 
/*
 * If this flag is set, fallback by completing the uring_cmd and setting
 * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure;
 * otherwise the client ublk request is failed silently
 *
 * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF
 * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set,
 * ublk server needs to register io buffer manually for handling IO command.
 */
#define UBLK_AUTO_BUF_REG_FALLBACK 	(1 << 0)
#define UBLK_AUTO_BUF_REG_F_MASK 	UBLK_AUTO_BUF_REG_FALLBACK

struct ublk_auto_buf_reg {
	/* index for registering the delivered request buffer */
	__u16  index;
	__u8   flags;	/* UBLK_AUTO_BUF_REG_* flags */
	__u8   reserved0;

	/*
	 * io_ring FD can be passed via the reserve field in future for
	 * supporting to register io buffer to external io_uring
	 */
	__u32  reserved1;
};

/*
 * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
 * uring_cmd's sqe->addr:
 *
 * 	- bit0 ~ bit15: buffer index
 * 	- bit16 ~ bit23: flags
 * 	- bit24 ~ bit31: reserved0
 * 	- bit32 ~ bit63: reserved1
 */
static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
		__u64 sqe_addr)
{
	struct ublk_auto_buf_reg reg = {
		.index = (__u16)sqe_addr,
		.flags = (__u8)(sqe_addr >> 16),
		.reserved0 = (__u8)(sqe_addr >> 24),
		.reserved1 = (__u32)(sqe_addr >> 32),
	};

	return reg;
}

/*
 * Inverse of ublk_sqe_addr_to_auto_buf_reg(): pack the register data back
 * into the 64-bit sqe->addr layout described above.
 */
static inline __u64
ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
{
	__u64 addr = buf->index | (__u64)buf->flags << 16 | (__u64)buf->reserved0 << 24 |
		(__u64)buf->reserved1 << 32;

	return addr;
}
614 
/* issued to ublk driver via /dev/ublkcN */
struct ublksrv_io_cmd {
	__u16	q_id;	/* queue this IO belongs to */

	/* for fetch/commit which result */
	__u16	tag;

	/* io result, it is valid for COMMIT* command only */
	__s32	result;

	union {
		/*
		 * userspace buffer address in ublksrv daemon process, valid for
		 * FETCH* command only
		 *
		 * `addr` should not be used when UBLK_F_USER_COPY is enabled,
		 * because userspace handles data copy by pread()/pwrite() over
		 * /dev/ublkcN. But in case of UBLK_F_ZONED, this union is
		 * re-used to pass back the allocated LBA for
		 * UBLK_IO_OP_ZONE_APPEND which actually depends on
		 * UBLK_F_USER_COPY
		 */
		__u64	addr;
		__u64	zone_append_lba;
	};
};
641 
/* per-IO element header inside a batch command buffer (see ublk_batch_io) */
struct ublk_elem_header {
	__u16 tag;	/* IO tag */

	/*
	 * Buffer index for incoming io command, only valid iff
	 * UBLK_F_AUTO_BUF_REG is set
	 */
	__u16 buf_index;
	__s32 result;	/* I/O completion result (commit only) */
};
652 
/*
 * uring_cmd buffer structure for batch commands
 *
 * buffer includes multiple elements, which number is specified by
 * `nr_elem`. Each element buffer is organized in the following order:
 *
 * struct ublk_elem_buffer {
 * 	// Mandatory fields (8 bytes)
 * 	struct ublk_elem_header header;
 *
 * 	// Optional fields (8 bytes each, included based on flags)
 *
 * 	// Buffer address (if UBLK_BATCH_F_HAS_BUF_ADDR) for copying data
 * 	// between ublk request and ublk server buffer
 * 	__u64 buf_addr;
 *
 * 	// returned Zone append LBA (if UBLK_BATCH_F_HAS_ZONE_LBA)
 * 	__u64 zone_lba;
 * }
 *
 * Used for `UBLK_U_IO_PREP_IO_CMDS` and `UBLK_U_IO_COMMIT_IO_CMDS`
 */
struct ublk_batch_io {
	__u16  q_id;	/* queue the batch applies to */
#define UBLK_BATCH_F_HAS_ZONE_LBA	(1 << 0)
#define UBLK_BATCH_F_HAS_BUF_ADDR 	(1 << 1)
#define UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK	(1 << 2)
	__u16	flags;		/* UBLK_BATCH_F_* */
	__u16	nr_elem;	/* number of elements in the command buffer */
	__u8	elem_bytes;	/* byte size of each element (header + optional fields) */
	__u8	reserved;
	__u64   reserved2;
};
686 
struct ublk_param_basic {
#define UBLK_ATTR_READ_ONLY            (1 << 0)
#define UBLK_ATTR_ROTATIONAL           (1 << 1)
#define UBLK_ATTR_VOLATILE_CACHE       (1 << 2)
#define UBLK_ATTR_FUA                  (1 << 3)
	__u32	attrs;		/* UBLK_ATTR_* bits */
	/* the *_shift fields carry sizes as power-of-2 shifts */
	__u8	logical_bs_shift;
	__u8	physical_bs_shift;
	__u8	io_opt_shift;
	__u8	io_min_shift;

	__u32	max_sectors;
	__u32	chunk_sectors;

	__u64   dev_sectors;	/* device capacity in sectors */
	__u64   virt_boundary_mask;
};
704 
/* discard / write-zeroes limits, selected via UBLK_PARAM_TYPE_DISCARD */
struct ublk_param_discard {
	__u32	discard_alignment;

	__u32	discard_granularity;
	__u32	max_discard_sectors;

	__u32	max_write_zeroes_sectors;
	__u16	max_discard_segments;
	__u16	reserved0;
};
715 
/*
 * read-only, can't set via UBLK_CMD_SET_PARAMS, disk_devt is available
 * after device is started
 */
struct ublk_param_devt {
	__u32   char_major;	/* char device (/dev/ublkcN) major */
	__u32   char_minor;	/* char device minor */
	__u32   disk_major;	/* disk device (/dev/ublkbN) major */
	__u32   disk_minor;	/* disk device minor */
};
726 
/* zoned-storage limits, selected via UBLK_PARAM_TYPE_ZONED (needs UBLK_F_ZONED) */
struct ublk_param_zoned {
	__u32	max_open_zones;
	__u32	max_active_zones;
	__u32	max_zone_append_sectors;	/* limit for UBLK_IO_OP_ZONE_APPEND */
	__u8	reserved[20];
};
733 
struct ublk_param_dma_align {
	/* NOTE(review): presumably a DMA alignment mask for IO buffers — confirm */
	__u32	alignment;
	__u8	pad[4];
};
738 
#define UBLK_MIN_SEGMENT_SIZE   4096
/*
 * If any one of the three segment parameters is set to 0, the behavior is
 * undefined.
 */
struct ublk_param_segment {
	/*
	 * seg_boundary_mask + 1 needs to be power_of_2(), and the sum has
	 * to be >= UBLK_MIN_SEGMENT_SIZE(4096)
	 */
	__u64 	seg_boundary_mask;

	/*
	 * max_segment_size could be overridden by virt_boundary_mask, so be
	 * careful when setting both.
	 *
	 * max_segment_size has to be >= UBLK_MIN_SEGMENT_SIZE(4096)
	 */
	__u32 	max_segment_size;
	__u16 	max_segments;
	__u8	pad[2];
};
761 
/* integrity/metadata parameters; UBLK_PARAM_TYPE_INTEGRITY requires UBLK_F_INTEGRITY */
struct ublk_param_integrity {
	__u32	flags; /* LBMD_PI_CAP_* from linux/fs.h */
	__u16	max_integrity_segments; /* 0 means no limit */
	__u8	interval_exp;	/* presumably log2 of protection interval — confirm */
	__u8	metadata_size; /* UBLK_PARAM_TYPE_INTEGRITY requires nonzero */
	__u8	pi_offset;
	__u8	csum_type; /* LBMD_PI_CSUM_* from linux/fs.h */
	__u8	tag_size;
	__u8	pad[5];
};
772 
struct ublk_params {
	/*
	 * Total length of parameters, userspace has to set 'len' for both
	 * SET_PARAMS and GET_PARAMS command, and driver may update len
	 * if two sides use different version of 'ublk_params', same with
	 * 'types' fields.
	 */
	__u32	len;
#define UBLK_PARAM_TYPE_BASIC           (1 << 0)
#define UBLK_PARAM_TYPE_DISCARD         (1 << 1)
#define UBLK_PARAM_TYPE_DEVT            (1 << 2)
#define UBLK_PARAM_TYPE_ZONED           (1 << 3)
#define UBLK_PARAM_TYPE_DMA_ALIGN       (1 << 4)
#define UBLK_PARAM_TYPE_SEGMENT         (1 << 5)
#define UBLK_PARAM_TYPE_INTEGRITY       (1 << 6) /* requires UBLK_F_INTEGRITY */
	__u32	types;			/* types of parameter included */

	/* each member below is meaningful only when its type bit is set in 'types' */
	struct ublk_param_basic		basic;
	struct ublk_param_discard	discard;
	struct ublk_param_devt		devt;
	struct ublk_param_zoned	zoned;
	struct ublk_param_dma_align	dma;
	struct ublk_param_segment	seg;
	struct ublk_param_integrity	integrity;
};
798 
/*
 * Shared memory zero-copy addr encoding for UBLK_IO_F_SHMEM_ZC.
 *
 * When UBLK_IO_F_SHMEM_ZC is set, ublksrv_io_desc.addr is encoded as:
 *   bits [0:31]  = byte offset within the buffer (up to 4GB)
 *   bits [32:47] = buffer index (up to 65536)
 *   bits [48:63] = reserved (must be zero)
 */
#define UBLK_SHMEM_ZC_OFF_MASK		0xffffffffULL
#define UBLK_SHMEM_ZC_IDX_OFF		32
#define UBLK_SHMEM_ZC_IDX_MASK		0xffffULL

/* Encode buffer index + byte offset into the ublksrv_io_desc.addr format */
static inline __u64 ublk_shmem_zc_addr(__u16 index, __u32 offset)
{
	return ((__u64)index << UBLK_SHMEM_ZC_IDX_OFF) | offset;
}

/* Extract the registered buffer index (bits 32-47) from an encoded addr */
static inline __u16 ublk_shmem_zc_index(__u64 addr)
{
	return (addr >> UBLK_SHMEM_ZC_IDX_OFF) & UBLK_SHMEM_ZC_IDX_MASK;
}

/* Extract the byte offset within the buffer (bits 0-31) from an encoded addr */
static inline __u32 ublk_shmem_zc_offset(__u64 addr)
{
	return (__u32)(addr & UBLK_SHMEM_ZC_OFF_MASK);
}
825 
826 #endif
827