/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef USER_BLK_DRV_CMD_INC_H
#define USER_BLK_DRV_CMD_INC_H

#include <linux/types.h>
6
/* ublk server command definition */

/*
 * Admin commands, issued by ublk server, and handled by ublk driver.
 *
 * Legacy command definition, don't use in new application, and don't
 * add new such definition any more
 */
#define	UBLK_CMD_GET_QUEUE_AFFINITY	0x01
#define	UBLK_CMD_GET_DEV_INFO		0x02
#define	UBLK_CMD_ADD_DEV		0x04
#define	UBLK_CMD_DEL_DEV		0x05
#define	UBLK_CMD_START_DEV		0x06
#define	UBLK_CMD_STOP_DEV		0x07
#define	UBLK_CMD_SET_PARAMS		0x08
#define	UBLK_CMD_GET_PARAMS		0x09
#define	UBLK_CMD_START_USER_RECOVERY	0x10
#define	UBLK_CMD_END_USER_RECOVERY	0x11
#define	UBLK_CMD_GET_DEV_INFO2		0x12

/* Any new ctrl command should encode by __IO*() */
#define UBLK_U_CMD_GET_QUEUE_AFFINITY \
	_IOR('u', UBLK_CMD_GET_QUEUE_AFFINITY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_DEV_INFO \
	_IOR('u', UBLK_CMD_GET_DEV_INFO, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_ADD_DEV \
	_IOWR('u', UBLK_CMD_ADD_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_DEL_DEV \
	_IOWR('u', UBLK_CMD_DEL_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_START_DEV \
	_IOWR('u', UBLK_CMD_START_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_STOP_DEV \
	_IOWR('u', UBLK_CMD_STOP_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_SET_PARAMS \
	_IOWR('u', UBLK_CMD_SET_PARAMS, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_PARAMS \
	_IOR('u', UBLK_CMD_GET_PARAMS, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_START_USER_RECOVERY \
	_IOWR('u', UBLK_CMD_START_USER_RECOVERY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_END_USER_RECOVERY \
	_IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_DEV_INFO2 \
	_IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_FEATURES \
	_IOR('u', 0x13, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_DEL_DEV_ASYNC \
	_IOR('u', 0x14, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_UPDATE_SIZE \
	_IOWR('u', 0x15, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_QUIESCE_DEV \
	_IOWR('u', 0x16, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_TRY_STOP_DEV \
	_IOWR('u', 0x17, struct ublksrv_ctrl_cmd)
/*
 * Register a shared memory buffer for zero-copy I/O.
 * Input: ctrl_cmd.addr points to struct ublk_shmem_buf_reg (buffer VA + size)
 *        ctrl_cmd.len = sizeof(struct ublk_shmem_buf_reg)
 * Result: >= 0 is the assigned buffer index, < 0 is error
 *
 * The kernel pins pages from the calling process's address space
 * and inserts PFN ranges into a per-device maple tree. When a block
 * request's pages match registered pages, the driver sets
 * UBLK_IO_F_SHMEM_ZC and encodes the buffer index + offset in addr,
 * allowing the server to access the data via its own mapping of the
 * same shared memory — true zero copy.
 *
 * The memory can be backed by memfd, hugetlbfs, or any GUP-compatible
 * shared mapping. Queue freeze is handled internally.
 *
 * The buffer VA and size are passed via a user buffer (not inline in
 * ctrl_cmd) so that unprivileged devices can prepend the device path
 * to ctrl_cmd.addr without corrupting the VA.
 */
#define UBLK_U_CMD_REG_BUF \
	_IOWR('u', 0x18, struct ublksrv_ctrl_cmd)
/*
 * Unregister a shared memory buffer.
 * Input: ctrl_cmd.data[0] = buffer index
 */
#define UBLK_U_CMD_UNREG_BUF \
	_IOWR('u', 0x19, struct ublksrv_ctrl_cmd)

/* Parameter buffer for UBLK_U_CMD_REG_BUF, pointed to by ctrl_cmd.addr */
struct ublk_shmem_buf_reg {
	__u64	addr; /* userspace virtual address of shared memory */
	__u64	len; /* buffer size in bytes, page-aligned, default max 4GB */
	__u32	flags;
	__u32	reserved;
};

/* Pin pages without FOLL_WRITE; usable with write-sealed memfd */
#define UBLK_SHMEM_BUF_READ_ONLY	(1U << 0)
/*
 * 64bits are enough now, and it should be easy to extend in case of
 * running out of feature flags
 */
#define UBLK_FEATURES_LEN  8
104
/*
 * IO commands, issued by ublk server, and handled by ublk driver.
 *
 * FETCH_REQ: issued via sqe(URING_CMD) beforehand for fetching IO request
 *      from ublk driver, should be issued only when starting device. After
 *      the associated cqe is returned, request's tag can be retrieved via
 *      cqe->userdata.
 *
 * COMMIT_AND_FETCH_REQ: issued via sqe(URING_CMD) after ublkserver handled
 *      this IO request, request's handling result is committed to ublk
 *      driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be
 *      handled before completing io request.
 *
 * NEED_GET_DATA: only used for write requests to set io addr and copy data
 *      When NEED_GET_DATA is set, ublksrv has to issue UBLK_IO_NEED_GET_DATA
 *      command after ublk driver returns UBLK_IO_RES_NEED_GET_DATA.
 *
 *      It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag
 *      while starting a ublk device.
 */

/*
 * Legacy IO command definition, don't use in new application, and don't
 * add new such definition any more
 */
#define	UBLK_IO_FETCH_REQ		0x20
#define	UBLK_IO_COMMIT_AND_FETCH_REQ	0x21
#define	UBLK_IO_NEED_GET_DATA		0x22

/* Any new IO command should encode by __IOWR() */
#define	UBLK_U_IO_FETCH_REQ \
	_IOWR('u', UBLK_IO_FETCH_REQ, struct ublksrv_io_cmd)
#define	UBLK_U_IO_COMMIT_AND_FETCH_REQ \
	_IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd)
#define	UBLK_U_IO_NEED_GET_DATA \
	_IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd)
#define	UBLK_U_IO_REGISTER_IO_BUF \
	_IOWR('u', 0x23, struct ublksrv_io_cmd)
#define	UBLK_U_IO_UNREGISTER_IO_BUF \
	_IOWR('u', 0x24, struct ublksrv_io_cmd)

145
/*
 * return 0 if the command is run successfully, otherwise failure code
 * is returned
 */
#define UBLK_U_IO_PREP_IO_CMDS \
	_IOWR('u', 0x25, struct ublk_batch_io)
/*
 * If failure code is returned, nothing in the command buffer is handled.
 * Otherwise, the returned value means how many bytes in command buffer
 * are handled actually, then number of handled IOs can be calculated with
 * `elem_bytes` for each IO. IOs in the remained bytes are not committed,
 * userspace has to check return value for dealing with partial committing
 * correctly.
 */
#define UBLK_U_IO_COMMIT_IO_CMDS \
	_IOWR('u', 0x26, struct ublk_batch_io)

/*
 * Fetch io commands to provided buffer in multishot style,
 * `IORING_URING_CMD_MULTISHOT` is required for this command.
 */
#define UBLK_U_IO_FETCH_IO_CMDS \
	_IOWR('u', 0x27, struct ublk_batch_io)

/* only ABORT means that no re-fetch */
#define UBLK_IO_RES_OK			0
#define UBLK_IO_RES_NEED_GET_DATA	1
#define UBLK_IO_RES_ABORT		(-ENODEV)

#define UBLKSRV_CMD_BUF_OFFSET	0
#define UBLKSRV_IO_BUF_OFFSET	0x80000000

/* tag bit is 16bit, so far limit at most 4096 IOs for each queue */
#define UBLK_MAX_QUEUE_DEPTH	4096

/* single IO buffer max size is 32MB */
#define UBLK_IO_BUF_OFF		0
#define UBLK_IO_BUF_BITS	25
#define UBLK_IO_BUF_BITS_MASK	((1ULL << UBLK_IO_BUF_BITS) - 1)

/* so at most 64K IOs for each queue */
#define UBLK_TAG_OFF		UBLK_IO_BUF_BITS
#define UBLK_TAG_BITS		16
#define UBLK_TAG_BITS_MASK	((1ULL << UBLK_TAG_BITS) - 1)

/* max 4096 queues */
#define UBLK_QID_OFF		(UBLK_TAG_OFF + UBLK_TAG_BITS)
#define UBLK_QID_BITS		12
#define UBLK_QID_BITS_MASK	((1ULL << UBLK_QID_BITS) - 1)

#define UBLK_MAX_NR_QUEUES	(1U << UBLK_QID_BITS)

#define UBLKSRV_IO_BUF_TOTAL_BITS	(UBLK_QID_OFF + UBLK_QID_BITS)
#define UBLKSRV_IO_BUF_TOTAL_SIZE	(1ULL << UBLKSRV_IO_BUF_TOTAL_BITS)

/* Copy to/from request integrity buffer instead of data buffer */
#define UBLK_INTEGRITY_FLAG_OFF		62
#define UBLKSRV_IO_INTEGRITY_FLAG	(1ULL << UBLK_INTEGRITY_FLAG_OFF)
204
/*
 * ublk server can register data buffers for incoming I/O requests with a sparse
 * io_uring buffer table. The request buffer can then be used as the data buffer
 * for io_uring operations via the fixed buffer index.
 * Note that the ublk server can never directly access the request data memory.
 *
 * To use this feature, the ublk server must first register a sparse buffer
 * table on an io_uring instance.
 * When an incoming ublk request is received, the ublk server submits a
 * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The
 * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register
 * and addr is the index in the io_uring's buffer table to install the buffer.
 * SQEs can now be submitted to the io_uring to read/write the request's buffer
 * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or
 * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index.
 * Once the last io_uring operation using the request's buffer has completed,
 * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag,
 * and addr again specifying the request buffer to unregister.
 * The ublk request is completed when its buffer is unregistered from all
 * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ.
 *
 * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak
 * uninitialized kernel memory by not reading into the full request buffer.
 */
#define UBLK_F_SUPPORT_ZERO_COPY	(1ULL << 0)

/*
 * Force to complete io cmd via io_uring_cmd_complete_in_task so that
 * performance comparison is done easily with using task_work_add
 */
#define UBLK_F_URING_CMD_COMP_IN_TASK	(1ULL << 1)

/*
 * User should issue io cmd again for write requests to
 * set io buffer address and copy data from bio vectors
 * to the userspace io buffer.
 *
 * In this mode, task_work is not used.
 */
#define UBLK_F_NEED_GET_DATA (1UL << 2)

/*
 * - Block devices are recoverable if ublk server exits and restarts
 * - Outstanding I/O when ublk server exits is met with errors
 * - I/O issued while there is no ublk server queues
 */
#define UBLK_F_USER_RECOVERY	(1UL << 3)

/*
 * - Block devices are recoverable if ublk server exits and restarts
 * - Outstanding I/O when ublk server exits is reissued
 * - I/O issued while there is no ublk server queues
 */
#define UBLK_F_USER_RECOVERY_REISSUE	(1UL << 4)

/*
 * Unprivileged user can create /dev/ublkcN and /dev/ublkbN.
 *
 * /dev/ublk-control needs to be available for unprivileged user, and it
 * can be done via udev rule to make all control commands available to
 * unprivileged user. Except for the command of UBLK_CMD_ADD_DEV, all
 * other commands are only allowed for the owner of the specified device.
 *
 * When userspace sends UBLK_CMD_ADD_DEV, the device pair's owner_uid and
 * owner_gid are stored to ublksrv_ctrl_dev_info by kernel, so far only
 * the current user's uid/gid is stored, that said owner of the created
 * device is always the current user.
 *
 * We still need udev rule to apply OWNER/GROUP with the stored owner_uid
 * and owner_gid.
 *
 * Then ublk server can be run as unprivileged user, and /dev/ublkbN can
 * be accessed and managed by its owner represented by owner_uid/owner_gid.
 */
#define UBLK_F_UNPRIVILEGED_DEV	(1UL << 5)

/* use ioctl encoding for uring command */
#define UBLK_F_CMD_IOCTL_ENCODE	(1UL << 6)

/*
 * Copy between request and user buffer by pread()/pwrite()
 *
 * Not available for UBLK_F_UNPRIVILEGED_DEV, otherwise userspace may
 * deceive us by not filling request buffer, then kernel uninitialized
 * data may be leaked.
 */
#define UBLK_F_USER_COPY	(1UL << 7)

/*
 * User space sets this flag when setting up the device to request zoned storage support. Kernel may
 * deny the request by returning an error.
 */
#define UBLK_F_ZONED (1ULL << 8)

/*
 * - Block devices are recoverable if ublk server exits and restarts
 * - Outstanding I/O when ublk server exits is met with errors
 * - I/O issued while there is no ublk server is met with errors
 */
#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)
305
/*
 * Resizing a block device is possible with UBLK_U_CMD_UPDATE_SIZE
 * New size is passed in cmd->data[0] and is in units of sectors
 */
#define UBLK_F_UPDATE_SIZE		 (1ULL << 10)

/*
 * request buffer is registered automatically to uring_cmd's io_uring
 * context before delivering this io command to ublk server, meantime
 * it is un-registered automatically when completing this io command.
 *
 * For using this feature:
 *
 * - ublk server has to create sparse buffer table on the same `io_ring_ctx`
 *   for issuing `UBLK_IO_FETCH_REQ` and `UBLK_IO_COMMIT_AND_FETCH_REQ`.
 *   If uring_cmd isn't issued on same `io_ring_ctx`, it is ublk server's
 *   responsibility to unregister the buffer by issuing `IO_UNREGISTER_IO_BUF`
 *   manually, otherwise this ublk request won't complete.
 *
 * - ublk server passes auto buf register data via uring_cmd's sqe->addr,
 *   `struct ublk_auto_buf_reg` is populated from sqe->addr, please see
 *   the definition of ublk_sqe_addr_to_auto_buf_reg()
 *
 * - pass buffer index from `ublk_auto_buf_reg.index`
 *
 * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
 *
 * - pass flags from `ublk_auto_buf_reg.flags` if needed
 *
 * This way avoids extra cost from two uring_cmd, but also simplifies backend
 * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
 * IO_UNREGISTER_IO_BUF becomes not necessary.
 *
 * If wrong data or flags are provided, both IO_FETCH_REQ and
 * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request
 * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued
 * successfully
 */
#define UBLK_F_AUTO_BUF_REG 	(1ULL << 11)

/*
 * Control command `UBLK_U_CMD_QUIESCE_DEV` is added for quiescing device,
 * which state can be transitioned to `UBLK_S_DEV_QUIESCED` or
 * `UBLK_S_DEV_FAIL_IO` finally, and it needs ublk server cooperation for
 * handling `UBLK_IO_RES_ABORT` correctly.
 *
 * Typical use case is for supporting to upgrade ublk server application,
 * meantime keep ublk block device persistent during the period.
 *
 * This feature is only available when UBLK_F_USER_RECOVERY is enabled.
 *
 * Note, this command returns -EBUSY in case that all IO commands are being
 * handled by ublk server and not completed in specified time period which
 * is passed from the control command parameter.
 */
#define UBLK_F_QUIESCE		(1ULL << 12)

/*
 * If this feature is set, ublk_drv supports each (qid,tag) pair having
 * its own independent daemon task that is responsible for handling it.
 * If it is not set, daemons are per-queue instead, so for two pairs
 * (qid1,tag1) and (qid2,tag2), if qid1 == qid2, then the same task must
 * be responsible for handling (qid1,tag1) and (qid2,tag2).
 */
#define UBLK_F_PER_IO_DAEMON (1ULL << 13)

/*
 * If this feature is set, UBLK_U_IO_REGISTER_IO_BUF/UBLK_U_IO_UNREGISTER_IO_BUF
 * can be issued for an I/O on any task. q_id and tag are also ignored in
 * UBLK_U_IO_UNREGISTER_IO_BUF's ublksrv_io_cmd.
 * If it is unset, zero-copy buffers can only be registered and unregistered by
 * the I/O's daemon task. The q_id and tag of the registered buffer are required
 * in UBLK_U_IO_UNREGISTER_IO_BUF's ublksrv_io_cmd.
 */
#define UBLK_F_BUF_REG_OFF_DAEMON (1ULL << 14)

/*
 * Support the following commands for delivering & committing io command
 * in batch.
 *
 * - UBLK_U_IO_PREP_IO_CMDS
 * - UBLK_U_IO_COMMIT_IO_CMDS
 * - UBLK_U_IO_FETCH_IO_CMDS
 * - UBLK_U_IO_REGISTER_IO_BUF
 * - UBLK_U_IO_UNREGISTER_IO_BUF
 *
 * The existing UBLK_U_IO_FETCH_REQ, UBLK_U_IO_COMMIT_AND_FETCH_REQ and
 * UBLK_U_IO_NEED_GET_DATA uring_cmd are not supported for this feature.
 */
#define UBLK_F_BATCH_IO (1ULL << 15)

/*
 * ublk device supports requests with integrity/metadata buffer.
 * Requires UBLK_F_USER_COPY.
 */
#define UBLK_F_INTEGRITY (1ULL << 16)

/*
 * The device supports the UBLK_CMD_TRY_STOP_DEV command, which
 * allows stopping the device only if there are no openers.
 */
#define UBLK_F_SAFE_STOP_DEV (1ULL << 17)

/* Disable automatic partition scanning when device is started */
#define UBLK_F_NO_AUTO_PART_SCAN (1ULL << 18)

/*
 * Enable shared memory zero copy. When enabled, the server can register
 * shared memory buffers via UBLK_U_CMD_REG_BUF. If a block request's
 * pages match a registered buffer, UBLK_IO_F_SHMEM_ZC is set and addr
 * encodes the buffer index + offset instead of a userspace buffer address.
 */
#define UBLK_F_SHMEM_ZC (1ULL << 19)

/* device state */
#define UBLK_S_DEV_DEAD	0
#define UBLK_S_DEV_LIVE	1
#define UBLK_S_DEV_QUIESCED	2
#define UBLK_S_DEV_FAIL_IO 	3
425
/* shipped via sqe->cmd of io_uring command */
struct ublksrv_ctrl_cmd {
	/* sent to which device, must be valid */
	__u32	dev_id;

	/* sent to which queue, must be -1 if the cmd isn't for queue */
	__u16	queue_id;
	/*
	 * cmd specific buffer, can be IN or OUT.
	 */
	__u16	len;
	__u64	addr;

	/* inline data */
	__u64	data[1];

	/*
	 * Used for UBLK_F_UNPRIVILEGED_DEV and UBLK_CMD_GET_DEV_INFO2
	 * only, include null char
	 */
	__u16	dev_path_len;
	__u16	pad;
	__u32	reserved;
};
450
struct ublksrv_ctrl_dev_info {
	__u16	nr_hw_queues;
	__u16	queue_depth;
	__u16	state;
	__u16	pad0;

	__u32	max_io_buf_bytes;
	__u32	dev_id;

	__s32	ublksrv_pid;
	__u32	pad1;

	__u64	flags;

	/* For ublksrv internal use, invisible to ublk driver */
	__u64	ublksrv_flags;

	__u32	owner_uid;	/* store by kernel */
	__u32	owner_gid;	/* store by kernel */
	__u64	reserved1;
	__u64	reserved2;
};
473
#define		UBLK_IO_OP_READ		0
#define		UBLK_IO_OP_WRITE		1
#define		UBLK_IO_OP_FLUSH		2
#define		UBLK_IO_OP_DISCARD		3
#define		UBLK_IO_OP_WRITE_SAME		4
#define		UBLK_IO_OP_WRITE_ZEROES		5
#define		UBLK_IO_OP_ZONE_OPEN		10
#define		UBLK_IO_OP_ZONE_CLOSE		11
#define		UBLK_IO_OP_ZONE_FINISH		12
#define		UBLK_IO_OP_ZONE_APPEND		13
#define		UBLK_IO_OP_ZONE_RESET_ALL	14
#define		UBLK_IO_OP_ZONE_RESET		15
/*
 * Construct a zone report. The report request is carried in `struct
 * ublksrv_io_desc`. The `start_sector` field must be the first sector of a zone
 * and shall indicate the first zone of the report. The `nr_zones` shall
 * indicate how many zones should be reported at most. The report shall be
 * delivered as a `struct blk_zone` array. To report fewer zones than requested,
 * zero the last entry of the returned array.
 *
 * Related definitions(blk_zone, blk_zone_cond, blk_zone_type, ...) in
 * include/uapi/linux/blkzoned.h are part of ublk UAPI.
 */
#define		UBLK_IO_OP_REPORT_ZONES		18

#define		UBLK_IO_F_FAILFAST_DEV		(1U << 8)
#define		UBLK_IO_F_FAILFAST_TRANSPORT	(1U << 9)
#define		UBLK_IO_F_FAILFAST_DRIVER	(1U << 10)
#define		UBLK_IO_F_META			(1U << 11)
#define		UBLK_IO_F_FUA			(1U << 13)
#define		UBLK_IO_F_NOUNMAP		(1U << 15)
#define		UBLK_IO_F_SWAP			(1U << 16)
/*
 * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only.
 *
 * This flag is set if auto buffer register is failed & ublk server passes
 * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer
 * manually for handling the delivered IO command if this flag is observed
 *
 * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is
 * passed in.
 */
#define		UBLK_IO_F_NEED_REG_BUF		(1U << 17)
/* Request has an integrity data buffer */
#define		UBLK_IO_F_INTEGRITY		(1U << 18)
/*
 * I/O buffer is in a registered shared memory buffer. When set, the addr
 * field in ublksrv_io_desc encodes buffer index and byte offset instead
 * of a userspace virtual address.
 */
#define		UBLK_IO_F_SHMEM_ZC		(1U << 19)
525
/*
 * io cmd is described by this structure, and stored in share memory, indexed
 * by request tag.
 *
 * The data is stored by ublk driver, and read by ublksrv after one fetch command
 * returns.
 */
struct ublksrv_io_desc {
	/* op: bit 0-7, flags: bit 8-31 */
	__u32		op_flags;

	union {
		__u32		nr_sectors;
		__u32		nr_zones; /* for UBLK_IO_OP_REPORT_ZONES */
	};

	/* start sector for this io */
	__u64		start_sector;

	/* buffer address in ublksrv daemon vm space, from ublk driver */
	__u64		addr;
};

/* Extract the 8-bit opcode (UBLK_IO_OP_*) from op_flags */
static inline __u8 ublksrv_get_op(const struct ublksrv_io_desc *iod)
{
	return iod->op_flags & 0xff;
}

/* Extract the 24-bit flags (UBLK_IO_F_*, shifted down) from op_flags */
static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
{
	return iod->op_flags >> 8;
}
558
/*
 * If this flag is set, fallback by completing the uring_cmd and setting
 * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure;
 * otherwise the client ublk request is failed silently
 *
 * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF
 * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set,
 * ublk server needs to register io buffer manually for handling IO command.
 */
#define UBLK_AUTO_BUF_REG_FALLBACK	(1 << 0)
#define UBLK_AUTO_BUF_REG_F_MASK	UBLK_AUTO_BUF_REG_FALLBACK

struct ublk_auto_buf_reg {
	/* index for registering the delivered request buffer */
	__u16  index;
	__u8   flags;
	__u8   reserved0;

	/*
	 * io_ring FD can be passed via the reserve field in future for
	 * supporting to register io buffer to external io_uring
	 */
	__u32  reserved1;
};

/*
 * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
 * uring_cmd's sqe->addr:
 *
 * 	- bit0 ~ bit15: buffer index
 * 	- bit16 ~ bit23: flags
 * 	- bit24 ~ bit31: reserved0
 * 	- bit32 ~ bit63: reserved1
 */
static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
		__u64 sqe_addr)
{
	struct ublk_auto_buf_reg reg = {
		.index = (__u16)sqe_addr,
		.flags = (__u8)(sqe_addr >> 16),
		.reserved0 = (__u8)(sqe_addr >> 24),
		.reserved1 = (__u32)(sqe_addr >> 32),
	};

	return reg;
}

/* Pack a ublk_auto_buf_reg back into the sqe->addr layout described above */
static inline __u64
ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
{
	__u64 addr = buf->index | (__u64)buf->flags << 16 |
		(__u64)buf->reserved0 << 24 | (__u64)buf->reserved1 << 32;

	return addr;
}
614
/* issued to ublk driver via /dev/ublkcN */
struct ublksrv_io_cmd {
	__u16	q_id;

	/* for fetch/commit which result */
	__u16	tag;

	/* io result, it is valid for COMMIT* command only */
	__s32	result;

	union {
		/*
		 * userspace buffer address in ublksrv daemon process, valid for
		 * FETCH* command only
		 *
		 * `addr` should not be used when UBLK_F_USER_COPY is enabled,
		 * because userspace handles data copy by pread()/pwrite() over
		 * /dev/ublkcN. But in case of UBLK_F_ZONED, this union is
		 * re-used to pass back the allocated LBA for
		 * UBLK_IO_OP_ZONE_APPEND which actually depends on
		 * UBLK_F_USER_COPY
		 */
		__u64	addr;
		__u64	zone_append_lba;
	};
};
641
struct ublk_elem_header {
	__u16 tag;	/* IO tag */

	/*
	 * Buffer index for incoming io command, only valid iff
	 * UBLK_F_AUTO_BUF_REG is set
	 */
	__u16 buf_index;
	__s32 result;	/* I/O completion result (commit only) */
};
652
/*
 * uring_cmd buffer structure for batch commands
 *
 * buffer includes multiple elements, which number is specified by
 * `nr_elem`. Each element buffer is organized in the following order:
 *
 * 	struct ublk_elem_buffer {
 * 		// Mandatory fields (8 bytes)
 * 		struct ublk_elem_header header;
 *
 * 		// Optional fields (8 bytes each, included based on flags)
 *
 * 		// Buffer address (if UBLK_BATCH_F_HAS_BUF_ADDR) for copying data
 * 		// between ublk request and ublk server buffer
 * 		__u64 buf_addr;
 *
 * 		// returned Zone append LBA (if UBLK_BATCH_F_HAS_ZONE_LBA)
 * 		__u64 zone_lba;
 * 	}
 *
 * Used for `UBLK_U_IO_PREP_IO_CMDS` and `UBLK_U_IO_COMMIT_IO_CMDS`
 */
struct ublk_batch_io {
	__u16	q_id;
#define UBLK_BATCH_F_HAS_ZONE_LBA		(1 << 0)
#define UBLK_BATCH_F_HAS_BUF_ADDR		(1 << 1)
#define UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK	(1 << 2)
	__u16	flags;
	__u16	nr_elem;
	__u8	elem_bytes;
	__u8	reserved;
	__u64	reserved2;
};
686
struct ublk_param_basic {
#define UBLK_ATTR_READ_ONLY            (1 << 0)
#define UBLK_ATTR_ROTATIONAL           (1 << 1)
#define UBLK_ATTR_VOLATILE_CACHE       (1 << 2)
#define UBLK_ATTR_FUA                  (1 << 3)
	__u32	attrs;
	__u8	logical_bs_shift;
	__u8	physical_bs_shift;
	__u8	io_opt_shift;
	__u8	io_min_shift;

	__u32	max_sectors;
	__u32	chunk_sectors;

	__u64	dev_sectors;
	__u64	virt_boundary_mask;
};
704
struct ublk_param_discard {
	__u32	discard_alignment;

	__u32	discard_granularity;
	__u32	max_discard_sectors;

	__u32	max_write_zeroes_sectors;
	__u16	max_discard_segments;
	__u16	reserved0;
};
715
/*
 * read-only, can't set via UBLK_CMD_SET_PARAMS, disk_devt is available
 * after device is started
 */
struct ublk_param_devt {
	__u32	char_major;
	__u32	char_minor;
	__u32	disk_major;
	__u32	disk_minor;
};
726
struct ublk_param_zoned {
	__u32	max_open_zones;
	__u32	max_active_zones;
	__u32	max_zone_append_sectors;
	__u8	reserved[20];
};
733
struct ublk_param_dma_align {
	__u32	alignment;
	__u8	pad[4];
};
738
#define UBLK_MIN_SEGMENT_SIZE   4096
/*
 * If any one of the three segment parameter is set as 0, the behavior is
 * undefined.
 */
struct ublk_param_segment {
	/*
	 * seg_boundary_mask + 1 needs to be power_of_2(), and the sum has
	 * to be >= UBLK_MIN_SEGMENT_SIZE(4096)
	 */
	__u64 	seg_boundary_mask;

	/*
	 * max_segment_size could be override by virt_boundary_mask, so be
	 * careful when setting both.
	 *
	 * max_segment_size has to be >= UBLK_MIN_SEGMENT_SIZE(4096)
	 */
	__u32 	max_segment_size;
	__u16 	max_segments;
	__u8	pad[2];
};
761
struct ublk_param_integrity {
	__u32	flags;	/* LBMD_PI_CAP_* from linux/fs.h */
	__u16	max_integrity_segments; /* 0 means no limit */
	__u8	interval_exp;
	__u8	metadata_size; /* UBLK_PARAM_TYPE_INTEGRITY requires nonzero */
	__u8	pi_offset;
	__u8	csum_type; /* LBMD_PI_CSUM_* from linux/fs.h */
	__u8	tag_size;
	__u8	pad[5];
};
772
773 struct ublk_params {
774 /*
775 * Total length of parameters, userspace has to set 'len' for both
776 * SET_PARAMS and GET_PARAMS command, and driver may update len
777 * if two sides use different version of 'ublk_params', same with
778 * 'types' fields.
779 */
780 __u32 len;
781 #define UBLK_PARAM_TYPE_BASIC (1 << 0)
782 #define UBLK_PARAM_TYPE_DISCARD (1 << 1)
783 #define UBLK_PARAM_TYPE_DEVT (1 << 2)
784 #define UBLK_PARAM_TYPE_ZONED (1 << 3)
785 #define UBLK_PARAM_TYPE_DMA_ALIGN (1 << 4)
786 #define UBLK_PARAM_TYPE_SEGMENT (1 << 5)
787 #define UBLK_PARAM_TYPE_INTEGRITY (1 << 6) /* requires UBLK_F_INTEGRITY */
788 __u32 types; /* types of parameter included */
789
790 struct ublk_param_basic basic;
791 struct ublk_param_discard discard;
792 struct ublk_param_devt devt;
793 struct ublk_param_zoned zoned;
794 struct ublk_param_dma_align dma;
795 struct ublk_param_segment seg;
796 struct ublk_param_integrity integrity;
797 };
798
/*
 * Shared memory zero-copy addr encoding for UBLK_IO_F_SHMEM_ZC.
 *
 * When UBLK_IO_F_SHMEM_ZC is set, ublksrv_io_desc.addr is encoded as:
 *   bits [0:31]  = byte offset within the buffer (up to 4GB)
 *   bits [32:47] = buffer index (up to 65536)
 *   bits [48:63] = reserved (must be zero)
 */
#define UBLK_SHMEM_ZC_OFF_MASK	0xffffffffULL
#define UBLK_SHMEM_ZC_IDX_OFF	32
#define UBLK_SHMEM_ZC_IDX_MASK	0xffffULL

/* Build an encoded addr value from buffer index and byte offset */
static inline __u64 ublk_shmem_zc_addr(__u16 index, __u32 offset)
{
	return ((__u64)index << UBLK_SHMEM_ZC_IDX_OFF) | offset;
}

/* Extract the registered buffer index from an encoded addr */
static inline __u16 ublk_shmem_zc_index(__u64 addr)
{
	return (addr >> UBLK_SHMEM_ZC_IDX_OFF) & UBLK_SHMEM_ZC_IDX_MASK;
}

/* Extract the byte offset within the buffer from an encoded addr */
static inline __u32 ublk_shmem_zc_offset(__u64 addr)
{
	return (__u32)(addr & UBLK_SHMEM_ZC_OFF_MASK);
}
825
826 #endif
827