1 #ifndef VHOST_H
2 #define VHOST_H
3
4 #include "hw/virtio/vhost-backend.h"
5 #include "hw/virtio/virtio.h"
6 #include "system/memory.h"
7
8 #define VHOST_F_DEVICE_IOTLB 63
9 #define VHOST_USER_F_PROTOCOL_FEATURES 30
10
11 #define VU_REALIZE_CONN_RETRIES 3
12
13 /* Generic structures common for any vhost based device. */
14
/*
 * struct vhost_inflight - shared region tracking in-flight descriptors
 *
 * Managed via vhost_dev_get_inflight()/vhost_dev_set_inflight() below;
 * the region is backed by @fd and mapped at @addr.
 */
struct vhost_inflight {
    int fd;              /* file descriptor backing the shared region */
    void *addr;          /* mapping of the region in QEMU's address space */
    uint64_t size;       /* size of the mapped region, in bytes */
    uint64_t offset;     /* offset of the inflight data within @fd */
    uint16_t queue_size; /* queue size the region was sized for
                          * (see vhost_dev_get_inflight()) */
};
22
/*
 * struct vhost_virtqueue - per-virtqueue vhost state
 *
 * Holds the host-side view of one virtqueue: the notification fds, the
 * mapped ring areas and their guest-physical locations.
 */
struct vhost_virtqueue {
    int kick;                 /* fd for guest->host notification
                               * (standard vhost kick eventfd — confirm) */
    int call;                 /* fd for host->guest interrupt
                               * (standard vhost call eventfd — confirm) */
    void *desc;               /* mapped descriptor ring */
    void *avail;              /* mapped avail ring */
    void *used;               /* mapped used ring */
    int num;                  /* ring size, in descriptors */
    unsigned long long desc_phys;  /* guest-physical address of desc ring */
    unsigned desc_size;            /* byte size of the mapped desc area */
    unsigned long long avail_phys; /* guest-physical address of avail ring */
    unsigned avail_size;           /* byte size of the mapped avail area */
    unsigned long long used_phys;  /* guest-physical address of used ring */
    unsigned used_size;            /* byte size of the mapped used area */
    EventNotifier masked_notifier;        /* receives events while vq masked */
    EventNotifier error_notifier;         /* backend error reporting */
    EventNotifier masked_config_notifier; /* config events while masked */
    struct vhost_dev *dev;    /* back-pointer to the owning device */
};
41
/* One word of the dirty-memory log bitmap (see struct vhost_log). */
typedef unsigned long vhost_log_chunk_t;
/* Granularity of dirty tracking: one bit covers this many bytes. */
#define VHOST_LOG_PAGE 0x1000
/* Number of dirty bits carried by a single vhost_log_chunk_t. */
#define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t))
/* Bytes of guest memory covered by one full log chunk. */
#define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS)
/* Sentinel for an unmapped feature bit (see vhost_get_features()). */
#define VHOST_INVALID_FEATURE_BIT (0xff)
/* Queue index used for the config interrupt (see vhost_config_mask()). */
#define VHOST_QUEUE_NUM_CONFIG_INR 0
48
/*
 * struct vhost_log - reference-counted dirty-memory log
 *
 * The log is a bitmap of vhost_log_chunk_t words; it may be shared by
 * several vhost devices (hence @refcnt).
 */
struct vhost_log {
    unsigned long long size; /* log size (units: chunks or bytes — confirm
                              * against vhost.c) */
    int refcnt;              /* number of vhost_dev users of this log */
    int fd;                  /* fd backing the log when it is shared with
                              * the backend, otherwise unused — confirm */
    vhost_log_chunk_t *log;  /* the dirty bitmap itself */
};
55
struct vhost_dev;

/*
 * struct vhost_iommu - per-IOMMU-region bookkeeping for a vhost device
 *
 * One instance is kept on @hdev's iommu_list for each IOMMU memory
 * region the device's address space intersects.
 */
struct vhost_iommu {
    struct vhost_dev *hdev;  /* owning vhost device */
    MemoryRegion *mr;        /* the IOMMU memory region being watched */
    hwaddr iommu_offset;     /* offset of @mr within the address space */
    IOMMUNotifier n;         /* notifier registered on @mr */
    QLIST_ENTRY(vhost_iommu) iommu_next; /* linkage in hdev->iommu_list */
};
64
/*
 * VhostDevConfigOps - callbacks for device config space events
 *
 * Registered with vhost_dev_set_config_notifier().
 */
typedef struct VhostDevConfigOps {
    /* Called when the vhost device's config space has changed. */
    int (*vhost_dev_config_notifier)(struct vhost_dev *dev);
} VhostDevConfigOps;
70
struct vhost_memory;

/**
 * struct vhost_dev - common vhost_dev structure
 * @vhost_ops: backend specific ops
 * @config_ops: ops for config changes (see @vhost_dev_set_config_notifier)
 */
struct vhost_dev {
    /* The VirtIO device this vhost instance backs. */
    VirtIODevice *vdev;
    /* Listener tracking guest memory layout changes. */
    MemoryListener memory_listener;
    /* Listener for IOMMU region additions/removals (see vhost_iommu). */
    MemoryListener iommu_listener;
    /* Memory table handed to the backend. */
    struct vhost_memory *mem;
    int n_mem_sections;
    MemoryRegionSection *mem_sections;
    /* Scratch copy used while rebuilding @mem_sections — confirm. */
    int n_tmp_sections;
    MemoryRegionSection *tmp_sections;
    /* Array of @nvqs virtqueues owned by this device. */
    struct vhost_virtqueue *vqs;
    unsigned int nvqs;
    /* the first virtqueue which would be used by this vhost dev */
    int vq_index;
    /* one past the last vq index for the virtio device (not vhost) */
    int vq_index_end;
    /* if non-zero, minimum required value for max_queues */
    int num_queues;
    /**
     * vhost feature handling requires matching the feature set
     * offered by a backend which may be a subset of the total
     * features eventually offered to the guest.
     *
     * @features: available features provided by the backend
     * @acked_features: final negotiated features with front-end driver
     *
     * @backend_features: this is used in a couple of places to either
     * store VHOST_USER_F_PROTOCOL_FEATURES to apply to
     * VHOST_USER_SET_FEATURES or VHOST_NET_F_VIRTIO_NET_HDR. Its
     * future use should be discouraged and the variable retired as
     * it's easy to confuse with the VirtIO backend_features.
     */
    uint64_t features;
    uint64_t acked_features;
    uint64_t backend_features;

    /**
     * @protocol_features: is the vhost-user only feature set by
     * VHOST_USER_SET_PROTOCOL_FEATURES. Protocol features are only
     * negotiated if VHOST_USER_F_PROTOCOL_FEATURES has been offered
     * by the backend (see @features).
     */
    uint64_t protocol_features;

    uint64_t max_queues;
    uint64_t backend_cap;
    /* @started: is the vhost device started? */
    bool started;
    /* Dirty-memory logging state (see struct vhost_log). */
    bool log_enabled;
    uint64_t log_size;
    /* Blocker registered when the device cannot be migrated. */
    Error *migration_blocker;
    const VhostOps *vhost_ops;
    /* Backend-private pointer passed through vhost_dev_init(). */
    void *opaque;
    struct vhost_log *log;
    /* Linkage in global device lists — confirm list heads in vhost.c. */
    QLIST_ENTRY(vhost_dev) entry;
    QLIST_ENTRY(vhost_dev) logdev_entry;
    /* IOMMU regions this device currently watches. */
    QLIST_HEAD(, vhost_iommu) iommu_list;
    IOMMUNotifier n;
    const VhostDevConfigOps *config_ops;
};
137
/* Per-transport VhostOps implementations (kernel, vhost-user, vDPA). */
extern const VhostOps kernel_ops;
extern const VhostOps user_ops;
extern const VhostOps vdpa_ops;

/*
 * struct vhost_net - vhost state for a network device
 */
struct vhost_net {
    struct vhost_dev dev;           /* common vhost device state */
    struct vhost_virtqueue vqs[2];  /* queue pair (presumably RX/TX — confirm) */
    int backend;                    /* backend fd/index passed to
                                     * vhost_net_set_backend() — confirm */
    NetClientState *nc;             /* associated net client */
};
148
/**
 * vhost_dev_init() - initialise the vhost interface
 * @hdev: the common vhost_dev structure
 * @opaque: opaque ptr passed to backend (vhost/vhost-user/vdpa)
 * @backend_type: type of backend
 * @busyloop_timeout: timeout for polling virtqueue
 * @errp: error handle
 *
 * The initialisation of the vhost device will trigger the
 * initialisation of the backend and potentially capability
 * negotiation of backend interface. Configuration of the VirtIO
 * itself won't happen until the interface is started.
 *
 * Return: 0 on success, non-zero on error while setting errp.
 */
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
                   VhostBackendType backend_type,
                   uint32_t busyloop_timeout, Error **errp);

/**
 * vhost_dev_cleanup() - tear down and cleanup vhost interface
 * @hdev: the common vhost_dev structure
 */
void vhost_dev_cleanup(struct vhost_dev *hdev);

/**
 * vhost_dev_disable_notifiers_nvqs() - disable notifiers for some queues
 * @hdev: the common vhost_dev structure
 * @vdev: the VirtIODevice structure
 * @nvqs: number of virtqueues to act on (presumably the first @nvqs
 *        queues, for partial-setup error paths — confirm in vhost.c)
 */
void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
                                      VirtIODevice *vdev,
                                      unsigned int nvqs);
177
/**
 * vhost_dev_enable_notifiers() - enable event notifiers
 * @hdev: common vhost_dev structure
 * @vdev: the VirtIODevice structure
 *
 * Enable notifications directly to the vhost device rather than being
 * triggered by QEMU itself. Notifications should be enabled before
 * the vhost device is started via @vhost_dev_start.
 *
 * Return: 0 on success, < 0 on error.
 */
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);

/**
 * vhost_dev_disable_notifiers() - disable event notifications
 * @hdev: common vhost_dev structure
 * @vdev: the VirtIODevice structure
 *
 * Disable direct notifications to vhost device.
 */
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);

/**
 * vhost_config_pending() - test for a pending (masked) config event
 * @hdev: common vhost_dev structure
 *
 * Counterpart of vhost_virtqueue_pending() for the config interrupt
 * (see @masked_config_notifier in struct vhost_virtqueue).
 */
bool vhost_config_pending(struct vhost_dev *hdev);

/**
 * vhost_config_mask() - mask/unmask config-change notifications
 * @hdev: common vhost_dev structure
 * @vdev: the VirtIODevice structure
 * @mask: true to mask, false to unmask
 */
void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask);
201
202 /**
203 * vhost_dev_is_started() - report status of vhost device
204 * @hdev: common vhost_dev structure
205 *
206 * Return the started status of the vhost device
207 */
vhost_dev_is_started(struct vhost_dev * hdev)208 static inline bool vhost_dev_is_started(struct vhost_dev *hdev)
209 {
210 return hdev->started;
211 }
212
/**
 * vhost_dev_start() - start the vhost device
 * @hdev: common vhost_dev structure
 * @vdev: the VirtIODevice structure
 * @vrings: true to also have the vrings enabled in this call
 *
 * Starts the vhost device. From this point VirtIO feature negotiation
 * can start and the device can start processing VirtIO transactions.
 * Notifiers should already be enabled (see @vhost_dev_enable_notifiers).
 *
 * Return: 0 on success, < 0 on error.
 */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);

/**
 * vhost_dev_stop() - stop the vhost device
 * @hdev: common vhost_dev structure
 * @vdev: the VirtIODevice structure
 * @vrings: true to also have the vrings disabled in this call
 *
 * Stop the vhost device. After the device is stopped the notifiers
 * can be disabled (@vhost_dev_disable_notifiers) and the device can
 * be torn down (@vhost_dev_cleanup).
 *
 * Return: 0 on success, != 0 on error when stopping dev.
 */
int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
239
/**
 * DOC: vhost device configuration handling
 *
 * The VirtIO device configuration space is used for rarely changing
 * or initialisation time parameters. The configuration can be updated
 * by either the guest driver or the device itself. If the device can
 * change the configuration over time the vhost handler should
 * register a @VhostDevConfigOps structure with
 * @vhost_dev_set_config_notifier so the guest can be notified. Some
 * devices register a handler anyway and will signal an error if an
 * unexpected config change happens.
 */

/**
 * vhost_dev_get_config() - fetch device configuration
 * @hdev: common vhost_dev structure
 * @config: pointer to device appropriate config structure
 * @config_len: size of device appropriate config structure
 * @errp: error handle
 *
 * Return: 0 on success, < 0 on error while setting errp
 */
int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
                         uint32_t config_len, Error **errp);
263
/**
 * vhost_dev_set_config() - set device configuration
 * @dev: common vhost_dev structure
 * @data: pointer to data to set
 * @offset: offset into configuration space
 * @size: length of set
 * @flags: @VhostSetConfigType flags
 *
 * By use of @offset/@size a subset of the configuration space can be
 * written to. The @flags are used to indicate if it is a normal
 * transaction or related to migration.
 *
 * Return: 0 on success, non-zero on error
 */
int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data,
                         uint32_t offset, uint32_t size, uint32_t flags);

/**
 * vhost_dev_set_config_notifier() - register VhostDevConfigOps
 * @dev: common vhost_dev structure
 * @ops: notifier ops
 *
 * If the device is expected to change configuration a notifier can be
 * setup to handle the case.
 */
void vhost_dev_set_config_notifier(struct vhost_dev *dev,
                                   const VhostDevConfigOps *ops);
291
292
/**
 * vhost_virtqueue_pending() - test and clear masked event pending status
 * @hdev: common vhost_dev structure
 * @n: virtqueue index
 *
 * Should be called after unmask to avoid losing events.
 */
bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);

/**
 * vhost_virtqueue_mask() - mask/unmask events from a virtqueue
 * @hdev: common vhost_dev structure
 * @vdev: the VirtIODevice structure
 * @n: virtqueue index
 * @mask: true to mask, false to unmask
 */
void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
                          bool mask);
302
/**
 * vhost_get_features() - return a sanitised set of feature bits
 * @hdev: common vhost_dev structure
 * @feature_bits: pointer to terminated table of feature bits
 *                (presumably terminated by VHOST_INVALID_FEATURE_BIT —
 *                confirm in vhost.c)
 * @features: original feature set
 *
 * This returns a set of features bits that is an intersection of what
 * is supported by the vhost backend (hdev->features), the supported
 * feature_bits and the requested feature set.
 */
uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
                            uint64_t features);

/**
 * vhost_ack_features() - set vhost acked_features
 * @hdev: common vhost_dev structure
 * @feature_bits: pointer to terminated table of feature bits
 * @features: requested feature set
 *
 * This sets the internal hdev->acked_features to the intersection of
 * the backends advertised features and the supported feature_bits.
 */
void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
                        uint64_t features);
/* Maximum number of memory slots supported by the backend(s). */
unsigned int vhost_get_max_memslots(void);
/* Number of memory slots still available for mapping guest memory. */
unsigned int vhost_get_free_memslots(void);

/*
 * vhost_net_set_backend() - attach a backend fd to one of the device's
 * rings, as described by @file (NOTE(review): semantics presumably
 * follow VHOST_NET_SET_BACKEND — confirm).
 */
int vhost_net_set_backend(struct vhost_dev *hdev,
                          struct vhost_vring_file *file);

/* Toggle use of the device IOTLB (see VHOST_F_DEVICE_IOTLB). */
void vhost_toggle_device_iotlb(VirtIODevice *vdev);
/*
 * Handle an IOTLB miss reported by the backend for @iova; @write
 * indicates the access type.
 */
int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);

/* Start/stop a single virtqueue @vq with device-relative index @idx. */
int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev,
                          struct vhost_virtqueue *vq, unsigned idx);
int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
                         struct vhost_virtqueue *vq, unsigned idx);

/* Management of the in-flight descriptor region (struct vhost_inflight). */
void vhost_dev_reset_inflight(struct vhost_inflight *inflight);
void vhost_dev_free_inflight(struct vhost_inflight *inflight);
int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_set_inflight(struct vhost_dev *dev,
                           struct vhost_inflight *inflight);
int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
                           struct vhost_inflight *inflight);
/* Return: true if @dev sits behind an IOMMU (device IOTLB in use). */
bool vhost_dev_has_iommu(struct vhost_dev *dev);
349
#ifdef CONFIG_VHOST
/**
 * vhost_reset_device() - reset the vhost device via its backend
 * @hdev: common vhost_dev structure
 */
int vhost_reset_device(struct vhost_dev *hdev);
#else
/* Stub used when vhost support is compiled out. */
static inline int vhost_reset_device(struct vhost_dev *hdev)
{
    return -ENOSYS;
}
#endif /* CONFIG_VHOST */
358
/**
 * vhost_supports_device_state(): Checks whether the back-end supports
 * transferring internal device state for the purpose of migration.
 * Support for this feature is required for vhost_set_device_state_fd()
 * and vhost_check_device_state().
 *
 * @dev: The vhost device
 *
 * Returns true if the device supports these commands, and false if it
 * does not.
 */
#ifdef CONFIG_VHOST
bool vhost_supports_device_state(struct vhost_dev *dev);
#else
/* Without vhost support no backend can transfer device state. */
static inline bool vhost_supports_device_state(struct vhost_dev *dev)
{
    return false;
}
#endif
378
/**
 * vhost_set_device_state_fd(): Begin transfer of internal state from/to
 * the back-end for the purpose of migration. Data is to be transferred
 * over a pipe according to @direction and @phase. The sending end must
 * only write to the pipe, and the receiving end must only read from it.
 * Once the sending end is done, it closes its FD. The receiving end
 * must take this as the end-of-transfer signal and close its FD, too.
 *
 * @fd is the back-end's end of the pipe: The write FD for SAVE, and the
 * read FD for LOAD. This function transfers ownership of @fd to the
 * back-end, i.e. closes it in the front-end.
 *
 * The back-end may optionally reply with an FD of its own, if this
 * improves efficiency on its end. In this case, the returned FD is
 * stored in *reply_fd. The back-end will discard the FD sent to it,
 * and the front-end must use *reply_fd for transferring state to/from
 * the back-end.
 *
 * @dev: The vhost device
 * @direction: The direction in which the state is to be transferred.
 *             For outgoing migrations, this is SAVE, and data is read
 *             from the back-end and stored by the front-end in the
 *             migration stream.
 *             For incoming migrations, this is LOAD, and data is read
 *             by the front-end from the migration stream and sent to
 *             the back-end to restore the saved state.
 * @phase: Which migration phase we are in. Currently, there is only
 *         STOPPED (device and all vrings are stopped), in the future,
 *         more phases such as PRE_COPY or POST_COPY may be added.
 * @fd: Back-end's end of the pipe through which to transfer state; note
 *      that ownership is transferred to the back-end, so this function
 *      closes @fd in the front-end.
 * @reply_fd: If the back-end wishes to use a different pipe for state
 *            transfer, this will contain an FD for the front-end to
 *            use. Otherwise, -1 is stored here.
 * @errp: Potential error description
 *
 * Return: 0 on success, and -errno on failure.
 */
int vhost_set_device_state_fd(struct vhost_dev *dev,
                              VhostDeviceStateDirection direction,
                              VhostDeviceStatePhase phase,
                              int fd,
                              int *reply_fd,
                              Error **errp);
424
/**
 * vhost_check_device_state(): After transferring state from/to the
 * back-end via vhost_set_device_state_fd(), i.e. once the sending end
 * has closed the pipe, inquire the back-end to report any potential
 * errors that have occurred on its side. This allows to sense errors
 * like:
 * - During outgoing migration, when the source side had already started
 *   to produce its state, something went wrong and it failed to finish
 * - During incoming migration, when the received state is somehow
 *   invalid and cannot be processed by the back-end
 *
 * @dev: The vhost device
 * @errp: Potential error description
 *
 * Return: 0 when the back-end reports successful state transfer and
 * processing, and -errno when an error occurred somewhere.
 */
int vhost_check_device_state(struct vhost_dev *dev, Error **errp);
443
/**
 * vhost_save_backend_state(): High-level function to receive a vhost
 * back-end's state, and save it in @f. Uses
 * `vhost_set_device_state_fd()` to get the data from the back-end, and
 * stores it in consecutive chunks that are each prefixed by their
 * respective length (be32). The end is marked by a 0-length chunk.
 *
 * Must only be called while the device and all its vrings are stopped
 * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`).
 *
 * @dev: The vhost device from which to save the state
 * @f: Migration stream in which to save the state
 * @errp: Potential error message
 *
 * Return: 0 on success, and -errno otherwise.
 */
#ifdef CONFIG_VHOST
int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp);
#else
/* Stub used when vhost support is compiled out. */
static inline int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f,
                                           Error **errp)
{
    return -ENOSYS;
}
#endif
469
/**
 * vhost_load_backend_state(): High-level function to load a vhost
 * back-end's state from @f, and send it over to the back-end. Reads
 * the data from @f in the format used by `vhost_save_backend_state()`,
 * and uses `vhost_set_device_state_fd()` to transfer it to the
 * back-end.
 *
 * Must only be called while the device and all its vrings are stopped
 * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`).
 *
 * @dev: The vhost device to which to send the state
 * @f: Migration stream from which to load the state
 * @errp: Potential error message
 *
 * Return: 0 on success, and -errno otherwise.
 */
#ifdef CONFIG_VHOST
int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp);
#else
/* Stub used when vhost support is compiled out. */
static inline int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f,
                                           Error **errp)
{
    return -ENOSYS;
}
#endif
494
495 #endif
496