1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Implements the virtqueue interface as basically described
31 * in the original VirtIO paper.
32 */
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/mutex.h>
39 #include <sys/sdt.h>
40 #include <sys/sglist.h>
41 #include <vm/vm.h>
42 #include <vm/pmap.h>
43
44 #include <machine/cpu.h>
45 #include <machine/bus.h>
46 #include <machine/atomic.h>
47 #include <machine/resource.h>
48 #include <sys/bus.h>
49 #include <sys/rman.h>
50
51 #include <dev/virtio/virtio.h>
52 #include <dev/virtio/virtqueue.h>
53 #include <dev/virtio/virtio_ring.h>
54
55 #include "virtio_bus_if.h"
56
57 struct virtqueue {
58 device_t vq_dev;
59 struct mtx vq_ring_mtx;
60 struct mtx vq_indirect_mtx;
61 uint16_t vq_queue_index;
62 uint16_t vq_nentries;
63 uint32_t vq_flags;
64 #define VIRTQUEUE_FLAG_MODERN 0x0001
65 #define VIRTQUEUE_FLAG_INDIRECT 0x0002
66 #define VIRTQUEUE_FLAG_EVENT_IDX 0x0004
67
68 int vq_max_indirect_size;
69 bus_size_t vq_notify_offset;
70 virtqueue_intr_t *vq_intrhand;
71 void *vq_intrhand_arg;
72
73 struct vring vq_ring;
74 uint16_t vq_free_cnt;
75 uint16_t vq_queued_cnt;
76 /*
77 * Head of the free chain in the descriptor table. If
78 * there are no free descriptors, this will be set to
79 * VQ_RING_DESC_CHAIN_END.
80 */
81 uint16_t vq_desc_head_idx;
82 /*
83 * Last consumed descriptor in the used table,
84 * trails vq_ring.used->idx.
85 */
86 uint16_t vq_used_cons_idx;
87
88 void *vq_ring_mem;
89 bus_dmamap_t vq_ring_mapp;
90 vm_paddr_t vq_ring_paddr;
91
92 int vq_indirect_mem_size;
93 int vq_alignment;
94 int vq_ring_size;
95 char vq_name[VIRTQUEUE_MAX_NAME_SZ];
96
97 bus_dma_tag_t vq_ring_dmat;
98 bus_dma_tag_t vq_indirect_dmat;
99
100 struct vq_desc_extra {
101 void *cookie;
102 struct vring_desc *indirect;
103 vm_paddr_t indirect_paddr;
104 bus_dmamap_t mapp;
105 uint16_t ndescs;
106 } vq_descx[0];
107 };
108
109 /*
110 * The maximum virtqueue size is 2^15. Use that value as the end of
111 * descriptor chain terminator since it will never be a valid index
112 * in the descriptor table. This is used to verify we are correctly
113 * handling vq_free_cnt.
114 */
115 #define VQ_RING_DESC_CHAIN_END 32768
116
117 #define VQASSERT(_vq, _exp, _msg, ...) \
118 KASSERT((_exp),("%s: %s - "_msg, __func__, (_vq)->vq_name, \
119 ##__VA_ARGS__))
120
121 #define VQ_RING_ASSERT_VALID_IDX(_vq, _idx) \
122 VQASSERT((_vq), (_idx) < (_vq)->vq_nentries, \
123 "invalid ring index: %d, max: %d", (_idx), \
124 (_vq)->vq_nentries)
125
126 #define VQ_RING_ASSERT_CHAIN_TERM(_vq) \
127 VQASSERT((_vq), (_vq)->vq_desc_head_idx == \
128 VQ_RING_DESC_CHAIN_END, "full ring terminated " \
129 "incorrectly: head idx: %d", (_vq)->vq_desc_head_idx)
130
131 static int virtqueue_init_indirect(struct virtqueue *vq, int);
132 static void virtqueue_free_indirect(struct virtqueue *vq);
133 static void virtqueue_init_indirect_list(struct virtqueue *,
134 struct vring_desc *);
135
136 static void vq_ring_init(struct virtqueue *);
137 static void vq_ring_update_avail(struct virtqueue *, uint16_t);
138 static uint16_t vq_ring_enqueue_segments(struct virtqueue *,
139 struct vring_desc *, uint16_t, struct sglist *, int, int);
140 static bool vq_ring_use_indirect(struct virtqueue *, int);
141 static void vq_ring_enqueue_indirect(struct virtqueue *, void *,
142 struct sglist *, int, int);
143 static int vq_ring_enable_interrupt(struct virtqueue *, uint16_t);
144 static int vq_ring_must_notify_host(struct virtqueue *);
145 static void vq_ring_notify_host(struct virtqueue *);
146 static void vq_ring_free_chain(struct virtqueue *, uint16_t);
147
148 SDT_PROVIDER_DEFINE(virtqueue);
149 SDT_PROBE_DEFINE6(virtqueue, , enqueue_segments, entry, "struct virtqueue *",
150 "struct vring_desc *", "uint16_t", "struct sglist *", "int", "int");
151 SDT_PROBE_DEFINE1(virtqueue, , enqueue_segments, return, "uint16_t");
152
153 #define vq_modern(_vq) (((_vq)->vq_flags & VIRTQUEUE_FLAG_MODERN) != 0)
154 #define vq_htog16(_vq, _val) virtio_htog16(vq_modern(_vq), _val)
155 #define vq_htog32(_vq, _val) virtio_htog32(vq_modern(_vq), _val)
156 #define vq_htog64(_vq, _val) virtio_htog64(vq_modern(_vq), _val)
157 #define vq_gtoh16(_vq, _val) virtio_gtoh16(vq_modern(_vq), _val)
158 #define vq_gtoh32(_vq, _val) virtio_gtoh32(vq_modern(_vq), _val)
159 #define vq_gtoh64(_vq, _val) virtio_gtoh64(vq_modern(_vq), _val)
160
161 static void
virtqueue_ring_load_callback(void * arg,bus_dma_segment_t * segs,int nsegs,int error)162 virtqueue_ring_load_callback(void *arg, bus_dma_segment_t *segs,
163 int nsegs, int error)
164 {
165 struct virtqueue *vq;
166
167 if (error != 0)
168 return;
169
170 KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
171
172 vq = (struct virtqueue *)arg;
173 vq->vq_ring_paddr = segs[0].ds_addr;
174 }
175
/*
 * Allocate a virtqueue of 'size' entries for queue index 'queue'.
 * Validates the size (must be a non-zero power of two), allocates the
 * virtqueue structure plus per-descriptor state, creates the busdma
 * tag, DMA memory and mapping for the vring, and optionally sets up
 * indirect descriptor tables.  On success stores the new queue in
 * *vqp and returns 0; on error returns an errno and tears down any
 * partial state via virtqueue_free().
 */
int
virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size,
    bus_size_t notify_offset, int align, vm_paddr_t highaddr,
    struct vq_alloc_info *info, struct virtqueue **vqp)
{
	struct virtqueue *vq;
	int error;

	*vqp = NULL;
	error = 0;

	if (size == 0) {
		device_printf(dev,
		    "virtqueue %d (%s) does not exist (size is zero)\n",
		    queue, info->vqai_name);
		return (ENODEV);
	} else if (!powerof2(size)) {
		device_printf(dev,
		    "virtqueue %d (%s) size is not a power of 2: %d\n",
		    queue, info->vqai_name, size);
		return (ENXIO);
	} else if (info->vqai_maxindirsz > VIRTIO_MAX_INDIRECT) {
		device_printf(dev, "virtqueue %d (%s) requested too many "
		    "indirect descriptors: %d, max %d\n",
		    queue, info->vqai_name, info->vqai_maxindirsz,
		    VIRTIO_MAX_INDIRECT);
		return (EINVAL);
	}

	/* One vq_desc_extra per ring entry follows the structure proper. */
	vq = malloc(sizeof(struct virtqueue) +
	    size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq == NULL) {
		device_printf(dev, "cannot allocate virtqueue\n");
		return (ENOMEM);
	}

	vq->vq_dev = dev;
	strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
	vq->vq_queue_index = queue;
	vq->vq_notify_offset = notify_offset;
	vq->vq_alignment = align;
	vq->vq_nentries = size;
	vq->vq_free_cnt = size;
	vq->vq_intrhand = info->vqai_intr;
	vq->vq_intrhand_arg = info->vqai_intr_arg;

	/* Byte order and event-index behavior depend on negotiated features. */
	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_F_VERSION_1) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_MODERN;
	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;

	vq->vq_ring_size = round_page(vring_size(size, align));

	mtx_init(&vq->vq_ring_mtx, device_get_nameunit(dev),
	    "VirtIO Queue Lock", MTX_DEF);

	/*
	 * Single-segment tag covering the entire ring allocation.
	 * NOTE(review): the 'highaddr' parameter is currently unused;
	 * the tag is created with BUS_SPACE_MAXADDR — confirm intended.
	 */
	error = bus_dma_tag_create(
	    bus_get_dma_tag(dev),	/* parent */
	    align,			/* alignment */
	    0,				/* boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    vq->vq_ring_size,		/* max request size */
	    1,				/* max # segments */
	    vq->vq_ring_size,		/* maxsegsize */
	    BUS_DMA_COHERENT,		/* flags */
	    busdma_lock_mutex,		/* lockfunc */
	    &vq->vq_ring_mtx,		/* lockarg */
	    &vq->vq_ring_dmat);
	if (error) {
		device_printf(dev, "cannot create bus_dma_tag\n");
		goto fail;
	}

#ifdef __powerpc__
	/*
	 * Virtio uses physical addresses rather than bus addresses, so we
	 * need to ask busdma to skip the iommu physical->bus mapping. At
	 * present, this is only a thing on the powerpc architectures.
	 */
	bus_dma_tag_set_iommu(vq->vq_ring_dmat, NULL, NULL);
#endif

	if (info->vqai_maxindirsz > 1) {
		error = virtqueue_init_indirect(vq, info->vqai_maxindirsz);
		if (error)
			goto fail;
	}

	error = bus_dmamem_alloc(vq->vq_ring_dmat, &vq->vq_ring_mem,
	    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
	    &vq->vq_ring_mapp);
	if (error) {
		device_printf(dev, "bus_dmamem_alloc failed\n");
		goto fail;
	}

	/* The load callback records the ring's physical address. */
	error = bus_dmamap_load(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    vq->vq_ring_mem, vq->vq_ring_size, virtqueue_ring_load_callback,
	    vq, BUS_DMA_NOWAIT);
	if (error) {
		device_printf(dev, "vq->vq_ring_mapp load failed\n");
		goto fail;
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	*vqp = vq;

fail:
	if (error)
		virtqueue_free(vq);

	return (error);
}
293
294 static void
virtqueue_indirect_load_callback(void * arg,bus_dma_segment_t * segs,int nsegs,int error)295 virtqueue_indirect_load_callback(void *arg, bus_dma_segment_t *segs,
296 int nsegs, int error)
297 {
298 struct vq_desc_extra *dxp;
299
300 if (error != 0)
301 return;
302
303 KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
304
305 dxp = (struct vq_desc_extra *)arg;
306 dxp->indirect_paddr = segs[0].ds_addr;
307 }
308
309 static int
virtqueue_init_indirect(struct virtqueue * vq,int indirect_size)310 virtqueue_init_indirect(struct virtqueue *vq, int indirect_size)
311 {
312 device_t dev;
313 struct vq_desc_extra *dxp;
314 int i, size;
315 int error;
316 int align;
317
318 dev = vq->vq_dev;
319
320 if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
321 /*
322 * Indirect descriptors requested by the driver but not
323 * negotiated. Return zero to keep the initialization
324 * going: we'll run fine without.
325 */
326 if (bootverbose)
327 device_printf(dev, "virtqueue %d (%s) requested "
328 "indirect descriptors but not negotiated\n",
329 vq->vq_queue_index, vq->vq_name);
330 return (0);
331 }
332
333 size = indirect_size * sizeof(struct vring_desc);
334 vq->vq_max_indirect_size = indirect_size;
335 vq->vq_indirect_mem_size = size;
336 vq->vq_flags |= VIRTQUEUE_FLAG_INDIRECT;
337
338 mtx_init(&vq->vq_indirect_mtx, device_get_nameunit(dev),
339 "VirtIO Indirect Queue Lock", MTX_DEF);
340
341 align = size;
342 error = bus_dma_tag_create(
343 bus_get_dma_tag(dev), /* parent */
344 roundup_pow_of_two(align), /* alignment */
345 0, /* boundary */
346 BUS_SPACE_MAXADDR, /* lowaddr */
347 BUS_SPACE_MAXADDR, /* highaddr */
348 NULL, NULL, /* filter, filterarg */
349 size, /* max request size */
350 1, /* max # segments */
351 size, /* maxsegsize */
352 BUS_DMA_COHERENT, /* flags */
353 busdma_lock_mutex, /* lockfunc */
354 &vq->vq_indirect_mtx, /* lockarg */
355 &vq->vq_indirect_dmat);
356 if (error) {
357 device_printf(dev, "cannot create indirect bus_dma_tag\n");
358 return (error);
359 }
360
361 #ifdef __powerpc__
362 /*
363 * Virtio uses physical addresses rather than bus addresses, so we
364 * need to ask busdma to skip the iommu physical->bus mapping. At
365 * present, this is only a thing on the powerpc architectures.
366 */
367 bus_dma_tag_set_iommu(vq->vq_indirect_dmat, NULL, NULL);
368 #endif
369
370 for (i = 0; i < vq->vq_nentries; i++) {
371 dxp = &vq->vq_descx[i];
372
373 error = bus_dmamem_alloc(vq->vq_indirect_dmat,
374 (void **)&dxp->indirect,
375 BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
376 &dxp->mapp);
377 if (error) {
378 panic("dxp->mapp alloc failed\n");
379 return (error);
380 }
381
382 error = bus_dmamap_load(vq->vq_indirect_dmat, dxp->mapp,
383 dxp->indirect, size, virtqueue_indirect_load_callback, dxp,
384 BUS_DMA_NOWAIT);
385 if (error) {
386 panic("dxp->mapp load failed\n");
387 bus_dmamem_free(vq->vq_indirect_dmat, dxp->indirect,
388 dxp->mapp);
389 dxp->indirect = NULL;
390 return (error);
391 }
392
393 virtqueue_init_indirect_list(vq, dxp->indirect);
394 }
395
396 return (0);
397 }
398
399 static void
virtqueue_free_indirect(struct virtqueue * vq)400 virtqueue_free_indirect(struct virtqueue *vq)
401 {
402 struct vq_desc_extra *dxp;
403 int i;
404
405 for (i = 0; i < vq->vq_nentries; i++) {
406 dxp = &vq->vq_descx[i];
407
408 if (dxp->indirect == NULL)
409 break;
410
411 bus_dmamap_unload(vq->vq_indirect_dmat, dxp->mapp);
412 bus_dmamem_free(vq->vq_indirect_dmat, dxp->indirect, dxp->mapp);
413 dxp->indirect = NULL;
414 dxp->indirect_paddr = 0;
415 }
416
417 vq->vq_flags &= ~VIRTQUEUE_FLAG_INDIRECT;
418 vq->vq_indirect_mem_size = 0;
419 }
420
421 static void
virtqueue_init_indirect_list(struct virtqueue * vq,struct vring_desc * indirect)422 virtqueue_init_indirect_list(struct virtqueue *vq,
423 struct vring_desc *indirect)
424 {
425 int i;
426
427 bzero(indirect, vq->vq_indirect_mem_size);
428
429 for (i = 0; i < vq->vq_max_indirect_size - 1; i++)
430 indirect[i].next = vq_gtoh16(vq, i + 1);
431 indirect[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);
432 }
433
/*
 * Reset an existing virtqueue back to its freshly-allocated state,
 * e.g. after a device reset.  'size' must match the original
 * allocation.  Any outstanding descriptors are leaked with a warning;
 * callers should drain the queue first.  Returns 0 or EINVAL on a
 * size mismatch.
 */
int
virtqueue_reinit(struct virtqueue *vq, uint16_t size)
{
	struct vq_desc_extra *dxp;
	int i;

	if (vq->vq_nentries != size) {
		device_printf(vq->vq_dev,
		    "%s: '%s' changed size; old=%hu, new=%hu\n",
		    __func__, vq->vq_name, vq->vq_nentries, size);
		return (EINVAL);
	}

	/* Warn if the virtqueue was not properly cleaned up. */
	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev,
		    "%s: warning '%s' virtqueue not empty, "
		    "leaking %d entries\n", __func__, vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	vq->vq_desc_head_idx = 0;
	vq->vq_used_cons_idx = 0;
	vq->vq_queued_cnt = 0;
	vq->vq_free_cnt = vq->vq_nentries;

	/* To be safe, reset all our allocated memory. */
	bzero(vq->vq_ring_mem, vq->vq_ring_size);
	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];
		dxp->cookie = NULL;
		dxp->ndescs = 0;
		if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
			virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	return (0);
}
475
476 void
virtqueue_free(struct virtqueue * vq)477 virtqueue_free(struct virtqueue *vq)
478 {
479
480 if (vq->vq_free_cnt != vq->vq_nentries) {
481 device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, "
482 "leaking %d entries\n", vq->vq_name,
483 vq->vq_nentries - vq->vq_free_cnt);
484 }
485
486 if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
487 virtqueue_free_indirect(vq);
488
489 if (vq->vq_ring_mem != NULL) {
490 bus_dmamap_unload(vq->vq_ring_dmat, vq->vq_ring_mapp);
491 bus_dmamem_free(vq->vq_ring_dmat, vq->vq_ring_mem,
492 vq->vq_ring_mapp);
493 vq->vq_ring_size = 0;
494 }
495
496 if (vq->vq_ring_dmat != NULL) {
497 bus_dma_tag_destroy(vq->vq_ring_dmat);
498 }
499
500 free(vq, M_DEVBUF);
501 }
502
/* Physical address of the start of the ring DMA memory. */
vm_paddr_t
virtqueue_paddr(struct virtqueue *vq)
{
	return (vq->vq_ring_paddr);
}
508
/* Physical address of the descriptor table. */
vm_paddr_t
virtqueue_desc_paddr(struct virtqueue *vq)
{
	return (vq->vq_ring.desc_paddr);
}
514
/* Physical address of the available (driver->device) ring. */
vm_paddr_t
virtqueue_avail_paddr(struct virtqueue *vq)
{
	return (vq->vq_ring.avail_paddr);
}
520
/* Physical address of the used (device->driver) ring. */
vm_paddr_t
virtqueue_used_paddr(struct virtqueue *vq)
{
	return (vq->vq_ring.used_paddr);
}
526
/* Index of this queue on its device. */
uint16_t
virtqueue_index(struct virtqueue *vq)
{

	return (vq->vq_queue_index);
}
533
/* Total number of descriptors in the ring. */
int
virtqueue_size(struct virtqueue *vq)
{

	return (vq->vq_nentries);
}
540
/* Number of free descriptors currently available for enqueue. */
int
virtqueue_nfree(struct virtqueue *vq)
{

	return (vq->vq_free_cnt);
}
547
/* True if no descriptors are outstanding. */
bool
virtqueue_empty(struct virtqueue *vq)
{

	return (vq->vq_nentries == vq->vq_free_cnt);
}
554
/* True if every descriptor is in use. */
bool
virtqueue_full(struct virtqueue *vq)
{

	return (vq->vq_free_cnt == 0);
}
561
/*
 * Notify the host of newly queued descriptors, unless the host has
 * suppressed notifications (see vq_ring_must_notify_host()).  Resets
 * the pending-enqueue count either way.
 */
void
virtqueue_notify(struct virtqueue *vq)
{
	/* Ensure updated avail->idx is visible to host. */
	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
#if defined(__i386__) || defined(__amd64__)
	mb();
#endif

	if (vq_ring_must_notify_host(vq))
		vq_ring_notify_host(vq);
	vq->vq_queued_cnt = 0;
}
576
/*
 * Return the number of used-ring entries posted by the device that
 * we have not yet consumed.
 */
int
virtqueue_nused(struct virtqueue *vq)
{
	uint16_t used_idx, nused;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	used_idx = vq_htog16(vq, vq->vq_ring.used->idx);

	/* Unsigned 16-bit subtraction handles index wraparound. */
	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");

	return (nused);
}
592
/*
 * Interrupt filter: return 1 (after disabling further interrupts) if
 * the used ring has pending work, 0 if the interrupt can be ignored.
 */
int
virtqueue_intr_filter(struct virtqueue *vq)
{
	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx))
		return (0);

	virtqueue_disable_intr(vq);

	return (1);
}
606
/* Invoke the driver-supplied interrupt handler for this queue. */
void
virtqueue_intr(struct virtqueue *vq)
{

	vq->vq_intrhand(vq->vq_intrhand_arg);
}
613
/*
 * Enable interrupts with no postponement.  Returns non-zero if used
 * entries are already pending and should be processed by the caller.
 */
int
virtqueue_enable_intr(struct virtqueue *vq)
{

	return (vq_ring_enable_interrupt(vq, 0));
}
620
/*
 * Enable interrupts but, when event indexes are negotiated, ask the
 * host to postpone them until a fraction of the currently outstanding
 * descriptors has been consumed: 1/4 (SHORT), 3/4 (LONG), or all of
 * them (EMPTIED).
 */
int
virtqueue_postpone_intr(struct virtqueue *vq, vq_postpone_t hint)
{
	uint16_t ndesc, avail_idx;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	/* Descriptors made available but not yet consumed by the host. */
	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx);

	switch (hint) {
	case VQ_POSTPONE_SHORT:
		ndesc = ndesc / 4;
		break;
	case VQ_POSTPONE_LONG:
		ndesc = (ndesc * 3) / 4;
		break;
	case VQ_POSTPONE_EMPTIED:
		break;
	}

	return (vq_ring_enable_interrupt(vq, ndesc));
}
645
/*
 * Note this is only considered a hint to the host.
 */
void
virtqueue_disable_intr(struct virtqueue *vq)
{

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		/*
		 * With event indexes, park the used event far behind
		 * the current consumed index so the threshold is not
		 * reached and the host refrains from interrupting.
		 */
		vring_used_event(&vq->vq_ring) = vq_gtoh16(vq,
		    vq->vq_used_cons_idx - vq->vq_nentries - 1);
		return;
	}

	vq->vq_ring.avail->flags |= vq_gtoh16(vq, VRING_AVAIL_F_NO_INTERRUPT);

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}
664
/*
 * Enqueue a scatter/gather list as one descriptor chain: 'readable'
 * device-readable segments followed by 'writable' device-writable
 * segments.  Uses an indirect table when profitable, otherwise
 * consumes 'readable + writable' ring descriptors.  Returns 0,
 * EINVAL (empty request), ENOSPC (ring full), or EMSGSIZE (not
 * enough free descriptors for a direct chain).
 */
int
virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg,
    int readable, int writable)
{
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx, idx;

	needed = readable + writable;

	VQASSERT(vq, cookie != NULL, "enqueuing with no cookie");
	VQASSERT(vq, needed == sg->sg_nseg,
	    "segment count mismatch, %d, %d", needed, sg->sg_nseg);
	VQASSERT(vq,
	    needed <= vq->vq_nentries || needed <= vq->vq_max_indirect_size,
	    "too many segments to enqueue: %d, %d/%d", needed,
	    vq->vq_nentries, vq->vq_max_indirect_size);

	if (needed < 1)
		return (EINVAL);
	if (vq->vq_free_cnt == 0)
		return (ENOSPC);

	if (vq_ring_use_indirect(vq, needed)) {
		vq_ring_enqueue_indirect(vq, cookie, sg, readable, writable);
		return (0);
	} else if (vq->vq_free_cnt < needed)
		return (EMSGSIZE);

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = needed;

	/* Returns the index following the tail of the new chain. */
	idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx,
	    sg, readable, writable);

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);

	vq->vq_desc_head_idx = idx;
	vq->vq_free_cnt -= needed;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, idx);

	/* Expose the chain head to the host via the avail ring. */
	vq_ring_update_avail(vq, head_idx);

	return (0);
}
720
/*
 * Retrieve the next completed chain from the used ring.  Returns the
 * cookie passed to virtqueue_enqueue(), or NULL if nothing is
 * pending.  If 'len' is non-NULL it receives the length the device
 * reported for the chain.
 */
void *
virtqueue_dequeue(struct virtqueue *vq, uint32_t *len)
{
	struct vring_used_elem *uep;
	void *cookie;
	uint16_t used_idx, desc_idx;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	if (vq->vq_used_cons_idx ==
	    vq_htog16(vq, atomic_load_16(&vq->vq_ring.used->idx)))
		return (NULL);

	used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
	uep = &vq->vq_ring.used->ring[used_idx];

	/* Order the used->idx read above before reading the element. */
	rmb();
	desc_idx = (uint16_t) vq_htog32(vq, uep->id);
	if (len != NULL)
		*len = vq_htog32(vq, uep->len);

	vq_ring_free_chain(vq, desc_idx);

	cookie = vq->vq_descx[desc_idx].cookie;
	VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx);
	vq->vq_descx[desc_idx].cookie = NULL;

	return (cookie);
}
751
752 void *
virtqueue_poll(struct virtqueue * vq,uint32_t * len)753 virtqueue_poll(struct virtqueue *vq, uint32_t *len)
754 {
755 void *cookie;
756
757 while ((cookie = virtqueue_dequeue(vq, len)) == NULL) {
758 cpu_spinwait();
759 }
760
761 return (cookie);
762 }
763
764 void *
virtqueue_drain(struct virtqueue * vq,int * last)765 virtqueue_drain(struct virtqueue *vq, int *last)
766 {
767 void *cookie;
768 int idx;
769
770 cookie = NULL;
771 idx = *last;
772
773 while (idx < vq->vq_nentries && cookie == NULL) {
774 if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
775 vq->vq_descx[idx].cookie = NULL;
776 /* Free chain to keep free count consistent. */
777 vq_ring_free_chain(vq, idx);
778 }
779 idx++;
780 }
781
782 *last = idx;
783
784 return (cookie);
785 }
786
/* Print the virtqueue's indexes, counters and flags for debugging. */
void
virtqueue_dump(struct virtqueue *vq)
{

	if (vq == NULL)
		return;

	printf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
	    "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
	    "used.idx=%d; used_event_idx=%d; avail.flags=0x%x; used.flags=0x%x\n",
	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt, virtqueue_nused(vq),
	    vq->vq_queued_cnt, vq->vq_desc_head_idx,
	    vq_htog16(vq, vq->vq_ring.avail->idx), vq->vq_used_cons_idx,
	    vq_htog16(vq, vq->vq_ring.used->idx),
	    vq_htog16(vq, vring_used_event(&vq->vq_ring)),
	    vq_htog16(vq, vq->vq_ring.avail->flags),
	    vq_htog16(vq, vq->vq_ring.used->flags));
}
805
806 static void
vq_ring_init(struct virtqueue * vq)807 vq_ring_init(struct virtqueue *vq)
808 {
809 struct vring *vr;
810 char *ring_mem;
811 int i, size;
812
813 ring_mem = vq->vq_ring_mem;
814 size = vq->vq_nentries;
815 vr = &vq->vq_ring;
816
817 vring_init(vr, size, ring_mem, vq->vq_ring_paddr, vq->vq_alignment);
818
819 for (i = 0; i < size - 1; i++)
820 vr->desc[i].next = vq_gtoh16(vq, i + 1);
821 vr->desc[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);
822
823 bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
824 BUS_DMASYNC_PREWRITE);
825 }
826
/*
 * Publish the chain headed by 'desc_idx' in the next avail ring slot
 * and advance avail->idx.
 */
static void
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx, avail_ring_idx;

	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	avail_ring_idx = avail_idx & (vq->vq_nentries - 1);
	vq->vq_ring.avail->ring[avail_ring_idx] = vq_gtoh16(vq, desc_idx);

	/* The ring entry must be visible before the index update. */
	wmb();
	vq->vq_ring.avail->idx = vq_gtoh16(vq, avail_idx + 1);

	/* Keep pending count until virtqueue_notify(). */
	vq->vq_queued_cnt++;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}
852
/*
 * Fill 'needed' descriptors in 'desc' (either the main ring or an
 * indirect table) from the scatter/gather list, following the
 * pre-linked free chain starting at 'head_idx'.  The first 'readable'
 * segments are device-readable; the rest are marked device-writable.
 * Returns the index following the last descriptor of the chain.
 */
static uint16_t
vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc,
    uint16_t head_idx, struct sglist *sg, int readable, int writable)
{
	struct sglist_seg *seg;
	struct vring_desc *dp;
	int i, needed;
	uint16_t idx;

	SDT_PROBE6(virtqueue, , enqueue_segments, entry, vq, desc, head_idx,
	    sg, readable, writable);

	needed = readable + writable;

	/*
	 * Each iteration advances 'idx' by reading the previous
	 * descriptor's 'next' link (dp is set inside the loop body).
	 */
	for (i = 0, idx = head_idx, seg = sg->sg_segs;
	    i < needed;
	    i++, idx = vq_htog16(vq, dp->next), seg++) {
		VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
		    "premature end of free desc chain");

		dp = &desc[idx];
		dp->addr = vq_gtoh64(vq, seg->ss_paddr);
		dp->len = vq_gtoh32(vq, seg->ss_len);
		dp->flags = 0;

		if (i < needed - 1)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_NEXT);
		if (i >= readable)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_WRITE);
	}

	SDT_PROBE1(virtqueue, , enqueue_segments, return, idx);
	return (idx);
}
887
888 static bool
vq_ring_use_indirect(struct virtqueue * vq,int needed)889 vq_ring_use_indirect(struct virtqueue *vq, int needed)
890 {
891
892 if ((vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT) == 0)
893 return (false);
894
895 if (vq->vq_max_indirect_size < needed)
896 return (false);
897
898 if (needed < 2)
899 return (false);
900
901 return (true);
902 }
903
/*
 * Enqueue a chain using a single main-ring descriptor that points at
 * this entry's preallocated indirect table holding the actual
 * segments.  The caller has already verified a free descriptor exists
 * and that 'readable + writable' fits the indirect table.
 */
static void
vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
    struct sglist *sg, int readable, int writable)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx;

	needed = readable + writable;
	VQASSERT(vq, needed <= vq->vq_max_indirect_size,
	    "enqueuing too many indirect descriptors");

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dp = &vq->vq_ring.desc[head_idx];
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	/* The whole chain consumes exactly one main-ring descriptor. */
	dxp->ndescs = 1;

	dp->addr = vq_gtoh64(vq, dxp->indirect_paddr);
	dp->len = vq_gtoh32(vq, needed * sizeof(struct vring_desc));
	dp->flags = vq_gtoh16(vq, VRING_DESC_F_INDIRECT);

	/* Fill the indirect table starting at its index 0. */
	vq_ring_enqueue_segments(vq, dxp->indirect, 0,
	    sg, readable, writable);

	bus_dmamap_sync(vq->vq_indirect_dmat, dxp->mapp, BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);

	vq->vq_desc_head_idx = vq_htog16(vq, dp->next);
	vq->vq_free_cnt--;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, vq->vq_desc_head_idx);

	vq_ring_update_avail(vq, head_idx);
}
947
/*
 * Re-enable interrupts, requesting (when event indexes are
 * negotiated) that the host delay the interrupt until 'ndesc' more
 * entries have been consumed.  Returns 1 if entries beyond the
 * threshold are already pending, so the caller should process them.
 */
static int
vq_ring_enable_interrupt(struct virtqueue *vq, uint16_t ndesc)
{

	/*
	 * Enable interrupts, making sure we get the latest index of
	 * what's already been consumed.
	 */
	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		vring_used_event(&vq->vq_ring) =
		    vq_gtoh16(vq, vq->vq_used_cons_idx + ndesc);
	} else {
		vq->vq_ring.avail->flags &=
		    vq_gtoh16(vq, ~VRING_AVAIL_F_NO_INTERRUPT);
	}

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
#if defined(__i386__) || defined(__amd64__)
	mb();
#endif

	/*
	 * Enough items may have already been consumed to meet our threshold
	 * since we last checked. Let our caller know so it processes the new
	 * entries.
	 */
	if (virtqueue_nused(vq) > ndesc)
		return (1);

	return (0);
}
980
/*
 * Decide whether the host must be notified of new avail entries:
 * with event indexes, notify only if avail->idx crossed the host's
 * avail event index; otherwise honor VRING_USED_F_NO_NOTIFY.
 */
static int
vq_ring_must_notify_host(struct virtqueue *vq)
{
	uint16_t new_idx, prev_idx, event_idx, flags;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_POSTREAD);

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		/* prev_idx is avail->idx before this batch was queued. */
		new_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
		prev_idx = new_idx - vq->vq_queued_cnt;
		event_idx = vq_htog16(vq, vring_avail_event(&vq->vq_ring));

		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
	}

	flags = vq->vq_ring.used->flags;
	return ((flags & vq_gtoh16(vq, VRING_USED_F_NO_NOTIFY)) == 0);
}
1000
/* Kick the host via the bus-specific notify method. */
static void
vq_ring_notify_host(struct virtqueue *vq)
{

	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index,
	    vq->vq_notify_offset);
}
1008
/*
 * Return the descriptor chain starting at 'desc_idx' to the free
 * list.  A direct chain is walked via its NEXT links; an indirect
 * chain occupies exactly one main-ring descriptor (ndescs == 1).
 */
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;

	VQ_RING_ASSERT_VALID_IDX(vq, desc_idx);
	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];

	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);

	/* Reclaim the whole chain; count down ndescs as we walk it. */
	vq->vq_free_cnt += dxp->ndescs;
	dxp->ndescs--;

	if ((dp->flags & vq_gtoh16(vq, VRING_DESC_F_INDIRECT)) == 0) {
		while (dp->flags & vq_gtoh16(vq, VRING_DESC_F_NEXT)) {
			uint16_t next_idx = vq_htog16(vq, dp->next);
			VQ_RING_ASSERT_VALID_IDX(vq, next_idx);
			dp = &vq->vq_ring.desc[next_idx];
			dxp->ndescs--;
		}
	}

	VQASSERT(vq, dxp->ndescs == 0,
	    "failed to free entire desc chain, remaining: %d", dxp->ndescs);

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	dp->next = vq_gtoh16(vq, vq->vq_desc_head_idx);
	vq->vq_desc_head_idx = desc_idx;

	bus_dmamap_sync(vq->vq_ring_dmat, vq->vq_ring_mapp,
	    BUS_DMASYNC_PREWRITE);
}
1048