xref: /qemu/hw/hyperv/vmbus.c (revision f9bb7e53a341d08fd4ec8d7e810ebfd4f6f936bd)
1 /*
2  * QEMU Hyper-V VMBus
3  *
4  * Copyright (c) 2017-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/error-report.h"
12 #include "qemu/main-loop.h"
13 #include "exec/target_page.h"
14 #include "qapi/error.h"
15 #include "migration/vmstate.h"
16 #include "hw/qdev-properties.h"
17 #include "hw/qdev-properties-system.h"
18 #include "hw/hyperv/hyperv.h"
19 #include "hw/hyperv/vmbus.h"
20 #include "hw/hyperv/vmbus-bridge.h"
21 #include "hw/sysbus.h"
23 #include "trace.h"
24 
25 enum {
26     VMGPADL_INIT,
27     VMGPADL_ALIVE,
28     VMGPADL_TEARINGDOWN,
29     VMGPADL_TORNDOWN,
30 };
31 
32 struct VMBusGpadl {
33     /* GPADL id */
34     uint32_t id;
35     /* associated channel id (rudimentary?) */
36     uint32_t child_relid;
37 
38     /* number of pages in the GPADL as declared in GPADL_HEADER message */
39     uint32_t num_gfns;
40     /*
41      * Due to limited message size, GPADL may not fit fully in a single
42      * GPADL_HEADER message, and is further populated using GPADL_BODY
43      * messages.  @seen_gfns is the number of pages seen so far; once it
44      * reaches @num_gfns, the GPADL is ready to use.
45      */
46     uint32_t seen_gfns;
47     /* array of GFNs (of size @num_gfns once allocated) */
48     uint64_t *gfns;
49 
50     uint8_t state;
51 
52     QTAILQ_ENTRY(VMBusGpadl) link;
53     VMBus *vmbus;
54     unsigned refcount;
55 };
56 
57 /*
58  * Wrap sequential read from / write to GPADL.
59  */
60 typedef struct GpadlIter {
61     VMBusGpadl *gpadl;
62     AddressSpace *as;
63     DMADirection dir;
64     /* offset into GPADL where the next i/o will be performed */
65     uint32_t off;
66     /*
67      * Cached mapping of the currently accessed page, up to page boundary.
68      * Updated lazily on i/o.
69      * Note: MemoryRegionCache can not be used here because pages in the GPADL
70      * are non-contiguous and may belong to different memory regions.
71      */
72     void *map;
73     /* offset after last i/o (i.e. not affected by seek) */
74     uint32_t last_off;
75     /*
76      * Indicator that the iterator is active and may have a cached mapping.
77      * Allows enforcing bracketing of all i/o (which may create cached
78      * mappings) and thus excluding mapping leaks.
79      */
80     bool active;
81 } GpadlIter;
82 
83 /*
84  * Ring buffer.  There are two of them, sitting in the same GPADL, for each
85  * channel.
86  * Each ring buffer consists of a set of pages, with the first page containing
87  * the ring buffer header, and the remaining pages being for data packets.
88  */
89 typedef struct VMBusRingBufCommon {
90     AddressSpace *as;
91     /* GPA of the ring buffer header */
92     dma_addr_t rb_addr;
93     /* start and length of the ring buffer data area within GPADL */
94     uint32_t base;
95     uint32_t len;
96 
97     GpadlIter iter;
98 } VMBusRingBufCommon;
99 
100 typedef struct VMBusSendRingBuf {
101     VMBusRingBufCommon common;
102     /* current write index, to be committed at the end of send */
103     uint32_t wr_idx;
104     /* write index at the start of send */
105     uint32_t last_wr_idx;
106     /* space to be requested from the guest */
107     uint32_t wanted;
108     /* space reserved for planned sends */
109     uint32_t reserved;
110     /* last seen read index */
111     uint32_t last_seen_rd_idx;
112 } VMBusSendRingBuf;
113 
114 typedef struct VMBusRecvRingBuf {
115     VMBusRingBufCommon common;
116     /* current read index, to be committed at the end of receive */
117     uint32_t rd_idx;
118     /* read index at the start of receive */
119     uint32_t last_rd_idx;
120     /* last seen write index */
121     uint32_t last_seen_wr_idx;
122 } VMBusRecvRingBuf;
123 
124 
125 enum {
126     VMOFFER_INIT,
127     VMOFFER_SENDING,
128     VMOFFER_SENT,
129 };
130 
131 enum {
132     VMCHAN_INIT,
133     VMCHAN_OPENING,
134     VMCHAN_OPEN,
135 };
136 
137 struct VMBusChannel {
138     VMBusDevice *dev;
139 
140     /* channel id */
141     uint32_t id;
142     /*
143      * subchannel index within the device; subchannel #0 is "primary" and
144      * always exists
145      */
146     uint16_t subchan_idx;
147     uint32_t open_id;
148     /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
149     uint32_t target_vp;
150     /* GPADL id to use for the ring buffers */
151     uint32_t ringbuf_gpadl;
152     /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
153     uint32_t ringbuf_send_offset;
154 
155     uint8_t offer_state;
156     uint8_t state;
157     bool is_open;
158 
159     /* main device worker; copied from the device class */
160     VMBusChannelNotifyCb notify_cb;
161     /*
162      * guest->host notifications, either sent directly or dispatched via
163      * interrupt page (older VMBus)
164      */
165     EventNotifier notifier;
166 
167     VMBus *vmbus;
168     /*
169      * SINT route to signal with host->guest notifications; may be shared with
170      * the main VMBus SINT route
171      */
172     HvSintRoute *notify_route;
173     VMBusGpadl *gpadl;
174 
175     VMBusSendRingBuf send_ringbuf;
176     VMBusRecvRingBuf recv_ringbuf;
177 
178     QTAILQ_ENTRY(VMBusChannel) link;
179 };
180 
181 /*
182  * Hyper-V spec mandates that every message port has 16 buffers, which means
183  * that the guest can post up to this many messages without blocking.
184  * Therefore a queue for incoming messages has to be provided.
185  * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just
186  * doesn't transition to a new state until the message is known to have been
187  * successfully delivered to the respective SynIC message slot.
188  */
189 #define HV_MSG_QUEUE_LEN     16
190 
191 /* Hyper-V devices never use channel #0.  Must be something special. */
192 #define VMBUS_FIRST_CHANID      1
193 /* Each channel occupies one bit within a single event page sint slot. */
194 #define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
195 /* Leave a few connection numbers for other purposes. */
196 #define VMBUS_CHAN_CONNECTION_OFFSET     16
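
/*
 * Worked example of the constants above: the first channel to be created
 * gets id VMBUS_FIRST_CHANID == 1 (see alloc_chan_id() below), and guest
 * notifications for it arrive on connection id
 * VMBUS_CHAN_CONNECTION_OFFSET + 1 == 17 (see chan_connection_id() below).
 */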
197 
198 /*
199  * Since the success or failure of sending a message is reported
200  * asynchronously, the VMBus state machine has effectively two entry points:
201  * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest
202  * message delivery status becomes known).  Both are run as oneshot BHs on the
203  * main aio context, ensuring serialization.
204  */
205 enum {
206     VMBUS_LISTEN,
207     VMBUS_HANDSHAKE,
208     VMBUS_OFFER,
209     VMBUS_CREATE_GPADL,
210     VMBUS_TEARDOWN_GPADL,
211     VMBUS_OPEN_CHANNEL,
212     VMBUS_UNLOAD,
213     VMBUS_STATE_MAX
214 };
215 
216 struct VMBus {
217     BusState parent;
218 
219     uint8_t state;
220     /* protection against recursive aio_poll (see vmbus_run) */
221     bool in_progress;
222     /* whether there's a message being delivered to the guest */
223     bool msg_in_progress;
224     uint32_t version;
225     /* VP_INDEX of the vCPU to send messages and interrupts to */
226     uint32_t target_vp;
227     HvSintRoute *sint_route;
228     /*
229      * interrupt page for older protocol versions; newer ones use SynIC event
230      * flags directly
231      */
232     hwaddr int_page_gpa;
233 
234     DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
235 
236     /* incoming message queue */
237     struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
238     uint8_t rx_queue_head;
239     uint8_t rx_queue_size;
240     QemuMutex rx_queue_lock;
241 
242     QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
243     QTAILQ_HEAD(, VMBusChannel) channel_list;
244 
245     /*
246      * guest->host notifications for older VMBus, to be dispatched via
247      * interrupt page
248      */
249     EventNotifier notifier;
250 };
251 
252 static bool gpadl_full(VMBusGpadl *gpadl)
253 {
254     return gpadl->seen_gfns == gpadl->num_gfns;
255 }
256 
257 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
258                                 uint32_t child_relid, uint32_t num_gfns)
259 {
260     VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
261 
262     gpadl->id = id;
263     gpadl->child_relid = child_relid;
264     gpadl->num_gfns = num_gfns;
265     gpadl->gfns = g_new(uint64_t, num_gfns);
266     QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
267     gpadl->vmbus = vmbus;
268     gpadl->refcount = 1;
269     return gpadl;
270 }
271 
272 static void free_gpadl(VMBusGpadl *gpadl)
273 {
274     QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
275     g_free(gpadl->gfns);
276     g_free(gpadl);
277 }
278 
279 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
280 {
281     VMBusGpadl *gpadl;
282     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
283         if (gpadl->id == gpadl_id) {
284             return gpadl;
285         }
286     }
287     return NULL;
288 }
289 
290 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
291 {
292     VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
293     if (!gpadl || !gpadl_full(gpadl)) {
294         return NULL;
295     }
296     gpadl->refcount++;
297     return gpadl;
298 }
299 
300 void vmbus_put_gpadl(VMBusGpadl *gpadl)
301 {
302     if (!gpadl) {
303         return;
304     }
305     if (--gpadl->refcount) {
306         return;
307     }
308     free_gpadl(gpadl);
309 }
310 
311 uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
312 {
313     return gpadl->num_gfns * TARGET_PAGE_SIZE;
314 }
315 
316 static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
317                             AddressSpace *as, DMADirection dir)
318 {
319     iter->gpadl = gpadl;
320     iter->as = as;
321     iter->dir = dir;
322     iter->active = false;
323 }
324 
325 static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
326 {
327     uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
328     uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
329 
330     /* mapping is only done to do a non-zero amount of i/o */
331     assert(iter->last_off > 0);
332     assert(map_start_in_page < io_end_in_page);
333 
334     dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
335                      iter->dir, io_end_in_page - map_start_in_page);
336 }
337 
338 /*
339  * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
340  * The direction of the copy is determined by @iter->dir.
341  * The caller must ensure the operation overflows neither @buf nor the GPADL
342  * (there's an assert for the latter).
343  * Reuse the currently mapped page in the GPADL if possible.
344  */
345 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
346 {
347     ssize_t ret = len;
348 
349     assert(iter->active);
350 
351     while (len) {
352         uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
353         uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
354         uint32_t cplen = MIN(pgleft, len);
355         void *p;
356 
357         /* try to reuse the cached mapping */
358         if (iter->map) {
359             uint32_t map_start_in_page =
360                 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
361             uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
362             uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
363             if (off_base != mapped_base || off_in_page < map_start_in_page) {
364                 gpadl_iter_cache_unmap(iter);
365                 iter->map = NULL;
366             }
367         }
368 
369         if (!iter->map) {
370             dma_addr_t maddr;
371             dma_addr_t mlen = pgleft;
372             uint32_t idx = iter->off >> TARGET_PAGE_BITS;
373             assert(idx < iter->gpadl->num_gfns);
374 
375             maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
376 
377             iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
378                                        MEMTXATTRS_UNSPECIFIED);
379             if (mlen != pgleft) {
380                 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
381                 iter->map = NULL;
382                 return -EFAULT;
383             }
384         }
385 
386         p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
387                 off_in_page);
388         if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
389             memcpy(p, buf, cplen);
390         } else {
391             memcpy(buf, p, cplen);
392         }
393 
394         buf += cplen;
395         len -= cplen;
396         iter->off += cplen;
397         iter->last_off = iter->off;
398     }
399 
400     return ret;
401 }
402 
403 /*
404  * Position the iterator @iter at new offset @new_off.
405  * If this results in the cached mapping being unusable with the new offset,
406  * unmap it.
407  */
408 static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
409 {
410     assert(iter->active);
411     iter->off = new_off;
412 }
413 
414 /*
415  * Start a series of i/o on the GPADL.
416  * After this, i/o and seek operations on @iter become legal.
417  */
418 static inline void gpadl_iter_start_io(GpadlIter *iter)
419 {
420     assert(!iter->active);
421     /* mapping is cached lazily on i/o */
422     iter->map = NULL;
423     iter->active = true;
424 }
425 
426 /*
427  * End the earlier-started series of i/o on the GPADL and release the cached
428  * mapping if any.
429  */
430 static inline void gpadl_iter_end_io(GpadlIter *iter)
431 {
432     assert(iter->active);
433 
434     if (iter->map) {
435         gpadl_iter_cache_unmap(iter);
436     }
437 
438     iter->active = false;
439 }
440 
441 static void vmbus_resched(VMBus *vmbus);
442 static void vmbus_msg_cb(void *data, int status);
443 
444 ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
445                            const struct iovec *iov, size_t iov_cnt)
446 {
447     GpadlIter iter;
448     size_t i;
449     ssize_t ret = 0;
450 
451     gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
452                     DMA_DIRECTION_FROM_DEVICE);
453     gpadl_iter_start_io(&iter);
454     gpadl_iter_seek(&iter, off);
455     for (i = 0; i < iov_cnt; i++) {
456         ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
457         if (ret < 0) {
458             goto out;
459         }
460     }
461 out:
462     gpadl_iter_end_io(&iter);
463     return ret;
464 }
465 
466 int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
467                   unsigned iov_cnt, size_t len, size_t off)
468 {
469     int ret_cnt = 0, ret;
470     unsigned i;
471     QEMUSGList *sgl = &req->sgl;
472     ScatterGatherEntry *sg = sgl->sg;
473 
474     for (i = 0; i < sgl->nsg; i++) {
475         if (sg[i].len > off) {
476             break;
477         }
478         off -= sg[i].len;
479     }
480     for (; len && i < sgl->nsg; i++) {
481         dma_addr_t mlen = MIN(sg[i].len - off, len);
482         dma_addr_t addr = sg[i].base + off;
483         len -= mlen;
484         off = 0;
485 
486         for (; mlen; ret_cnt++) {
487             dma_addr_t l = mlen;
488             dma_addr_t a = addr;
489 
490             if (ret_cnt == iov_cnt) {
491                 ret = -ENOBUFS;
492                 goto err;
493             }
494 
495             iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir,
496                                                    MEMTXATTRS_UNSPECIFIED);
497             if (!l) {
498                 ret = -EFAULT;
499                 goto err;
500             }
501             iov[ret_cnt].iov_len = l;
502             addr += l;
503             mlen -= l;
504         }
505     }
506 
507     return ret_cnt;
508 err:
509     vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
510     return ret;
511 }
512 
513 void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
514                      unsigned iov_cnt, size_t accessed)
515 {
516     QEMUSGList *sgl = &req->sgl;
517     unsigned i;
518 
519     for (i = 0; i < iov_cnt; i++) {
520         size_t acsd = MIN(accessed, iov[i].iov_len);
521         dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
522         accessed -= acsd;
523     }
524 }
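
/*
 * Illustrative sketch (guarded out, not part of the driver): how a device
 * model might use vmbus_map_sgl()/vmbus_unmap_sgl() to access the guest
 * buffer described by a GPA-direct request.  MY_IOV_MAX and
 * example_read_req() are hypothetical names.
 */
#if 0
#define MY_IOV_MAX 64

static ssize_t example_read_req(VMBusChanReq *req, size_t len)
{
    struct iovec iov[MY_IOV_MAX];
    int cnt;

    /* map up to @len bytes of the request's SGL, starting at offset 0 */
    cnt = vmbus_map_sgl(req, DMA_DIRECTION_TO_DEVICE, iov, MY_IOV_MAX, len, 0);
    if (cnt < 0) {
        return cnt;
    }

    /* ... consume the guest data in iov[0 .. cnt-1] ... */

    /* report the mapped area as accessed and release the mappings */
    vmbus_unmap_sgl(req, DMA_DIRECTION_TO_DEVICE, iov, cnt, len);
    return len;
}
#endif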
525 
526 static const VMStateDescription vmstate_gpadl = {
527     .name = "vmbus/gpadl",
528     .version_id = 0,
529     .minimum_version_id = 0,
530     .fields = (const VMStateField[]) {
531         VMSTATE_UINT32(id, VMBusGpadl),
532         VMSTATE_UINT32(child_relid, VMBusGpadl),
533         VMSTATE_UINT32(num_gfns, VMBusGpadl),
534         VMSTATE_UINT32(seen_gfns, VMBusGpadl),
535         VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
536                                     vmstate_info_uint64, uint64_t),
537         VMSTATE_UINT8(state, VMBusGpadl),
538         VMSTATE_END_OF_LIST()
539     }
540 };
541 
542 /*
543  * Wrap the index into a ring buffer of @len bytes.
544  * @idx is assumed not to exceed twice the size of the ringbuffer, so only
545  * single wraparound is considered.
546  */
547 static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
548 {
549     if (idx >= len) {
550         idx -= len;
551     }
552     return idx;
553 }
554 
555 /*
556  * Circular difference between two indices into a ring buffer of @len bytes.
557  * @allow_catchup - whether @idx1 may catch up with @idx2; e.g. the read index
558  * may catch up with the write index but not vice versa.
559  */
560 static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
561                                     bool allow_catchup)
562 {
563     return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len);
564 }
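
/*
 * Illustrative self-check of the circular index arithmetic above (guarded
 * out, not part of the driver; the 0x1000-byte ring size is an arbitrary
 * example value):
 */
#if 0
static void rb_idx_example(void)
{
    const uint32_t len = 0x1000;

    /* no wraparound: plain distance from idx1 to idx2 */
    assert(rb_idx_delta(0x100, 0x900, len, true) == 0x800);
    /* wraparound: idx2 is numerically behind idx1 but ahead circularly */
    assert(rb_idx_delta(0xf00, 0x100, len, true) == 0x200);
    /* catch-up not allowed: equal indices mean "completely full" */
    assert(rb_idx_delta(0x100, 0x100, len, false) == len - 1);
    /* catch-up allowed: equal indices mean "empty" */
    assert(rb_idx_delta(0x100, 0x100, len, true) == 0);
}
#endif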
565 
566 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
567 {
568     vmbus_ring_buffer *rb;
569     dma_addr_t mlen = sizeof(*rb);
570 
571     rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
572                         DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
573     if (mlen != sizeof(*rb)) {
574         dma_memory_unmap(ringbuf->as, rb, mlen,
575                          DMA_DIRECTION_FROM_DEVICE, 0);
576         return NULL;
577     }
578     return rb;
579 }
580 
581 static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
582                               vmbus_ring_buffer *rb, bool dirty)
583 {
584     assert(rb);
585 
586     dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
587                      dirty ? sizeof(*rb) : 0);
588 }
589 
590 static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
591                                 AddressSpace *as, DMADirection dir,
592                                 uint32_t begin, uint32_t end)
593 {
594     ringbuf->as = as;
595     ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
596     ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
597     ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
598     gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
599 }
600 
601 static int ringbufs_init(VMBusChannel *chan)
602 {
603     vmbus_ring_buffer *rb;
604     VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
605     VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
606 
607     if (chan->ringbuf_send_offset <= 1 ||
608         chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
609         return -EINVAL;
610     }
611 
612     ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
613                         DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
614     ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
615                         DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
616                         chan->gpadl->num_gfns);
617     send_ringbuf->wanted = 0;
618     send_ringbuf->reserved = 0;
619 
620     rb = ringbuf_map_hdr(&recv_ringbuf->common);
621     if (!rb) {
622         return -EFAULT;
623     }
624     recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
625     ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
626 
627     rb = ringbuf_map_hdr(&send_ringbuf->common);
628     if (!rb) {
629         return -EFAULT;
630     }
631     send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
632     send_ringbuf->last_seen_rd_idx = rb->read_index;
633     rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
634     ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
635 
636     if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
637         send_ringbuf->wr_idx >= send_ringbuf->common.len) {
638         return -EOVERFLOW;
639     }
640 
641     return 0;
642 }
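
/*
 * Worked example of the ring buffer layout set up above: with a GPADL of
 * num_gfns == 6 pages and ringbuf_send_offset == 3, the recv (guest->host)
 * ring gets its header in page 0 and data in pages 1-2 (base at page 1,
 * len of 2 pages), while the send (host->guest) ring gets its header in
 * page 3 and data in pages 4-5.
 */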
643 
644 /*
645  * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
646  * around if needed.
647  * @len is assumed not to exceed the size of the ringbuffer, so only single
648  * wraparound is considered.
649  */
650 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
651 {
652     ssize_t ret1 = 0, ret2 = 0;
653     uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
654 
655     if (len >= remain) {
656         ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
657         if (ret1 < 0) {
658             return ret1;
659         }
660         gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
661         buf += remain;
662         len -= remain;
663     }
664     ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
665     if (ret2 < 0) {
666         return ret2;
667     }
668     return ret1 + ret2;
669 }
670 
671 /*
672  * Position the circular iterator within @ringbuf to offset @new_off, wrapping
673  * around if needed.
674  * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
675  * single wraparound is considered.
676  */
677 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
678 {
679     gpadl_iter_seek(&ringbuf->iter,
680                     ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
681 }
682 
683 static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
684 {
685     return ringbuf->iter.off - ringbuf->base;
686 }
687 
688 static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
689 {
690     gpadl_iter_start_io(&ringbuf->iter);
691 }
692 
693 static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
694 {
695     gpadl_iter_end_io(&ringbuf->iter);
696 }
697 
698 VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
699 {
700     return chan->dev;
701 }
702 
703 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
704 {
705     if (chan_idx >= dev->num_channels) {
706         return NULL;
707     }
708     return &dev->channels[chan_idx];
709 }
710 
711 uint32_t vmbus_channel_idx(VMBusChannel *chan)
712 {
713     return chan - chan->dev->channels;
714 }
715 
716 void vmbus_channel_notify_host(VMBusChannel *chan)
717 {
718     event_notifier_set(&chan->notifier);
719 }
720 
721 bool vmbus_channel_is_open(VMBusChannel *chan)
722 {
723     return chan->is_open;
724 }
725 
726 /*
727  * Notify the guest side about the data to work on in the channel ring buffer.
728  * The notification is done by signaling a dedicated per-channel SynIC event
729  * flag (more recent guests) or setting a bit in the interrupt page and firing
730  * the VMBus SINT (older guests).
731  */
732 static int vmbus_channel_notify_guest(VMBusChannel *chan)
733 {
734     int res = 0;
735     unsigned long *int_map, mask;
736     unsigned idx;
737     hwaddr addr = chan->vmbus->int_page_gpa;
738     hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
739 
740     trace_vmbus_channel_notify_guest(chan->id);
741 
742     if (!addr) {
743         return hyperv_set_event_flag(chan->notify_route, chan->id);
744     }
745 
746     int_map = cpu_physical_memory_map(addr, &len, 1);
747     if (len != TARGET_PAGE_SIZE / 2) {
748         res = -ENXIO;
749         goto unmap;
750     }
751 
752     idx = BIT_WORD(chan->id);
753     mask = BIT_MASK(chan->id);
754     if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
755         res = hyperv_sint_route_set_sint(chan->notify_route);
756         dirty = len;
757     }
758 
759 unmap:
760     cpu_physical_memory_unmap(int_map, len, 1, dirty);
761     return res;
762 }
763 
764 #define VMBUS_PKT_TRAILER      sizeof(uint64_t)
765 
766 static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
767                                           uint32_t desclen, uint32_t msglen)
768 {
769     hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
770         DIV_ROUND_UP(desclen, sizeof(uint64_t));
771     hdr->len_qwords = hdr->offset_qwords +
772         DIV_ROUND_UP(msglen, sizeof(uint64_t));
773     return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
774 }
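
/*
 * Worked example, assuming the 16-byte (2-qword) vmbus_packet_hdr: for
 * desclen = 20 and msglen = 100,
 *   offset_qwords = 2 + DIV_ROUND_UP(20, 8)  = 5   (payload starts at byte 40)
 *   len_qwords    = 5 + DIV_ROUND_UP(100, 8) = 18  (packet body is 144 bytes)
 * and the space consumed on the ring is 144 + 8 (trailer) = 152 bytes.
 */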
775 
776 /*
777  * Simplified ring buffer operation with paired barriers annotations in the
778  * producer and consumer loops:
779  *
780  * producer                           * consumer
781  * ~~~~~~~~                           * ~~~~~~~~
782  * write pending_send_sz              * read write_index
783  * smp_mb                       [A]   * smp_mb                       [C]
784  * read read_index                    * read packet
785  * smp_mb                       [B]   * read/write out-of-band data
786  * read/write out-of-band data        * smp_mb                       [B]
787  * write packet                       * write read_index
788  * smp_mb                       [C]   * smp_mb                       [A]
789  * write write_index                  * read pending_send_sz
790  * smp_wmb                      [D]   * smp_rmb                      [D]
791  * write pending_send_sz              * read write_index
792  * ...                                * ...
793  */
794 
795 static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
796 {
797     /* don't trust guest data */
798     if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
799         return 0;
800     }
801     return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
802                         ringbuf->common.len, false);
803 }
804 
805 static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
806 {
807     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
808     vmbus_ring_buffer *rb;
809     uint32_t written;
810 
811     written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
812                            ringbuf->common.len, true);
813     if (!written) {
814         return 0;
815     }
816 
817     rb = ringbuf_map_hdr(&ringbuf->common);
818     if (!rb) {
819         return -EFAULT;
820     }
821 
822     ringbuf->reserved -= written;
823 
824     /* prevent reorder with the data operation and packet write */
825     smp_mb();                   /* barrier pair [C] */
826     rb->write_index = ringbuf->wr_idx;
827 
828     /*
829      * If the producer earlier indicated that it wants to be notified when the
830      * consumer frees certain amount of space in the ring buffer, that amount
831      * is reduced by the size of the completed write.
832      */
833     if (ringbuf->wanted) {
834         /* otherwise reservation would fail */
835         assert(ringbuf->wanted < written);
836         ringbuf->wanted -= written;
837         /* prevent reorder with write_index write */
838         smp_wmb();              /* barrier pair [D] */
839         rb->pending_send_sz = ringbuf->wanted;
840     }
841 
842     /* prevent reorder with write_index or pending_send_sz write */
843     smp_mb();                   /* barrier pair [A] */
844     ringbuf->last_seen_rd_idx = rb->read_index;
845 
846     /*
847      * The consumer may have missed the reduction of pending_send_sz and skip
848      * notification, so re-check the blocking condition, and, if it's no longer
849      * true, ensure processing another iteration by simulating consumer's
850      * notification.
851      */
852     if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
853         vmbus_channel_notify_host(chan);
854     }
855 
856     /* skip notification by consumer's request */
857     if (rb->interrupt_mask) {
858         goto out;
859     }
860 
861     /*
862      * The consumer hasn't caught up with the producer's previous state so it's
863      * not blocked.
864      * (last_seen_rd_idx comes from the guest but it's safe to use w/o
865      * validation here as it only affects notification.)
866      */
867     if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
868                      ringbuf->common.len, true) > written) {
869         goto out;
870     }
871 
872     vmbus_channel_notify_guest(chan);
873 out:
874     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
875     ringbuf->last_wr_idx = ringbuf->wr_idx;
876     return written;
877 }
878 
879 int vmbus_channel_reserve(VMBusChannel *chan,
880                           uint32_t desclen, uint32_t msglen)
881 {
882     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
883     vmbus_ring_buffer *rb = NULL;
884     vmbus_packet_hdr hdr;
885     uint32_t needed = ringbuf->reserved +
886         vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
887 
888     /* avoid touching the guest memory if possible */
889     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
890         goto success;
891     }
892 
893     rb = ringbuf_map_hdr(&ringbuf->common);
894     if (!rb) {
895         return -EFAULT;
896     }
897 
898     /* fetch read index from guest memory and try again */
899     ringbuf->last_seen_rd_idx = rb->read_index;
900 
901     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
902         goto success;
903     }
904 
905     rb->pending_send_sz = needed;
906 
907     /*
908      * The consumer may have made progress and freed up some space before
909      * seeing updated pending_send_sz, so re-read read_index (preventing
910      * reorder with the pending_send_sz write) and try again.
911      */
912     smp_mb();                   /* barrier pair [A] */
913     ringbuf->last_seen_rd_idx = rb->read_index;
914 
915     if (needed > ringbuf_send_avail(ringbuf)) {
916         goto out;
917     }
918 
919 success:
920     ringbuf->reserved = needed;
921     needed = 0;
922 
923     /* clear pending_send_sz if it was set */
924     if (ringbuf->wanted) {
925         if (!rb) {
926             rb = ringbuf_map_hdr(&ringbuf->common);
927             if (!rb) {
928                 /* failure to clear pending_send_sz is non-fatal */
929                 goto out;
930             }
931         }
932 
933         rb->pending_send_sz = 0;
934     }
935 
936     /* prevent reorder of the following data operation with read_index read */
937     smp_mb();                   /* barrier pair [B] */
938 
939 out:
940     if (rb) {
941         ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
942     }
943     ringbuf->wanted = needed;
944     return needed ? -ENOSPC : 0;
945 }
946 
947 ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
948                            void *desc, uint32_t desclen,
949                            void *msg, uint32_t msglen,
950                            bool need_comp, uint64_t transaction_id)
951 {
952     ssize_t ret = 0;
953     vmbus_packet_hdr hdr;
954     uint32_t totlen;
955     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
956 
957     if (!vmbus_channel_is_open(chan)) {
958         return -EINVAL;
959     }
960 
961     totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
962     hdr.type = pkt_type;
963     hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
964     hdr.transaction_id = transaction_id;
965 
966     assert(totlen <= ringbuf->reserved);
967 
968     ringbuf_start_io(&ringbuf->common);
969     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
970     ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
971     if (ret < 0) {
972         goto out;
973     }
974     if (desclen) {
975         assert(desc);
976         ret = ringbuf_io(&ringbuf->common, desc, desclen);
977         if (ret < 0) {
978             goto out;
979         }
980         ringbuf_seek(&ringbuf->common,
981                      ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
982     }
983     ret = ringbuf_io(&ringbuf->common, msg, msglen);
984     if (ret < 0) {
985         goto out;
986     }
987     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
988     ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
989     ret = 0;
990 out:
991     ringbuf_end_io(&ringbuf->common);
992     if (ret) {
993         return ret;
994     }
995     return ringbuf_send_update_idx(chan);
996 }
997 
998 ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
999                                       void *msg, uint32_t msglen)
1000 {
1001     assert(req->need_comp);
1002     return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
1003                               msg, msglen, false, req->transaction_id);
1004 }
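
/*
 * Illustrative sketch (guarded out, not part of the driver): the intended
 * pairing of vmbus_channel_reserve() and vmbus_channel_send() on the device
 * side.  example_send() and its arguments are hypothetical.
 */
#if 0
static ssize_t example_send(VMBusChannel *chan, void *msg, uint32_t msglen)
{
    int ret;

    /* reserve ring space first; on -ENOSPC retry from the notify callback */
    ret = vmbus_channel_reserve(chan, 0, msglen);
    if (ret < 0) {
        return ret;
    }

    /* the send consumes the reservation made above */
    return vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, NULL, 0,
                              msg, msglen, false, 0);
}
#endif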
1005 
1006 static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
1007                                VMBusRingBufCommon *ringbuf, uint32_t len)
1008 {
1009     int ret;
1010     vmbus_pkt_gpa_direct hdr;
1011     hwaddr curaddr = 0;
1012     hwaddr curlen = 0;
1013     int num;
1014 
1015     if (len < sizeof(hdr)) {
1016         return -EIO;
1017     }
1018     ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
1019     if (ret < 0) {
1020         return ret;
1021     }
1022     len -= sizeof(hdr);
1023 
1024     num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
1025     if (num < 0) {
1026         return -EIO;
1027     }
1028     qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
1029 
1030     for (; hdr.rangecount; hdr.rangecount--) {
1031         vmbus_gpa_range range;
1032 
1033         if (len < sizeof(range)) {
1034             goto eio;
1035         }
1036         ret = ringbuf_io(ringbuf, &range, sizeof(range));
1037         if (ret < 0) {
1038             goto err;
1039         }
1040         len -= sizeof(range);
1041 
1042         if (range.byte_offset & TARGET_PAGE_MASK) {
1043             goto eio;
1044         }
1045 
1046         for (; range.byte_count; range.byte_offset = 0) {
1047             uint64_t paddr;
1048             uint32_t plen = MIN(range.byte_count,
1049                                 TARGET_PAGE_SIZE - range.byte_offset);
1050 
1051             if (len < sizeof(uint64_t)) {
1052                 goto eio;
1053             }
1054             ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
1055             if (ret < 0) {
1056                 goto err;
1057             }
1058             len -= sizeof(uint64_t);
1059             paddr <<= TARGET_PAGE_BITS;
1060             paddr |= range.byte_offset;
1061             range.byte_count -= plen;
1062 
1063             if (curaddr + curlen == paddr) {
1064                 /* consecutive fragments - join */
1065                 curlen += plen;
1066             } else {
1067                 if (curlen) {
1068                     qemu_sglist_add(sgl, curaddr, curlen);
1069                 }
1070 
1071                 curaddr = paddr;
1072                 curlen = plen;
1073             }
1074         }
1075     }
1076 
1077     if (curlen) {
1078         qemu_sglist_add(sgl, curaddr, curlen);
1079     }
1080 
1081     return 0;
1082 eio:
1083     ret = -EIO;
1084 err:
1085     qemu_sglist_destroy(sgl);
1086     return ret;
1087 }
1088 
1089 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1090                                      uint32_t size, uint16_t pkt_type,
1091                                      uint32_t msglen, uint64_t transaction_id,
1092                                      bool need_comp)
1093 {
1094     VMBusChanReq *req;
1095     uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1096     uint32_t totlen = msgoff + msglen;
1097 
1098     req = g_malloc0(totlen);
1099     req->chan = chan;
1100     req->pkt_type = pkt_type;
1101     req->msg = (void *)req + msgoff;
1102     req->msglen = msglen;
1103     req->transaction_id = transaction_id;
1104     req->need_comp = need_comp;
1105     return req;
1106 }
1107 
1108 int vmbus_channel_recv_start(VMBusChannel *chan)
1109 {
1110     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1111     vmbus_ring_buffer *rb;
1112 
1113     rb = ringbuf_map_hdr(&ringbuf->common);
1114     if (!rb) {
1115         return -EFAULT;
1116     }
1117     ringbuf->last_seen_wr_idx = rb->write_index;
1118     ringbuf_unmap_hdr(&ringbuf->common, rb, false);
1119 
1120     if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
1121         return -EOVERFLOW;
1122     }
1123 
1124     /* prevent reorder of the following data operation with write_index read */
1125     smp_mb();                   /* barrier pair [C] */
1126     return 0;
1127 }
1128 
1129 void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
1130 {
1131     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1132     vmbus_packet_hdr hdr = {};
1133     VMBusChanReq *req;
1134     uint32_t avail;
1135     uint32_t totlen, pktlen, msglen, msgoff, desclen;
1136 
1137     assert(size >= sizeof(*req));
1138 
1139     /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
1140     avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
1141                          ringbuf->common.len, true);
1142     if (avail < sizeof(hdr)) {
1143         return NULL;
1144     }
1145 
1146     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
1147     if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
1148         return NULL;
1149     }
1150 
1151     pktlen = hdr.len_qwords * sizeof(uint64_t);
1152     totlen = pktlen + VMBUS_PKT_TRAILER;
1153     if (totlen > avail) {
1154         return NULL;
1155     }
1156 
1157     msgoff = hdr.offset_qwords * sizeof(uint64_t);
1158     if (msgoff > pktlen || msgoff < sizeof(hdr)) {
1159         error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
1160         return NULL;
1161     }
1162 
1163     msglen = pktlen - msgoff;
1164 
1165     req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
1166                           hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
1167 
1168     switch (hdr.type) {
1169     case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
1170         desclen = msgoff - sizeof(hdr);
1171         if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
1172                                 desclen) < 0) {
1173             error_report("%s: failed to convert GPA ranges to SGL", __func__);
1174             goto free_req;
1175         }
1176         break;
1177     case VMBUS_PACKET_DATA_INBAND:
1178     case VMBUS_PACKET_COMP:
1179         break;
1180     default:
1181         error_report("%s: unexpected msg type: %x", __func__, hdr.type);
1182         goto free_req;
1183     }
1184 
1185     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
1186     if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
1187         goto free_req;
1188     }
1189     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
1190 
1191     return req;
1192 free_req:
1193     vmbus_free_req(req);
1194     return NULL;
1195 }
1196 
1197 void vmbus_channel_recv_pop(VMBusChannel *chan)
1198 {
1199     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1200     ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
1201 }
1202 
1203 ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
1204 {
1205     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1206     vmbus_ring_buffer *rb;
1207     uint32_t read;
1208 
1209     read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
1210                         ringbuf->common.len, true);
1211     if (!read) {
1212         return 0;
1213     }
1214 
1215     rb = ringbuf_map_hdr(&ringbuf->common);
1216     if (!rb) {
1217         return -EFAULT;
1218     }
1219 
1220     /* prevent reorder with the data operation and packet read */
1221     smp_mb();                   /* barrier pair [B] */
1222     rb->read_index = ringbuf->rd_idx;
1223 
1224     /* prevent reorder of the following pending_send_sz read */
1225     smp_mb();                   /* barrier pair [A] */
1226 
1227     if (rb->interrupt_mask) {
1228         goto out;
1229     }
1230 
1231     if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
1232         uint32_t wr_idx, wr_avail;
1233         uint32_t wanted = rb->pending_send_sz;
1234 
1235         if (!wanted) {
1236             goto out;
1237         }
1238 
1239         /* prevent reorder with pending_send_sz read */
1240         smp_rmb();              /* barrier pair [D] */
1241         wr_idx = rb->write_index;
1242 
1243         wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
1244                                 true);
1245 
1246         /* the producer wasn't blocked on the consumer state */
1247         if (wr_avail >= read + wanted) {
1248             goto out;
1249         }
1250         /* there's not enough space for the producer to make progress */
1251         if (wr_avail < wanted) {
1252             goto out;
1253         }
1254     }
1255 
1256     vmbus_channel_notify_guest(chan);
1257 out:
1258     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
1259     ringbuf->last_rd_idx = ringbuf->rd_idx;
1260     return read;
1261 }
1262 
1263 void vmbus_free_req(void *req)
1264 {
1265     VMBusChanReq *r = req;
1266 
1267     if (!req) {
1268         return;
1269     }
1270 
1271     if (r->sgl.dev) {
1272         qemu_sglist_destroy(&r->sgl);
1273     }
1274     g_free(req);
1275 }
1276 
1277 static void channel_event_cb(EventNotifier *e)
1278 {
1279     VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
1280     if (event_notifier_test_and_clear(e)) {
1281         /*
1282          * All receives are supposed to happen within the device worker, so
1283          * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
1284          * potentially reuse the cached mapping throughout the worker.
1285          * Can't do this for sends as they may happen outside the device
1286          * worker.
1287          */
1288         VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1289         ringbuf_start_io(&ringbuf->common);
1290         chan->notify_cb(chan);
1291         ringbuf_end_io(&ringbuf->common);
1292 
1293     }
1294 }
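
/*
 * Illustrative sketch (guarded out, not part of the driver) of a device
 * worker, i.e. a VMBusChannelNotifyCb invoked from channel_event_cb() above,
 * draining the channel with the receive API.  ExampleReq and
 * example_notify_cb() are hypothetical; a real device embeds VMBusChanReq in
 * its own request structure in the same way.
 */
#if 0
typedef struct ExampleReq {
    VMBusChanReq vmreq;
    /* device-specific per-request state would follow */
} ExampleReq;

static void example_notify_cb(VMBusChannel *chan)
{
    ExampleReq *req;

    if (vmbus_channel_recv_start(chan) < 0) {
        return;
    }

    while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) {
        /* ... handle req->vmreq.msg and, if present, req->vmreq.sgl ... */
        vmbus_channel_recv_pop(chan);

        if (req->vmreq.need_comp &&
            vmbus_channel_reserve(chan, 0, 0) == 0) {
            /* NULL/0 stands in for a device-defined completion payload */
            vmbus_channel_send_completion(&req->vmreq, NULL, 0);
        }
        vmbus_free_req(req);
    }

    vmbus_channel_recv_done(chan);
}
#endif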
1295 
1296 static int alloc_chan_id(VMBus *vmbus)
1297 {
1298     int ret;
1299 
1300     ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
1301     if (ret == VMBUS_CHANID_COUNT) {
1302         return -ENOMEM;
1303     }
1304     return ret + VMBUS_FIRST_CHANID;
1305 }
1306 
1307 static int register_chan_id(VMBusChannel *chan)
1308 {
1309     return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
1310                             chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
1311 }
1312 
1313 static void unregister_chan_id(VMBusChannel *chan)
1314 {
1315     clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
1316 }
1317 
1318 static uint32_t chan_connection_id(VMBusChannel *chan)
1319 {
1320     return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
1321 }
1322 
1323 static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
1324                          VMBusChannel *chan, uint16_t idx, Error **errp)
1325 {
1326     int res;
1327 
1328     chan->dev = dev;
1329     chan->notify_cb = vdc->chan_notify_cb;
1330     chan->subchan_idx = idx;
1331     chan->vmbus = vmbus;
1332 
1333     res = alloc_chan_id(vmbus);
1334     if (res < 0) {
1335         error_setg(errp, "no spare channel id");
1336         return;
1337     }
1338     chan->id = res;
1339     register_chan_id(chan);
1340 
1341     /*
1342      * The guest drivers depend on the device subchannels (idx #1+) to be
1343      * offered after the primary channel (idx #0) of that device.  To ensure
1344      * that, record the channels on the channel list in the order they appear
1345      * within the device.
1346      */
1347     QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
1348 }
1349 
1350 static void deinit_channel(VMBusChannel *chan)
1351 {
1352     assert(chan->state == VMCHAN_INIT);
1353     QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
1354     unregister_chan_id(chan);
1355 }
1356 
1357 static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1358 {
1359     uint16_t i;
1360     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1361     Error *err = NULL;
1362 
1363     dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1364     if (dev->num_channels < 1) {
1365         error_setg(errp, "invalid #channels: %u", dev->num_channels);
1366         return;
1367     }
1368 
1369     dev->channels = g_new0(VMBusChannel, dev->num_channels);
1370     for (i = 0; i < dev->num_channels; i++) {
1371         init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1372         if (err) {
1373             goto err_init;
1374         }
1375     }
1376 
1377     return;
1378 
1379 err_init:
1380     while (i--) {
1381         deinit_channel(&dev->channels[i]);
1382     }
1383     error_propagate(errp, err);
1384 }
1385 
1386 static void free_channels(VMBusDevice *dev)
1387 {
1388     uint16_t i;
1389     for (i = 0; i < dev->num_channels; i++) {
1390         deinit_channel(&dev->channels[i]);
1391     }
1392     g_free(dev->channels);
1393 }
1394 
1395 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1396 {
1397     VMBusChannel *chan;
1398 
1399     if (vp_index == vmbus->target_vp) {
1400         hyperv_sint_route_ref(vmbus->sint_route);
1401         return vmbus->sint_route;
1402     }
1403 
1404     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1405         if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1406             hyperv_sint_route_ref(chan->notify_route);
1407             return chan->notify_route;
1408         }
1409     }
1410 
1411     return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1412 }
1413 
1414 static void open_channel(VMBusChannel *chan)
1415 {
1416     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1417 
1418     chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
1419     if (!chan->gpadl) {
1420         return;
1421     }
1422 
1423     if (ringbufs_init(chan)) {
1424         goto put_gpadl;
1425     }
1426 
1427     if (event_notifier_init(&chan->notifier, 0)) {
1428         goto put_gpadl;
1429     }
1430 
1431     event_notifier_set_handler(&chan->notifier, channel_event_cb);
1432 
1433     if (hyperv_set_event_flag_handler(chan_connection_id(chan),
1434                                       &chan->notifier)) {
1435         goto cleanup_notifier;
1436     }
1437 
1438     chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
1439     if (!chan->notify_route) {
1440         goto clear_event_flag_handler;
1441     }
1442 
1443     if (vdc->open_channel && vdc->open_channel(chan)) {
1444         goto unref_sint_route;
1445     }
1446 
1447     chan->is_open = true;
1448     return;
1449 
1450 unref_sint_route:
1451     hyperv_sint_route_unref(chan->notify_route);
1452 clear_event_flag_handler:
1453     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1454 cleanup_notifier:
1455     event_notifier_set_handler(&chan->notifier, NULL);
1456     event_notifier_cleanup(&chan->notifier);
1457 put_gpadl:
1458     vmbus_put_gpadl(chan->gpadl);
1459 }
1460 
1461 static void close_channel(VMBusChannel *chan)
1462 {
1463     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1464 
1465     if (!chan->is_open) {
1466         return;
1467     }
1468 
1469     if (vdc->close_channel) {
1470         vdc->close_channel(chan);
1471     }
1472 
1473     hyperv_sint_route_unref(chan->notify_route);
1474     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1475     event_notifier_set_handler(&chan->notifier, NULL);
1476     event_notifier_cleanup(&chan->notifier);
1477     vmbus_put_gpadl(chan->gpadl);
1478     chan->is_open = false;
1479 }
1480 
1481 static int channel_post_load(void *opaque, int version_id)
1482 {
1483     VMBusChannel *chan = opaque;
1484 
1485     return register_chan_id(chan);
1486 }
1487 
1488 static const VMStateDescription vmstate_channel = {
1489     .name = "vmbus/channel",
1490     .version_id = 0,
1491     .minimum_version_id = 0,
1492     .post_load = channel_post_load,
1493     .fields = (const VMStateField[]) {
1494         VMSTATE_UINT32(id, VMBusChannel),
1495         VMSTATE_UINT16(subchan_idx, VMBusChannel),
1496         VMSTATE_UINT32(open_id, VMBusChannel),
1497         VMSTATE_UINT32(target_vp, VMBusChannel),
1498         VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
1499         VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
1500         VMSTATE_UINT8(offer_state, VMBusChannel),
1501         VMSTATE_UINT8(state, VMBusChannel),
1502         VMSTATE_END_OF_LIST()
1503     }
1504 };
1505 
1506 static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1507 {
1508     VMBusChannel *chan;
1509     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1510         if (chan->id == id) {
1511             return chan;
1512         }
1513     }
1514     return NULL;
1515 }
1516 
1517 static int enqueue_incoming_message(VMBus *vmbus,
1518                                     const struct hyperv_post_message_input *msg)
1519 {
1520     int ret = 0;
1521     uint8_t idx, prev_size;
1522 
1523     qemu_mutex_lock(&vmbus->rx_queue_lock);
1524 
1525     if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
1526         ret = -ENOBUFS;
1527         goto out;
1528     }
1529 
1530     prev_size = vmbus->rx_queue_size;
1531     idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
1532     memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
1533     vmbus->rx_queue_size++;
1534 
1535     /* only need to resched if the queue was empty before */
1536     if (!prev_size) {
1537         vmbus_resched(vmbus);
1538     }
1539 out:
1540     qemu_mutex_unlock(&vmbus->rx_queue_lock);
1541     return ret;
1542 }
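
/*
 * Worked example of the circular rx queue indexing above: with
 * rx_queue_head == 14 and rx_queue_size == 3, the new message lands in
 * slot (14 + 3) % HV_MSG_QUEUE_LEN == 1.
 */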
1543 
1544 static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1545                                    void *data)
1546 {
1547     VMBus *vmbus = data;
1548     struct vmbus_message_header *vmbus_msg;
1549 
1550     if (msg->message_type != HV_MESSAGE_VMBUS) {
1551         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1552     }
1553 
1554     if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1555         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1556     }
1557 
1558     vmbus_msg = (struct vmbus_message_header *)msg->payload;
1559 
1560     trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1561 
1562     if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1563         vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1564         error_report("vmbus: unknown message type %#x",
1565                      vmbus_msg->message_type);
1566         return HV_STATUS_INVALID_HYPERCALL_INPUT;
1567     }
1568 
1569     if (enqueue_incoming_message(vmbus, msg)) {
1570         return HV_STATUS_INSUFFICIENT_BUFFERS;
1571     }
1572     return HV_STATUS_SUCCESS;
1573 }
1574 
1575 static bool vmbus_initialized(VMBus *vmbus)
1576 {
1577     return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
1578 }
1579 
1580 static void vmbus_reset_all(VMBus *vmbus)
1581 {
1582     bus_cold_reset(BUS(vmbus));
1583 }
1584 
1585 static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
1586 {
1587     int ret;
1588     struct hyperv_message msg = {
1589         .header.message_type = HV_MESSAGE_VMBUS,
1590     };
1591 
1592     assert(!vmbus->msg_in_progress);
1593     assert(msglen <= sizeof(msg.payload));
1594     assert(msglen >= sizeof(struct vmbus_message_header));
1595 
1596     vmbus->msg_in_progress = true;
1597 
1598     trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
1599                          msglen);
1600 
1601     memcpy(msg.payload, msgdata, msglen);
1602     msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
1603 
1604     ret = hyperv_post_msg(vmbus->sint_route, &msg);
1605     if (ret == 0 || ret == -EAGAIN) {
1606         return;
1607     }
1608 
1609     error_report("message delivery fatal failure: %d; aborting vmbus", ret);
1610     vmbus_reset_all(vmbus);
1611 }
1612 
1613 static int vmbus_init(VMBus *vmbus)
1614 {
1615     if (vmbus->target_vp != (uint32_t)-1) {
1616         vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
1617                                                   vmbus_msg_cb, vmbus);
1618         if (!vmbus->sint_route) {
1619             error_report("failed to set up SINT route");
1620             return -ENOMEM;
1621         }
1622     }
1623     return 0;
1624 }
1625 
1626 static void vmbus_deinit(VMBus *vmbus)
1627 {
1628     VMBusGpadl *gpadl, *tmp_gpadl;
1629     VMBusChannel *chan;
1630 
1631     QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
1632         if (gpadl->state == VMGPADL_TORNDOWN) {
1633             continue;
1634         }
1635         vmbus_put_gpadl(gpadl);
1636     }
1637 
1638     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1639         chan->offer_state = VMOFFER_INIT;
1640     }
1641 
1642     hyperv_sint_route_unref(vmbus->sint_route);
1643     vmbus->sint_route = NULL;
1644     vmbus->int_page_gpa = 0;
1645     vmbus->target_vp = (uint32_t)-1;
1646     vmbus->version = 0;
1647     vmbus->state = VMBUS_LISTEN;
1648     vmbus->msg_in_progress = false;
1649 }
1650 
1651 static void handle_initiate_contact(VMBus *vmbus,
1652                                     vmbus_message_initiate_contact *msg,
1653                                     uint32_t msglen)
1654 {
1655     if (msglen < sizeof(*msg)) {
1656         return;
1657     }
1658 
1659     trace_vmbus_initiate_contact(msg->version_requested >> 16,
1660                                  msg->version_requested & 0xffff,
1661                                  msg->target_vcpu, msg->monitor_page1,
1662                                  msg->monitor_page2, msg->interrupt_page);
1663 
1664     /*
1665      * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
1666      * Useful, in particular, with a vmbus-aware BIOS which can't shut vmbus
1667      * down before handing over to the OS loader.
1668      */
1669     vmbus_reset_all(vmbus);
1670 
1671     vmbus->target_vp = msg->target_vcpu;
1672     vmbus->version = msg->version_requested;
1673     if (vmbus->version < VMBUS_VERSION_WIN8) {
1674         /* Linux passes the interrupt page even when it doesn't need it */
1675         vmbus->int_page_gpa = msg->interrupt_page;
1676     }
1677     vmbus->state = VMBUS_HANDSHAKE;
1678 
1679     if (vmbus_init(vmbus)) {
1680         error_report("failed to init vmbus; aborting");
1681         vmbus_deinit(vmbus);
1682         return;
1683     }
1684 }
1685 
1686 static void send_handshake(VMBus *vmbus)
1687 {
1688     struct vmbus_message_version_response msg = {
1689         .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
1690         .version_supported = vmbus_initialized(vmbus),
1691     };
1692 
1693     post_msg(vmbus, &msg, sizeof(msg));
1694 }
1695 
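/*
 * Offer negotiation: REQUESTOFFERS marks the first channel still in
 * VMOFFER_INIT as VMOFFER_SENDING and moves vmbus to VMBUS_OFFER; send_offer()
 * then posts one OFFERCHANNEL message at a time, complete_offer() advances to
 * the next channel, and ALLOFFERS_DELIVERED terminates the sequence.
 */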
1696 static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
1697 {
1698     VMBusChannel *chan;
1699 
1700     if (!vmbus_initialized(vmbus)) {
1701         return;
1702     }
1703 
1704     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1705         if (chan->offer_state == VMOFFER_INIT) {
1706             chan->offer_state = VMOFFER_SENDING;
1707             break;
1708         }
1709     }
1710 
1711     vmbus->state = VMBUS_OFFER;
1712 }
1713 
1714 static void send_offer(VMBus *vmbus)
1715 {
1716     VMBusChannel *chan;
1717     struct vmbus_message_header alloffers_msg = {
1718         .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
1719     };
1720 
1721     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1722         if (chan->offer_state == VMOFFER_SENDING) {
1723             VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1724             /* Hyper-V wants LE GUIDs */
1725             QemuUUID classid = qemu_uuid_bswap(vdc->classid);
1726             QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
1727             struct vmbus_message_offer_channel msg = {
1728                 .header.message_type = VMBUS_MSG_OFFERCHANNEL,
1729                 .child_relid = chan->id,
1730                 .connection_id = chan_connection_id(chan),
1731                 .channel_flags = vdc->channel_flags,
1732                 .mmio_size_mb = vdc->mmio_size_mb,
1733                 .sub_channel_index = vmbus_channel_idx(chan),
1734                 .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
1735             };
1736 
1737             memcpy(msg.type_uuid, &classid, sizeof(classid));
1738             memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
1739 
1740             trace_vmbus_send_offer(chan->id, chan->dev);
1741 
1742             post_msg(vmbus, &msg, sizeof(msg));
1743             return;
1744         }
1745     }
1746 
1747     /* no more offers, send terminator message */
1748     trace_vmbus_terminate_offers();
1749     post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
1750 }
1751 
1752 static bool complete_offer(VMBus *vmbus)
1753 {
1754     VMBusChannel *chan;
1755 
1756     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1757         if (chan->offer_state == VMOFFER_SENDING) {
1758             chan->offer_state = VMOFFER_SENT;
1759             goto next_offer;
1760         }
1761     }
1762     /*
1763      * no transitioning channels found so this is completing the terminator
1764      * message, and vmbus can move to the next state
1765      */
1766     return true;
1767 
1768 next_offer:
1769     /* try to mark another channel for offering */
1770     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1771         if (chan->offer_state == VMOFFER_INIT) {
1772             chan->offer_state = VMOFFER_SENDING;
1773             break;
1774         }
1775     }
1776     /*
1777      * if an offer has been sent, there are more offers or the terminator yet
1778      * to be sent, so no state transition for vmbus
1779      */
1780     return false;
1781 }
1782 
1783 
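/*
 * GPADL construction: GPADL_HEADER declares the total GFN count and carries
 * as many GFNs as fit in one message; the rest arrive in GPADL_BODY messages.
 * Once all GFNs have been seen (gpadl_full()), vmbus enters VMBUS_CREATE_GPADL
 * and acknowledges with GPADL_CREATED.
 */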
1784 static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
1785                                 uint32_t msglen)
1786 {
1787     VMBusGpadl *gpadl;
1788     uint32_t num_gfns, i;
1789 
1790     /* must include at least one gpa range */
1791     if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
1792         !vmbus_initialized(vmbus)) {
1793         return;
1794     }
1795 
1796     num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
1797                sizeof(msg->range[0].pfn_array[0]);
1798 
1799     trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
1800 
1801     /*
1802      * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
1803      * ranges each with arbitrary size and alignment.  However in practice only
1804      * single-range page-aligned GPADLs have been observed so just ignore
1805      * anything else and simplify things greatly.
1806      */
1807     if (msg->rangecount != 1 || msg->range[0].byte_offset ||
1808         (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
1809         return;
1810     }
1811 
1812     /* ignore requests to create already existing GPADLs */
1813     if (find_gpadl(vmbus, msg->gpadl_id)) {
1814         return;
1815     }
1816 
1817     gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
1818 
1819     for (i = 0; i < num_gfns &&
1820          (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
1821          i++) {
1822         gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
1823     }
1824 
1825     if (gpadl_full(gpadl)) {
1826         vmbus->state = VMBUS_CREATE_GPADL;
1827     }
1828 }
1829 
1830 static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
1831                               uint32_t msglen)
1832 {
1833     VMBusGpadl *gpadl;
1834     uint32_t num_gfns_left, i;
1835 
1836     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1837         return;
1838     }
1839 
1840     trace_vmbus_gpadl_body(msg->gpadl_id);
1841 
1842     gpadl = find_gpadl(vmbus, msg->gpadl_id);
1843     if (!gpadl) {
1844         return;
1845     }
1846 
1847     num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
1848     assert(num_gfns_left);
1849 
1850     for (i = 0; i < num_gfns_left &&
1851          (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
1852         gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
1853     }
1854 
1855     if (gpadl_full(gpadl)) {
1856         vmbus->state = VMBUS_CREATE_GPADL;
1857     }
1858 }
1859 
1860 static void send_create_gpadl(VMBus *vmbus)
1861 {
1862     VMBusGpadl *gpadl;
1863 
1864     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1865         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1866             struct vmbus_message_gpadl_created msg = {
1867                 .header.message_type = VMBUS_MSG_GPADL_CREATED,
1868                 .gpadl_id = gpadl->id,
1869                 .child_relid = gpadl->child_relid,
1870             };
1871 
1872             trace_vmbus_gpadl_created(gpadl->id);
1873             post_msg(vmbus, &msg, sizeof(msg));
1874             return;
1875         }
1876     }
1877 
1878     g_assert_not_reached();
1879 }
1880 
1881 static bool complete_create_gpadl(VMBus *vmbus)
1882 {
1883     VMBusGpadl *gpadl;
1884 
1885     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1886         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1887             gpadl->state = VMGPADL_ALIVE;
1888 
1889             return true;
1890         }
1891     }
1892 
1893     g_assert_not_reached();
1894 }
1895 
1896 static void handle_gpadl_teardown(VMBus *vmbus,
1897                                   vmbus_message_gpadl_teardown *msg,
1898                                   uint32_t msglen)
1899 {
1900     VMBusGpadl *gpadl;
1901 
1902     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1903         return;
1904     }
1905 
1906     trace_vmbus_gpadl_teardown(msg->gpadl_id);
1907 
1908     gpadl = find_gpadl(vmbus, msg->gpadl_id);
1909     if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
1910         return;
1911     }
1912 
1913     gpadl->state = VMGPADL_TEARINGDOWN;
1914     vmbus->state = VMBUS_TEARDOWN_GPADL;
1915 }
1916 
1917 static void send_teardown_gpadl(VMBus *vmbus)
1918 {
1919     VMBusGpadl *gpadl;
1920 
1921     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1922         if (gpadl->state == VMGPADL_TEARINGDOWN) {
1923             struct vmbus_message_gpadl_torndown msg = {
1924                 .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
1925                 .gpadl_id = gpadl->id,
1926             };
1927 
1928             trace_vmbus_gpadl_torndown(gpadl->id);
1929             post_msg(vmbus, &msg, sizeof(msg));
1930             return;
1931         }
1932     }
1933 
1934     g_assert_not_reached();
1935 }
1936 
1937 static bool complete_teardown_gpadl(VMBus *vmbus)
1938 {
1939     VMBusGpadl *gpadl;
1940 
1941     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1942         if (gpadl->state == VMGPADL_TEARINGDOWN) {
1943             gpadl->state = VMGPADL_TORNDOWN;
1944             vmbus_put_gpadl(gpadl);
1945             return true;
1946         }
1947     }
1948 
1949     g_assert_not_reached();
1950 }
1951 
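/*
 * Channel open: OPENCHANNEL names the ring-buffer GPADL, offset and target VP;
 * open_channel() attempts to bring the channel up, and OPENCHANNEL_RESULT
 * reports the outcome back to the guest (status 0 means success).
 */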
1952 static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
1953                                 uint32_t msglen)
1954 {
1955     VMBusChannel *chan;
1956 
1957     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1958         return;
1959     }
1960 
1961     trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
1962                              msg->target_vp);
1963     chan = find_channel(vmbus, msg->child_relid);
1964     if (!chan || chan->state != VMCHAN_INIT) {
1965         return;
1966     }
1967 
1968     chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
1969     chan->ringbuf_send_offset = msg->ring_buffer_offset;
1970     chan->target_vp = msg->target_vp;
1971     chan->open_id = msg->open_id;
1972 
1973     open_channel(chan);
1974 
1975     chan->state = VMCHAN_OPENING;
1976     vmbus->state = VMBUS_OPEN_CHANNEL;
1977 }
1978 
1979 static void send_open_channel(VMBus *vmbus)
1980 {
1981     VMBusChannel *chan;
1982 
1983     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1984         if (chan->state == VMCHAN_OPENING) {
1985             struct vmbus_message_open_result msg = {
1986                 .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
1987                 .child_relid = chan->id,
1988                 .open_id = chan->open_id,
1989                 .status = !vmbus_channel_is_open(chan),
1990             };
1991 
1992             trace_vmbus_channel_open(chan->id, msg.status);
1993             post_msg(vmbus, &msg, sizeof(msg));
1994             return;
1995         }
1996     }
1997 
1998     g_assert_not_reached();
1999 }
2000 
2001 static bool complete_open_channel(VMBus *vmbus)
2002 {
2003     VMBusChannel *chan;
2004 
2005     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2006         if (chan->state == VMCHAN_OPENING) {
2007             if (vmbus_channel_is_open(chan)) {
2008                 chan->state = VMCHAN_OPEN;
2009                 /*
2010                  * simulate guest notification of ringbuffer space made
2011                  * available, for the channel protocols where the host
2012                  * initiates the communication
2013                  */
2014                 vmbus_channel_notify_host(chan);
2015             } else {
2016                 chan->state = VMCHAN_INIT;
2017             }
2018             return true;
2019         }
2020     }
2021 
2022     g_assert_not_reached();
2023 }
2024 
2025 static void vdev_reset_on_close(VMBusDevice *vdev)
2026 {
2027     uint16_t i;
2028 
2029     for (i = 0; i < vdev->num_channels; i++) {
2030         if (vmbus_channel_is_open(&vdev->channels[i])) {
2031             return;
2032         }
2033     }
2034 
2035     /* all channels closed -- reset device */
2036     device_cold_reset(DEVICE(vdev));
2037 }
2038 
2039 static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
2040                                  uint32_t msglen)
2041 {
2042     VMBusChannel *chan;
2043 
2044     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2045         return;
2046     }
2047 
2048     trace_vmbus_close_channel(msg->child_relid);
2049 
2050     chan = find_channel(vmbus, msg->child_relid);
2051     if (!chan) {
2052         return;
2053     }
2054 
2055     close_channel(chan);
2056     chan->state = VMCHAN_INIT;
2057 
2058     vdev_reset_on_close(chan->dev);
2059 }
2060 
2061 static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
2062 {
2063     vmbus->state = VMBUS_UNLOAD;
2064 }
2065 
2066 static void send_unload(VMBus *vmbus)
2067 {
2068     vmbus_message_header msg = {
2069         .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
2070     };
2071 
2072     qemu_mutex_lock(&vmbus->rx_queue_lock);
2073     vmbus->rx_queue_size = 0;
2074     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2075 
2076     post_msg(vmbus, &msg, sizeof(msg));
2077 }
2078 
2079 static bool complete_unload(VMBus *vmbus)
2080 {
2081     vmbus_reset_all(vmbus);
2082     return true;
2083 }
2084 
2085 static void process_message(VMBus *vmbus)
2086 {
2087     struct hyperv_post_message_input *hv_msg;
2088     struct vmbus_message_header *msg;
2089     void *msgdata;
2090     uint32_t msglen;
2091 
2092     qemu_mutex_lock(&vmbus->rx_queue_lock);
2093 
2094     if (!vmbus->rx_queue_size) {
2095         goto unlock;
2096     }
2097 
2098     hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
2099     msglen = hv_msg->payload_size;
2100     if (msglen < sizeof(*msg)) {
2101         goto out;
2102     }
2103     msgdata = hv_msg->payload;
2104     msg = msgdata;
2105 
2106     trace_vmbus_process_incoming_message(msg->message_type);
2107 
2108     switch (msg->message_type) {
2109     case VMBUS_MSG_INITIATE_CONTACT:
2110         handle_initiate_contact(vmbus, msgdata, msglen);
2111         break;
2112     case VMBUS_MSG_REQUESTOFFERS:
2113         handle_request_offers(vmbus, msgdata, msglen);
2114         break;
2115     case VMBUS_MSG_GPADL_HEADER:
2116         handle_gpadl_header(vmbus, msgdata, msglen);
2117         break;
2118     case VMBUS_MSG_GPADL_BODY:
2119         handle_gpadl_body(vmbus, msgdata, msglen);
2120         break;
2121     case VMBUS_MSG_GPADL_TEARDOWN:
2122         handle_gpadl_teardown(vmbus, msgdata, msglen);
2123         break;
2124     case VMBUS_MSG_OPENCHANNEL:
2125         handle_open_channel(vmbus, msgdata, msglen);
2126         break;
2127     case VMBUS_MSG_CLOSECHANNEL:
2128         handle_close_channel(vmbus, msgdata, msglen);
2129         break;
2130     case VMBUS_MSG_UNLOAD:
2131         handle_unload(vmbus, msgdata, msglen);
2132         break;
2133     default:
2134         error_report("unknown message type %#x", msg->message_type);
2135         break;
2136     }
2137 
2138 out:
2139     vmbus->rx_queue_size--;
2140     vmbus->rx_queue_head++;
2141     vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
2142 
2143     vmbus_resched(vmbus);
2144 unlock:
2145     qemu_mutex_unlock(&vmbus->rx_queue_lock);
2146 }
2147 
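/*
 * Per-state handlers: .run posts the outgoing message for the state (or, for
 * VMBUS_LISTEN, processes the rx queue), and .complete is called from
 * vmbus_msg_cb() once delivery finishes.  A NULL .complete, or one returning
 * true, puts vmbus back into VMBUS_LISTEN.
 */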
2148 static const struct {
2149     void (*run)(VMBus *vmbus);
2150     bool (*complete)(VMBus *vmbus);
2151 } state_runner[] = {
2152     [VMBUS_LISTEN]         = {process_message,     NULL},
2153     [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
2154     [VMBUS_OFFER]          = {send_offer,          complete_offer},
2155     [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
2156     [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2157     [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
2158     [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
2159 };
2160 
2161 static void vmbus_do_run(VMBus *vmbus)
2162 {
2163     if (vmbus->msg_in_progress) {
2164         return;
2165     }
2166 
2167     assert(vmbus->state < VMBUS_STATE_MAX);
2168     assert(state_runner[vmbus->state].run);
2169     state_runner[vmbus->state].run(vmbus);
2170 }
2171 
2172 static void vmbus_run(void *opaque)
2173 {
2174     VMBus *vmbus = opaque;
2175 
2176     /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2177     if (vmbus->in_progress) {
2178         return;
2179     }
2180 
2181     vmbus->in_progress = true;
2182     /*
2183      * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2184      * should go *after* the code that can result in aio_poll; otherwise
2185      * reschedules can be missed.  No idea how to enforce that.
2186      */
2187     vmbus_do_run(vmbus);
2188     vmbus->in_progress = false;
2189 }
2190 
2191 static void vmbus_msg_cb(void *data, int status)
2192 {
2193     VMBus *vmbus = data;
2194     bool (*complete)(VMBus *vmbus);
2195 
2196     assert(vmbus->msg_in_progress);
2197 
2198     trace_vmbus_msg_cb(status);
2199 
2200     if (status == -EAGAIN) {
2201         goto out;
2202     }
2203     if (status) {
2204         error_report("message delivery fatal failure: %d; aborting vmbus",
2205                      status);
2206         vmbus_reset_all(vmbus);
2207         return;
2208     }
2209 
2210     assert(vmbus->state < VMBUS_STATE_MAX);
2211     complete = state_runner[vmbus->state].complete;
2212     if (!complete || complete(vmbus)) {
2213         vmbus->state = VMBUS_LISTEN;
2214     }
2215 out:
2216     vmbus->msg_in_progress = false;
2217     vmbus_resched(vmbus);
2218 }
2219 
2220 static void vmbus_resched(VMBus *vmbus)
2221 {
2222     aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
2223 }
2224 
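/*
 * Legacy (pre-WIN8) channel signalling: when int_page_gpa is set, the guest
 * sets the channel's bit in the second half of the interrupt page and triggers
 * the event connection; the handler below clears those bits and notifies the
 * corresponding open channels.
 */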
2225 static void vmbus_signal_event(EventNotifier *e)
2226 {
2227     VMBusChannel *chan;
2228     VMBus *vmbus = container_of(e, VMBus, notifier);
2229     unsigned long *int_map;
2230     hwaddr addr, len;
2231     bool is_dirty = false;
2232 
2233     if (!event_notifier_test_and_clear(e)) {
2234         return;
2235     }
2236 
2237     trace_vmbus_signal_event();
2238 
2239     if (!vmbus->int_page_gpa) {
2240         return;
2241     }
2242 
2243     addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
2244     len = TARGET_PAGE_SIZE / 2;
2245     int_map = cpu_physical_memory_map(addr, &len, 1);
2246     if (len != TARGET_PAGE_SIZE / 2) {
2247         goto unmap;
2248     }
2249 
2250     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2251         if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
2252             if (!vmbus_channel_is_open(chan)) {
2253                 continue;
2254             }
2255             vmbus_channel_notify_host(chan);
2256             is_dirty = true;
2257         }
2258     }
2259 
2260 unmap:
2261     cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
2262 }
2263 
2264 static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2265 {
2266     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2267     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2268     VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2269     BusChild *child;
2270     Error *err = NULL;
2271     char idstr[UUID_STR_LEN];
2272 
2273     assert(!qemu_uuid_is_null(&vdev->instanceid));
2274 
2275     if (!qemu_uuid_is_null(&vdc->instanceid)) {
2276         /* Class wants to only have a single instance with a fixed UUID */
2277         if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
2278             error_setg(&err, "instance id can't be changed");
2279             goto error_out;
2280         }
2281     }
2282 
2283     /* Check for instance id collision for this class id */
2284     QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2285         VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2286 
2287         if (child_dev == vdev) {
2288             continue;
2289         }
2290 
2291         if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2292             qemu_uuid_unparse(&vdev->instanceid, idstr);
2293             error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2294             goto error_out;
2295         }
2296     }
2297 
2298     vdev->dma_as = &address_space_memory;
2299 
2300     create_channels(vmbus, vdev, &err);
2301     if (err) {
2302         goto error_out;
2303     }
2304 
2305     if (vdc->vmdev_realize) {
2306         vdc->vmdev_realize(vdev, &err);
2307         if (err) {
2308             goto err_vdc_realize;
2309         }
2310     }
2311     return;
2312 
2313 err_vdc_realize:
2314     free_channels(vdev);
2315 error_out:
2316     error_propagate(errp, err);
2317 }
2318 
2319 static void vmbus_dev_reset(DeviceState *dev)
2320 {
2321     uint16_t i;
2322     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2323     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2324 
2325     if (vdev->channels) {
2326         for (i = 0; i < vdev->num_channels; i++) {
2327             VMBusChannel *chan = &vdev->channels[i];
2328             close_channel(chan);
2329             chan->state = VMCHAN_INIT;
2330         }
2331     }
2332 
2333     if (vdc->vmdev_reset) {
2334         vdc->vmdev_reset(vdev);
2335     }
2336 }
2337 
2338 static void vmbus_dev_unrealize(DeviceState *dev)
2339 {
2340     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2341     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2342 
2343     if (vdc->vmdev_unrealize) {
2344         vdc->vmdev_unrealize(vdev);
2345     }
2346     free_channels(vdev);
2347 }
2348 
2349 static const Property vmbus_dev_props[] = {
2350     DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
2351 };
2352 
2353 
2354 static void vmbus_dev_class_init(ObjectClass *klass, const void *data)
2355 {
2356     DeviceClass *kdev = DEVICE_CLASS(klass);
2357     device_class_set_props(kdev, vmbus_dev_props);
2358     kdev->bus_type = TYPE_VMBUS;
2359     kdev->realize = vmbus_dev_realize;
2360     kdev->unrealize = vmbus_dev_unrealize;
2361     device_class_set_legacy_reset(kdev, vmbus_dev_reset);
2362 }
2363 
2364 static void vmbus_dev_instance_init(Object *obj)
2365 {
2366     VMBusDevice *vdev = VMBUS_DEVICE(obj);
2367     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2368 
2369     if (!qemu_uuid_is_null(&vdc->instanceid)) {
2370         /* Class wants to only have a single instance with a fixed UUID */
2371         vdev->instanceid = vdc->instanceid;
2372     }
2373 }
2374 
2375 const VMStateDescription vmstate_vmbus_dev = {
2376     .name = TYPE_VMBUS_DEVICE,
2377     .version_id = 0,
2378     .minimum_version_id = 0,
2379     .fields = (const VMStateField[]) {
2380         VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
2381         VMSTATE_UINT16(num_channels, VMBusDevice),
2382         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
2383                                              num_channels, vmstate_channel,
2384                                              VMBusChannel),
2385         VMSTATE_END_OF_LIST()
2386     }
2387 };
2388 
2389 /* vmbus generic device base */
2390 static const TypeInfo vmbus_dev_type_info = {
2391     .name = TYPE_VMBUS_DEVICE,
2392     .parent = TYPE_DEVICE,
2393     .abstract = true,
2394     .instance_size = sizeof(VMBusDevice),
2395     .class_size = sizeof(VMBusDeviceClass),
2396     .class_init = vmbus_dev_class_init,
2397     .instance_init = vmbus_dev_instance_init,
2398 };
2399 
2400 static void vmbus_realize(BusState *bus, Error **errp)
2401 {
2402     int ret = 0;
2403     VMBus *vmbus = VMBUS(bus);
2404 
2405     qemu_mutex_init(&vmbus->rx_queue_lock);
2406 
2407     QTAILQ_INIT(&vmbus->gpadl_list);
2408     QTAILQ_INIT(&vmbus->channel_list);
2409 
2410     ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2411                                  vmbus_recv_message, vmbus);
2412     if (ret != 0) {
2413         error_setg(errp, "hyperv set message handler failed: %d", ret);
2414         goto error_out;
2415     }
2416 
2417     ret = event_notifier_init(&vmbus->notifier, 0);
2418     if (ret != 0) {
2419         error_setg(errp, "event notifier failed to init with %d", ret);
2420         goto remove_msg_handler;
2421     }
2422 
2423     event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2424     ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2425                                         &vmbus->notifier);
2426     if (ret != 0) {
2427         error_setg(errp, "hyperv set event handler failed with %d", ret);
2428         goto clear_event_notifier;
2429     }
2430 
2431     return;
2432 
2433 clear_event_notifier:
2434     event_notifier_cleanup(&vmbus->notifier);
2435 remove_msg_handler:
2436     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2437 error_out:
2438     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2439 }
2440 
2441 static void vmbus_unrealize(BusState *bus)
2442 {
2443     VMBus *vmbus = VMBUS(bus);
2444 
2445     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2446     hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
2447     event_notifier_cleanup(&vmbus->notifier);
2448 
2449     qemu_mutex_destroy(&vmbus->rx_queue_lock);
2450 }
2451 
2452 static void vmbus_reset_hold(Object *obj, ResetType type)
2453 {
2454     vmbus_deinit(VMBUS(obj));
2455 }
2456 
2457 static char *vmbus_get_dev_path(DeviceState *dev)
2458 {
2459     BusState *bus = qdev_get_parent_bus(dev);
2460     return qdev_get_dev_path(bus->parent);
2461 }
2462 
2463 static char *vmbus_get_fw_dev_path(DeviceState *dev)
2464 {
2465     VMBusDevice *vdev = VMBUS_DEVICE(dev);
2466     char uuid[UUID_STR_LEN];
2467 
2468     qemu_uuid_unparse(&vdev->instanceid, uuid);
2469     return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
2470 }
2471 
2472 static void vmbus_class_init(ObjectClass *klass, const void *data)
2473 {
2474     BusClass *k = BUS_CLASS(klass);
2475     ResettableClass *rc = RESETTABLE_CLASS(klass);
2476 
2477     k->get_dev_path = vmbus_get_dev_path;
2478     k->get_fw_dev_path = vmbus_get_fw_dev_path;
2479     k->realize = vmbus_realize;
2480     k->unrealize = vmbus_unrealize;
2481     rc->phases.hold = vmbus_reset_hold;
2482 }
2483 
2484 static int vmbus_pre_load(void *opaque)
2485 {
2486     VMBusChannel *chan;
2487     VMBus *vmbus = VMBUS(opaque);
2488 
2489     /*
2490      * channel IDs allocated by the source will come in the migration stream
2491      * for each channel, so clean up the ones allocated at realize
2492      */
2493     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2494         unregister_chan_id(chan);
2495     }
2496 
2497     return 0;
2498 }
2499 static int vmbus_post_load(void *opaque, int version_id)
2500 {
2501     int ret;
2502     VMBus *vmbus = VMBUS(opaque);
2503     VMBusGpadl *gpadl;
2504     VMBusChannel *chan;
2505 
2506     ret = vmbus_init(vmbus);
2507     if (ret) {
2508         return ret;
2509     }
2510 
2511     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2512         gpadl->vmbus = vmbus;
2513         gpadl->refcount = 1;
2514     }
2515 
2516     /*
2517      * reopening channels depends on an initialized vmbus, so it's done here
2518      * instead of channel_post_load()
2519      */
2520     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2521 
2522         if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2523             open_channel(chan);
2524         }
2525 
2526         if (chan->state != VMCHAN_OPEN) {
2527             continue;
2528         }
2529 
2530         if (!vmbus_channel_is_open(chan)) {
2531             /* reopen failed, abort loading */
2532             return -1;
2533         }
2534 
2535         /* resume processing on the guest side if it missed the notification */
2536         hyperv_sint_route_set_sint(chan->notify_route);
2537         /* ditto on the host side */
2538         vmbus_channel_notify_host(chan);
2539     }
2540 
2541     vmbus_resched(vmbus);
2542     return 0;
2543 }
2544 
2545 static const VMStateDescription vmstate_post_message_input = {
2546     .name = "vmbus/hyperv_post_message_input",
2547     .version_id = 0,
2548     .minimum_version_id = 0,
2549     .fields = (const VMStateField[]) {
2550         /*
2551          * skip connection_id and message_type as they are validated before
2552          * queueing and ignored on dequeueing
2553          */
2554         VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
2555         VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
2556                             HV_MESSAGE_PAYLOAD_SIZE),
2557         VMSTATE_END_OF_LIST()
2558     }
2559 };
2560 
2561 static bool vmbus_rx_queue_needed(void *opaque)
2562 {
2563     VMBus *vmbus = VMBUS(opaque);
2564     return vmbus->rx_queue_size;
2565 }
2566 
2567 static const VMStateDescription vmstate_rx_queue = {
2568     .name = "vmbus/rx_queue",
2569     .version_id = 0,
2570     .minimum_version_id = 0,
2571     .needed = vmbus_rx_queue_needed,
2572     .fields = (const VMStateField[]) {
2573         VMSTATE_UINT8(rx_queue_head, VMBus),
2574         VMSTATE_UINT8(rx_queue_size, VMBus),
2575         VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
2576                              HV_MSG_QUEUE_LEN, 0,
2577                              vmstate_post_message_input,
2578                              struct hyperv_post_message_input),
2579         VMSTATE_END_OF_LIST()
2580     }
2581 };
2582 
2583 static const VMStateDescription vmstate_vmbus = {
2584     .name = TYPE_VMBUS,
2585     .version_id = 0,
2586     .minimum_version_id = 0,
2587     .pre_load = vmbus_pre_load,
2588     .post_load = vmbus_post_load,
2589     .fields = (const VMStateField[]) {
2590         VMSTATE_UINT8(state, VMBus),
2591         VMSTATE_UINT32(version, VMBus),
2592         VMSTATE_UINT32(target_vp, VMBus),
2593         VMSTATE_UINT64(int_page_gpa, VMBus),
2594         VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
2595                          vmstate_gpadl, VMBusGpadl, link),
2596         VMSTATE_END_OF_LIST()
2597     },
2598     .subsections = (const VMStateDescription * const []) {
2599         &vmstate_rx_queue,
2600         NULL
2601     }
2602 };
2603 
2604 static const TypeInfo vmbus_type_info = {
2605     .name = TYPE_VMBUS,
2606     .parent = TYPE_BUS,
2607     .instance_size = sizeof(VMBus),
2608     .class_init = vmbus_class_init,
2609 };
2610 
2611 static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2612 {
2613     VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2614 
2615     /*
2616      * here there's at least one vmbus bridge that is being realized, so
2617      * vmbus_bridge_find can only return NULL if it's not unique
2618      */
2619     if (!vmbus_bridge_find()) {
2620         error_setg(errp, "there can be at most one %s in the system",
2621                    TYPE_VMBUS_BRIDGE);
2622         return;
2623     }
2624 
2625     if (!hyperv_is_synic_enabled()) {
2626         error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
2627         return;
2628     }
2629 
2630     if (!hyperv_are_vmbus_recommended_features_enabled()) {
2631         warn_report("VMBus enabled without the recommended set of Hyper-V features: "
2632                     "hv-stimer, hv-vapic and hv-runtime. "
2633                     "Some Windows versions might not boot or enable the VMBus device");
2634     }
2635 
2636     bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
2637 }
2638 
2639 static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
2640 {
2641     /* there can be only one VMBus */
2642     return g_strdup("0");
2643 }
2644 
2645 static const VMStateDescription vmstate_vmbus_bridge = {
2646     .name = TYPE_VMBUS_BRIDGE,
2647     .version_id = 0,
2648     .minimum_version_id = 0,
2649     .fields = (const VMStateField[]) {
2650         VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
2651         VMSTATE_END_OF_LIST()
2652     },
2653 };
2654 
2655 static const Property vmbus_bridge_props[] = {
2656     DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
2657 };
2658 
2659 static void vmbus_bridge_class_init(ObjectClass *klass, const void *data)
2660 {
2661     DeviceClass *k = DEVICE_CLASS(klass);
2662     SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
2663 
2664     k->realize = vmbus_bridge_realize;
2665     k->fw_name = "vmbus";
2666     sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
2667     set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
2668     k->vmsd = &vmstate_vmbus_bridge;
2669     device_class_set_props(k, vmbus_bridge_props);
2670     /* override SysBusDevice's default */
2671     k->user_creatable = true;
2672 }
2673 
2674 static const TypeInfo vmbus_bridge_type_info = {
2675     .name = TYPE_VMBUS_BRIDGE,
2676     .parent = TYPE_SYS_BUS_DEVICE,
2677     .instance_size = sizeof(VMBusBridge),
2678     .class_init = vmbus_bridge_class_init,
2679 };
2680 
2681 static void vmbus_register_types(void)
2682 {
2683     type_register_static(&vmbus_bridge_type_info);
2684     type_register_static(&vmbus_dev_type_info);
2685     type_register_static(&vmbus_type_info);
2686 }
2687 
2688 type_init(vmbus_register_types)
2689