1 /*
2 * QEMU Hyper-V VMBus
3 *
4 * Copyright (c) 2017-2018 Virtuozzo International GmbH.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 */
9
10 #include "qemu/osdep.h"
11 #include "qemu/error-report.h"
12 #include "qemu/main-loop.h"
13 #include "exec/target_page.h"
14 #include "qapi/error.h"
15 #include "migration/vmstate.h"
16 #include "hw/qdev-properties.h"
17 #include "hw/qdev-properties-system.h"
18 #include "hw/hyperv/hyperv.h"
19 #include "hw/hyperv/vmbus.h"
20 #include "hw/hyperv/vmbus-bridge.h"
21 #include "hw/sysbus.h"
23 #include "trace.h"
24
25 enum {
26 VMGPADL_INIT,
27 VMGPADL_ALIVE,
28 VMGPADL_TEARINGDOWN,
29 VMGPADL_TORNDOWN,
30 };
31
32 struct VMBusGpadl {
33 /* GPADL id */
34 uint32_t id;
35 /* associated channel id (rudimentary?) */
36 uint32_t child_relid;
37
38 /* number of pages in the GPADL as declared in GPADL_HEADER message */
39 uint32_t num_gfns;
40 /*
41 * Due to limited message size, GPADL may not fit fully in a single
42 * GPADL_HEADER message, and is further populated using GPADL_BODY
43 * messages. @seen_gfns is the number of pages seen so far; once it
44 * reaches @num_gfns, the GPADL is ready to use.
45 */
46 uint32_t seen_gfns;
47 /* array of GFNs (of size @num_gfns once allocated) */
48 uint64_t *gfns;
49
50 uint8_t state;
51
52 QTAILQ_ENTRY(VMBusGpadl) link;
53 VMBus *vmbus;
54 unsigned refcount;
55 };
56
57 /*
58 * Wrap sequential read from / write to GPADL.
59 */
60 typedef struct GpadlIter {
61 VMBusGpadl *gpadl;
62 AddressSpace *as;
63 DMADirection dir;
64 /* offset into GPADL where the next i/o will be performed */
65 uint32_t off;
66 /*
67 * Cached mapping of the currently accessed page, up to page boundary.
68 * Updated lazily on i/o.
69 * Note: MemoryRegionCache can not be used here because pages in the GPADL
70 * are non-contiguous and may belong to different memory regions.
71 */
72 void *map;
73 /* offset after last i/o (i.e. not affected by seek) */
74 uint32_t last_off;
75 /*
76 * Indicator that the iterator is active and may have a cached mapping.
77 * Allows enforcing bracketing of all i/o (which may create cached
78 * mappings) and thus excluding mapping leaks.
79 */
80 bool active;
81 } GpadlIter;
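
/*
 * Typical bracketed use of the iterator (an illustrative sketch mirroring
 * vmbus_iov_to_gpadl() below, not additional functionality): all i/o must
 * happen between gpadl_iter_start_io() and gpadl_iter_end_io() so that any
 * cached mapping is eventually released.
 *
 *     GpadlIter iter;
 *
 *     gpadl_iter_init(&iter, gpadl, as, DMA_DIRECTION_TO_DEVICE);
 *     gpadl_iter_start_io(&iter);
 *     gpadl_iter_seek(&iter, off);
 *     ret = gpadl_iter_io(&iter, buf, len);
 *     gpadl_iter_end_io(&iter);
 */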
82
83 /*
84 * Ring buffer. There are two of them, sitting in the same GPADL, for each
85 * channel.
86 * Each ring buffer consists of a set of pages, with the first page containing
87 * the ring buffer header, and the remaining pages being for data packets.
88 */
89 typedef struct VMBusRingBufCommon {
90 AddressSpace *as;
91 /* GPA of the ring buffer header */
92 dma_addr_t rb_addr;
93 /* start and length of the ring buffer data area within GPADL */
94 uint32_t base;
95 uint32_t len;
96
97 GpadlIter iter;
98 } VMBusRingBufCommon;
99
100 typedef struct VMBusSendRingBuf {
101 VMBusRingBufCommon common;
102 /* current write index, to be committed at the end of send */
103 uint32_t wr_idx;
104 /* write index at the start of send */
105 uint32_t last_wr_idx;
106 /* space to be requested from the guest */
107 uint32_t wanted;
108 /* space reserved for planned sends */
109 uint32_t reserved;
110 /* last seen read index */
111 uint32_t last_seen_rd_idx;
112 } VMBusSendRingBuf;
113
114 typedef struct VMBusRecvRingBuf {
115 VMBusRingBufCommon common;
116 /* current read index, to be committed at the end of receive */
117 uint32_t rd_idx;
118 /* read index at the start of receive */
119 uint32_t last_rd_idx;
120 /* last seen write index */
121 uint32_t last_seen_wr_idx;
122 } VMBusRecvRingBuf;
123
124
125 enum {
126 VMOFFER_INIT,
127 VMOFFER_SENDING,
128 VMOFFER_SENT,
129 };
130
131 enum {
132 VMCHAN_INIT,
133 VMCHAN_OPENING,
134 VMCHAN_OPEN,
135 };
136
137 struct VMBusChannel {
138 VMBusDevice *dev;
139
140 /* channel id */
141 uint32_t id;
142 /*
143 * subchannel index within the device; subchannel #0 is "primary" and
144 * always exists
145 */
146 uint16_t subchan_idx;
147 uint32_t open_id;
148 /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
149 uint32_t target_vp;
150 /* GPADL id to use for the ring buffers */
151 uint32_t ringbuf_gpadl;
152 /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
153 uint32_t ringbuf_send_offset;
154
155 uint8_t offer_state;
156 uint8_t state;
157 bool is_open;
158
159 /* main device worker; copied from the device class */
160 VMBusChannelNotifyCb notify_cb;
161 /*
162 * guest->host notifications, either sent directly or dispatched via
163 * interrupt page (older VMBus)
164 */
165 EventNotifier notifier;
166
167 VMBus *vmbus;
168 /*
169 * SINT route to signal with host->guest notifications; may be shared with
170 * the main VMBus SINT route
171 */
172 HvSintRoute *notify_route;
173 VMBusGpadl *gpadl;
174
175 VMBusSendRingBuf send_ringbuf;
176 VMBusRecvRingBuf recv_ringbuf;
177
178 QTAILQ_ENTRY(VMBusChannel) link;
179 };
180
181 /*
182 * Hyper-V spec mandates that every message port has 16 buffers, which means
183 * that the guest can post up to this many messages without blocking.
184 * Therefore a queue for incoming messages has to be provided.
185 * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just
186 * doesn't transition to a new state until the message is known to have been
187 * successfully delivered to the respective SynIC message slot.
188 */
189 #define HV_MSG_QUEUE_LEN 16
190
191 /* Hyper-V devices never use channel #0. Must be something special. */
192 #define VMBUS_FIRST_CHANID 1
193 /* Each channel occupies one bit within a single event page sint slot. */
194 #define VMBUS_CHANID_COUNT (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
195 /* Leave a few connection numbers for other purposes. */
196 #define VMBUS_CHAN_CONNECTION_OFFSET 16
197
198 /*
199 * Since the success or failure of sending a message is reported
200 * asynchronously, the VMBus state machine has effectively two entry points:
201 * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest
202 * message delivery status becomes known). Both are run as oneshot BHs on the
203 * main aio context, ensuring serialization.
204 */
205 enum {
206 VMBUS_LISTEN,
207 VMBUS_HANDSHAKE,
208 VMBUS_OFFER,
209 VMBUS_CREATE_GPADL,
210 VMBUS_TEARDOWN_GPADL,
211 VMBUS_OPEN_CHANNEL,
212 VMBUS_UNLOAD,
213 VMBUS_STATE_MAX
214 };
215
216 struct VMBus {
217 BusState parent;
218
219 uint8_t state;
220 /* protection against recursive aio_poll (see vmbus_run) */
221 bool in_progress;
222 /* whether there's a message being delivered to the guest */
223 bool msg_in_progress;
224 uint32_t version;
225 /* VP_INDEX of the vCPU to send messages and interrupts to */
226 uint32_t target_vp;
227 HvSintRoute *sint_route;
228 /*
229 * interrupt page for older protocol versions; newer ones use SynIC event
230 * flags directly
231 */
232 hwaddr int_page_gpa;
233
234 DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
235
236 /* incoming message queue */
237 struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
238 uint8_t rx_queue_head;
239 uint8_t rx_queue_size;
240 QemuMutex rx_queue_lock;
241
242 QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
243 QTAILQ_HEAD(, VMBusChannel) channel_list;
244
245 /*
246 * guest->host notifications for older VMBus, to be dispatched via
247 * interrupt page
248 */
249 EventNotifier notifier;
250 };
251
252 static bool gpadl_full(VMBusGpadl *gpadl)
253 {
254 return gpadl->seen_gfns == gpadl->num_gfns;
255 }
256
257 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
258 uint32_t child_relid, uint32_t num_gfns)
259 {
260 VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
261
262 gpadl->id = id;
263 gpadl->child_relid = child_relid;
264 gpadl->num_gfns = num_gfns;
265 gpadl->gfns = g_new(uint64_t, num_gfns);
266 QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
267 gpadl->vmbus = vmbus;
268 gpadl->refcount = 1;
269 return gpadl;
270 }
271
272 static void free_gpadl(VMBusGpadl *gpadl)
273 {
274 QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
275 g_free(gpadl->gfns);
276 g_free(gpadl);
277 }
278
279 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
280 {
281 VMBusGpadl *gpadl;
282 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
283 if (gpadl->id == gpadl_id) {
284 return gpadl;
285 }
286 }
287 return NULL;
288 }
289
290 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
291 {
292 VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
293 if (!gpadl || !gpadl_full(gpadl)) {
294 return NULL;
295 }
296 gpadl->refcount++;
297 return gpadl;
298 }
299
300 void vmbus_put_gpadl(VMBusGpadl *gpadl)
301 {
302 if (!gpadl) {
303 return;
304 }
305 if (--gpadl->refcount) {
306 return;
307 }
308 free_gpadl(gpadl);
309 }
310
311 uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
312 {
313 return gpadl->num_gfns * TARGET_PAGE_SIZE;
314 }
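
/*
 * Illustrative GPADL reference handling from a device's point of view (a
 * sketch; the gpadl id and the error path are hypothetical):
 *
 *     VMBusGpadl *gpadl = vmbus_get_gpadl(chan, some_gpadl_id);
 *     if (!gpadl) {
 *         return -ENOENT;
 *     }
 *     ... use vmbus_gpadl_len() / vmbus_iov_to_gpadl() on it ...
 *     vmbus_put_gpadl(gpadl);
 */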
315
316 static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
317 AddressSpace *as, DMADirection dir)
318 {
319 iter->gpadl = gpadl;
320 iter->as = as;
321 iter->dir = dir;
322 iter->active = false;
323 }
324
325 static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
326 {
327 uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
328 uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
329
330 /* mapping is only done for a non-zero amount of i/o */
331 assert(iter->last_off > 0);
332 assert(map_start_in_page < io_end_in_page);
333
334 dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
335 iter->dir, io_end_in_page - map_start_in_page);
336 }
337
338 /*
339 * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
340 * The direction of the copy is determined by @iter->dir.
341 * The caller must ensure the operation overflows neither @buf nor the GPADL
342 * (there's an assert for the latter).
343 * Reuse the currently mapped page in the GPADL if possible.
344 */
345 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
346 {
347 ssize_t ret = len;
348
349 assert(iter->active);
350
351 while (len) {
352 uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
353 uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
354 uint32_t cplen = MIN(pgleft, len);
355 void *p;
356
357 /* try to reuse the cached mapping */
358 if (iter->map) {
359 uint32_t map_start_in_page =
360 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
361 uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
362 uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
363 if (off_base != mapped_base || off_in_page < map_start_in_page) {
364 gpadl_iter_cache_unmap(iter);
365 iter->map = NULL;
366 }
367 }
368
369 if (!iter->map) {
370 dma_addr_t maddr;
371 dma_addr_t mlen = pgleft;
372 uint32_t idx = iter->off >> TARGET_PAGE_BITS;
373 assert(idx < iter->gpadl->num_gfns);
374
375 maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
376
377 iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
378 MEMTXATTRS_UNSPECIFIED);
379 if (mlen != pgleft) {
380 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
381 iter->map = NULL;
382 return -EFAULT;
383 }
384 }
385
386 p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
387 off_in_page);
388 if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
389 memcpy(p, buf, cplen);
390 } else {
391 memcpy(buf, p, cplen);
392 }
393
394 buf += cplen;
395 len -= cplen;
396 iter->off += cplen;
397 iter->last_off = iter->off;
398 }
399
400 return ret;
401 }
402
403 /*
404 * Position the iterator @iter at new offset @new_off.
405 * If this results in the cached mapping being unusable with the new offset,
406 * unmap it.
407 */
408 static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
409 {
410 assert(iter->active);
411 iter->off = new_off;
412 }
413
414 /*
415 * Start a series of i/o on the GPADL.
416 * After this, i/o and seek operations on @iter become legal.
417 */
418 static inline void gpadl_iter_start_io(GpadlIter *iter)
419 {
420 assert(!iter->active);
421 /* mapping is cached lazily on i/o */
422 iter->map = NULL;
423 iter->active = true;
424 }
425
426 /*
427 * End the earlier started series of i/o on the GPADL and release the cached
428 * mapping if any.
429 */
430 static inline void gpadl_iter_end_io(GpadlIter *iter)
431 {
432 assert(iter->active);
433
434 if (iter->map) {
435 gpadl_iter_cache_unmap(iter);
436 }
437
438 iter->active = false;
439 }
440
441 static void vmbus_resched(VMBus *vmbus);
442 static void vmbus_msg_cb(void *data, int status);
443
444 ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
445 const struct iovec *iov, size_t iov_cnt)
446 {
447 GpadlIter iter;
448 size_t i;
449 ssize_t ret = 0;
450
451 gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
452 DMA_DIRECTION_FROM_DEVICE);
453 gpadl_iter_start_io(&iter);
454 gpadl_iter_seek(&iter, off);
455 for (i = 0; i < iov_cnt; i++) {
456 ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
457 if (ret < 0) {
458 goto out;
459 }
460 }
461 out:
462 gpadl_iter_end_io(&iter);
463 return ret;
464 }
465
466 int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
467 unsigned iov_cnt, size_t len, size_t off)
468 {
469 int ret_cnt = 0, ret;
470 unsigned i;
471 QEMUSGList *sgl = &req->sgl;
472 ScatterGatherEntry *sg = sgl->sg;
473
474 for (i = 0; i < sgl->nsg; i++) {
475 if (sg[i].len > off) {
476 break;
477 }
478 off -= sg[i].len;
479 }
480 for (; len && i < sgl->nsg; i++) {
481 dma_addr_t mlen = MIN(sg[i].len - off, len);
482 dma_addr_t addr = sg[i].base + off;
483 len -= mlen;
484 off = 0;
485
486 for (; mlen; ret_cnt++) {
487 dma_addr_t l = mlen;
488 dma_addr_t a = addr;
489
490 if (ret_cnt == iov_cnt) {
491 ret = -ENOBUFS;
492 goto err;
493 }
494
495 iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir,
496 MEMTXATTRS_UNSPECIFIED);
497 if (!l) {
498 ret = -EFAULT;
499 goto err;
500 }
501 iov[ret_cnt].iov_len = l;
502 addr += l;
503 mlen -= l;
504 }
505 }
506
507 return ret_cnt;
508 err:
509 vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
510 return ret;
511 }
512
513 void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
514 unsigned iov_cnt, size_t accessed)
515 {
516 QEMUSGList *sgl = &req->sgl;
517 unsigned i;
518
519 for (i = 0; i < iov_cnt; i++) {
520 size_t acsd = MIN(accessed, iov[i].iov_len);
521 dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
522 accessed -= acsd;
523 }
524 }
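
/*
 * Illustrative pairing of vmbus_map_sgl() with vmbus_unmap_sgl() (a sketch;
 * MY_IOV_MAX and the processing step are hypothetical):
 *
 *     struct iovec iov[MY_IOV_MAX];
 *     int n = vmbus_map_sgl(req, DMA_DIRECTION_TO_DEVICE, iov, MY_IOV_MAX,
 *                           len, 0);
 *     if (n >= 0) {
 *         ... read the guest data described by iov[0..n-1] ...
 *         vmbus_unmap_sgl(req, DMA_DIRECTION_TO_DEVICE, iov, n, 0);
 *     }
 */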
525
526 static const VMStateDescription vmstate_gpadl = {
527 .name = "vmbus/gpadl",
528 .version_id = 0,
529 .minimum_version_id = 0,
530 .fields = (const VMStateField[]) {
531 VMSTATE_UINT32(id, VMBusGpadl),
532 VMSTATE_UINT32(child_relid, VMBusGpadl),
533 VMSTATE_UINT32(num_gfns, VMBusGpadl),
534 VMSTATE_UINT32(seen_gfns, VMBusGpadl),
535 VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
536 vmstate_info_uint64, uint64_t),
537 VMSTATE_UINT8(state, VMBusGpadl),
538 VMSTATE_END_OF_LIST()
539 }
540 };
541
542 /*
543 * Wrap the index into a ring buffer of @len bytes.
544 * @idx is assumed not to exceed twice the size of the ringbuffer, so only
545 * single wraparound is considered.
546 */
547 static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
548 {
549 if (idx >= len) {
550 idx -= len;
551 }
552 return idx;
553 }
554
555 /*
556 * Circular difference between two indices into a ring buffer of @len bytes.
557 * @allow_catchup - whether @idx1 may catch up with @idx2; e.g. the read
558 * index may catch up with the write index but not vice versa.
559 */
560 static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
561 bool allow_catchup)
562 {
563 return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len);
564 }
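
/*
 * Worked example (illustrative): with len = 0x1000, rd_idx = 0xf00 and
 * wr_idx = 0x100, the data available to the consumer is
 * rb_idx_delta(0xf00, 0x100, 0x1000, true) = 0x200 bytes, while the space
 * available to the producer is rb_idx_delta(0x100, 0xf00, 0x1000, false) =
 * 0xdff bytes; the "- !allow_catchup" term keeps one byte unused so that the
 * write index never catches up with the read index.
 */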
565
566 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
567 {
568 vmbus_ring_buffer *rb;
569 dma_addr_t mlen = sizeof(*rb);
570
571 rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
572 DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
573 if (mlen != sizeof(*rb)) {
574 dma_memory_unmap(ringbuf->as, rb, mlen,
575 DMA_DIRECTION_FROM_DEVICE, 0);
576 return NULL;
577 }
578 return rb;
579 }
580
581 static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
582 vmbus_ring_buffer *rb, bool dirty)
583 {
584 assert(rb);
585
586 dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
587 dirty ? sizeof(*rb) : 0);
588 }
589
590 static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
591 AddressSpace *as, DMADirection dir,
592 uint32_t begin, uint32_t end)
593 {
594 ringbuf->as = as;
595 ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
596 ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
597 ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
598 gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
599 }
600
601 static int ringbufs_init(VMBusChannel *chan)
602 {
603 vmbus_ring_buffer *rb;
604 VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
605 VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
606
607 if (chan->ringbuf_send_offset <= 1 ||
608 chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
609 return -EINVAL;
610 }
611
612 ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
613 DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
614 ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
615 DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
616 chan->gpadl->num_gfns);
617 send_ringbuf->wanted = 0;
618 send_ringbuf->reserved = 0;
619
620 rb = ringbuf_map_hdr(&recv_ringbuf->common);
621 if (!rb) {
622 return -EFAULT;
623 }
624 recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
625 ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
626
627 rb = ringbuf_map_hdr(&send_ringbuf->common);
628 if (!rb) {
629 return -EFAULT;
630 }
631 send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
632 send_ringbuf->last_seen_rd_idx = rb->read_index;
633 rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
634 ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
635
636 if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
637 send_ringbuf->wr_idx >= send_ringbuf->common.len) {
638 return -EOVERFLOW;
639 }
640
641 return 0;
642 }
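
/*
 * Resulting layout of the ring buffer GPADL (illustrative), with
 * ringbuf_send_offset = S and num_gfns = N:
 *
 *     page 0             recv (guest->host) ring buffer header
 *     pages 1 .. S-1     recv ring buffer data
 *     page S             send (host->guest) ring buffer header
 *     pages S+1 .. N-1   send ring buffer data
 */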
643
644 /*
645 * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
646 * around if needed.
647 * @len is assumed not to exceed the size of the ringbuffer, so only single
648 * wraparound is considered.
649 */
650 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
651 {
652 ssize_t ret1 = 0, ret2 = 0;
653 uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
654
655 if (len >= remain) {
656 ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
657 if (ret1 < 0) {
658 return ret1;
659 }
660 gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
661 buf += remain;
662 len -= remain;
663 }
664 ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
665 if (ret2 < 0) {
666 return ret2;
667 }
668 return ret1 + ret2;
669 }
670
671 /*
672 * Position the circular iterator within @ringbuf to offset @new_off, wrapping
673 * around if needed.
674 * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
675 * single wraparound is considered.
676 */
677 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
678 {
679 gpadl_iter_seek(&ringbuf->iter,
680 ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
681 }
682
683 static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
684 {
685 return ringbuf->iter.off - ringbuf->base;
686 }
687
688 static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
689 {
690 gpadl_iter_start_io(&ringbuf->iter);
691 }
692
693 static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
694 {
695 gpadl_iter_end_io(&ringbuf->iter);
696 }
697
698 VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
699 {
700 return chan->dev;
701 }
702
703 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
704 {
705 if (chan_idx >= dev->num_channels) {
706 return NULL;
707 }
708 return &dev->channels[chan_idx];
709 }
710
711 uint32_t vmbus_channel_idx(VMBusChannel *chan)
712 {
713 return chan - chan->dev->channels;
714 }
715
716 void vmbus_channel_notify_host(VMBusChannel *chan)
717 {
718 event_notifier_set(&chan->notifier);
719 }
720
721 bool vmbus_channel_is_open(VMBusChannel *chan)
722 {
723 return chan->is_open;
724 }
725
726 /*
727 * Notify the guest side about the data to work on in the channel ring buffer.
728 * The notification is done by signaling a dedicated per-channel SynIC event
729 * flag (more recent guests) or setting a bit in the interrupt page and firing
730 * the VMBus SINT (older guests).
731 */
732 static int vmbus_channel_notify_guest(VMBusChannel *chan)
733 {
734 int res = 0;
735 unsigned long *int_map, mask;
736 unsigned idx;
737 hwaddr addr = chan->vmbus->int_page_gpa;
738 hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
739
740 trace_vmbus_channel_notify_guest(chan->id);
741
742 if (!addr) {
743 return hyperv_set_event_flag(chan->notify_route, chan->id);
744 }
745
746 int_map = cpu_physical_memory_map(addr, &len, 1);
747 if (len != TARGET_PAGE_SIZE / 2) {
748 res = -ENXIO;
749 goto unmap;
750 }
751
752 idx = BIT_WORD(chan->id);
753 mask = BIT_MASK(chan->id);
754 if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
755 res = hyperv_sint_route_set_sint(chan->notify_route);
756 dirty = len;
757 }
758
759 unmap:
760 cpu_physical_memory_unmap(int_map, len, 1, dirty);
761 return res;
762 }
763
764 #define VMBUS_PKT_TRAILER sizeof(uint64_t)
765
766 static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
767 uint32_t desclen, uint32_t msglen)
768 {
769 hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
770 DIV_ROUND_UP(desclen, sizeof(uint64_t));
771 hdr->len_qwords = hdr->offset_qwords +
772 DIV_ROUND_UP(msglen, sizeof(uint64_t));
773 return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
774 }
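
/*
 * Worked example (illustrative, with the 16-byte i.e. 2-qword packet header):
 * for desclen = 20 and msglen = 24, offset_qwords = 2 + 3 = 5 and
 * len_qwords = 5 + 3 = 8, so the packet occupies 8 * 8 = 64 bytes of ring
 * buffer data plus the 8-byte trailer, and the function returns 72.
 */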
775
776 /*
777 * Simplified ring buffer operation with paired barriers annotations in the
778 * producer and consumer loops:
779 *
780 * producer * consumer
781 * ~~~~~~~~ * ~~~~~~~~
782 * write pending_send_sz * read write_index
783 * smp_mb [A] * smp_mb [C]
784 * read read_index * read packet
785 * smp_mb [B] * read/write out-of-band data
786 * read/write out-of-band data * smp_mb [B]
787 * write packet * write read_index
788 * smp_mb [C] * smp_mb [A]
789 * write write_index * read pending_send_sz
790 * smp_wmb [D] * smp_rmb [D]
791 * write pending_send_sz * read write_index
792 * ... * ...
793 */
794
795 static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
796 {
797 /* don't trust guest data */
798 if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
799 return 0;
800 }
801 return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
802 ringbuf->common.len, false);
803 }
804
805 static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
806 {
807 VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
808 vmbus_ring_buffer *rb;
809 uint32_t written;
810
811 written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
812 ringbuf->common.len, true);
813 if (!written) {
814 return 0;
815 }
816
817 rb = ringbuf_map_hdr(&ringbuf->common);
818 if (!rb) {
819 return -EFAULT;
820 }
821
822 ringbuf->reserved -= written;
823
824 /* prevent reorder with the data operation and packet write */
825 smp_mb(); /* barrier pair [C] */
826 rb->write_index = ringbuf->wr_idx;
827
828 /*
829 * If the producer earlier indicated that it wants to be notified when the
830 * consumer frees a certain amount of space in the ring buffer, that amount
831 * is reduced by the size of the completed write.
832 */
833 if (ringbuf->wanted) {
834 /* otherwise reservation would fail */
835 assert(ringbuf->wanted < written);
836 ringbuf->wanted -= written;
837 /* prevent reorder with write_index write */
838 smp_wmb(); /* barrier pair [D] */
839 rb->pending_send_sz = ringbuf->wanted;
840 }
841
842 /* prevent reorder with write_index or pending_send_sz write */
843 smp_mb(); /* barrier pair [A] */
844 ringbuf->last_seen_rd_idx = rb->read_index;
845
846 /*
847 * The consumer may have missed the reduction of pending_send_sz and skipped
848 * the notification, so re-check the blocking condition and, if it's no
849 * longer true, ensure another processing iteration by simulating the
850 * consumer's notification.
851 */
852 if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
853 vmbus_channel_notify_host(chan);
854 }
855
856 /* skip notification by consumer's request */
857 if (rb->interrupt_mask) {
858 goto out;
859 }
860
861 /*
862 * The consumer hasn't caught up with the producer's previous state so it's
863 * not blocked.
864 * (last_seen_rd_idx comes from the guest but it's safe to use w/o
865 * validation here as it only affects notification.)
866 */
867 if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
868 ringbuf->common.len, true) > written) {
869 goto out;
870 }
871
872 vmbus_channel_notify_guest(chan);
873 out:
874 ringbuf_unmap_hdr(&ringbuf->common, rb, true);
875 ringbuf->last_wr_idx = ringbuf->wr_idx;
876 return written;
877 }
878
879 int vmbus_channel_reserve(VMBusChannel *chan,
880 uint32_t desclen, uint32_t msglen)
881 {
882 VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
883 vmbus_ring_buffer *rb = NULL;
884 vmbus_packet_hdr hdr;
885 uint32_t needed = ringbuf->reserved +
886 vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
887
888 /* avoid touching the guest memory if possible */
889 if (likely(needed <= ringbuf_send_avail(ringbuf))) {
890 goto success;
891 }
892
893 rb = ringbuf_map_hdr(&ringbuf->common);
894 if (!rb) {
895 return -EFAULT;
896 }
897
898 /* fetch read index from guest memory and try again */
899 ringbuf->last_seen_rd_idx = rb->read_index;
900
901 if (likely(needed <= ringbuf_send_avail(ringbuf))) {
902 goto success;
903 }
904
905 rb->pending_send_sz = needed;
906
907 /*
908 * The consumer may have made progress and freed up some space before
909 * seeing updated pending_send_sz, so re-read read_index (preventing
910 * reorder with the pending_send_sz write) and try again.
911 */
912 smp_mb(); /* barrier pair [A] */
913 ringbuf->last_seen_rd_idx = rb->read_index;
914
915 if (needed > ringbuf_send_avail(ringbuf)) {
916 goto out;
917 }
918
919 success:
920 ringbuf->reserved = needed;
921 needed = 0;
922
923 /* clear pending_send_sz if it was set */
924 if (ringbuf->wanted) {
925 if (!rb) {
926 rb = ringbuf_map_hdr(&ringbuf->common);
927 if (!rb) {
928 /* failure to clear pending_send_sz is non-fatal */
929 goto out;
930 }
931 }
932
933 rb->pending_send_sz = 0;
934 }
935
936 /* prevent reorder of the following data operation with read_index read */
937 smp_mb(); /* barrier pair [B] */
938
939 out:
940 if (rb) {
941 ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
942 }
943 ringbuf->wanted = needed;
944 return needed ? -ENOSPC : 0;
945 }
946
947 ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
948 void *desc, uint32_t desclen,
949 void *msg, uint32_t msglen,
950 bool need_comp, uint64_t transaction_id)
951 {
952 ssize_t ret = 0;
953 vmbus_packet_hdr hdr;
954 uint32_t totlen;
955 VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
956
957 if (!vmbus_channel_is_open(chan)) {
958 return -EINVAL;
959 }
960
961 totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
962 hdr.type = pkt_type;
963 hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
964 hdr.transaction_id = transaction_id;
965
966 assert(totlen <= ringbuf->reserved);
967
968 ringbuf_start_io(&ringbuf->common);
969 ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
970 ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
971 if (ret < 0) {
972 goto out;
973 }
974 if (desclen) {
975 assert(desc);
976 ret = ringbuf_io(&ringbuf->common, desc, desclen);
977 if (ret < 0) {
978 goto out;
979 }
980 ringbuf_seek(&ringbuf->common,
981 ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
982 }
983 ret = ringbuf_io(&ringbuf->common, msg, msglen);
984 if (ret < 0) {
985 goto out;
986 }
987 ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
988 ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
989 ret = 0;
990 out:
991 ringbuf_end_io(&ringbuf->common);
992 if (ret) {
993 return ret;
994 }
995 return ringbuf_send_update_idx(chan);
996 }
997
998 ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
999 void *msg, uint32_t msglen)
1000 {
1001 assert(req->need_comp);
1002 return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
1003 msg, msglen, false, req->transaction_id);
1004 }
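
/*
 * Typical device-side transmit sequence (an illustrative sketch; the message
 * struct and transaction id are hypothetical):
 *
 *     struct my_proto_msg msg = { ... };
 *
 *     if (vmbus_channel_reserve(chan, 0, sizeof(msg))) {
 *         return;     (no space yet; retry later from the notify callback)
 *     }
 *     vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, NULL, 0,
 *                        &msg, sizeof(msg), true, my_transaction_id);
 */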
1005
1006 static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
1007 VMBusRingBufCommon *ringbuf, uint32_t len)
1008 {
1009 int ret;
1010 vmbus_pkt_gpa_direct hdr;
1011 hwaddr curaddr = 0;
1012 hwaddr curlen = 0;
1013 int num;
1014
1015 if (len < sizeof(hdr)) {
1016 return -EIO;
1017 }
1018 ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
1019 if (ret < 0) {
1020 return ret;
1021 }
1022 len -= sizeof(hdr);
1023
1024 num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
1025 if (num < 0) {
1026 return -EIO;
1027 }
1028 qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
1029
1030 for (; hdr.rangecount; hdr.rangecount--) {
1031 vmbus_gpa_range range;
1032
1033 if (len < sizeof(range)) {
1034 goto eio;
1035 }
1036 ret = ringbuf_io(ringbuf, &range, sizeof(range));
1037 if (ret < 0) {
1038 goto err;
1039 }
1040 len -= sizeof(range);
1041
1042 if (range.byte_offset & TARGET_PAGE_MASK) {
1043 goto eio;
1044 }
1045
1046 for (; range.byte_count; range.byte_offset = 0) {
1047 uint64_t paddr;
1048 uint32_t plen = MIN(range.byte_count,
1049 TARGET_PAGE_SIZE - range.byte_offset);
1050
1051 if (len < sizeof(uint64_t)) {
1052 goto eio;
1053 }
1054 ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
1055 if (ret < 0) {
1056 goto err;
1057 }
1058 len -= sizeof(uint64_t);
1059 paddr <<= TARGET_PAGE_BITS;
1060 paddr |= range.byte_offset;
1061 range.byte_count -= plen;
1062
1063 if (curaddr + curlen == paddr) {
1064 /* consecutive fragments - join */
1065 curlen += plen;
1066 } else {
1067 if (curlen) {
1068 qemu_sglist_add(sgl, curaddr, curlen);
1069 }
1070
1071 curaddr = paddr;
1072 curlen = plen;
1073 }
1074 }
1075 }
1076
1077 if (curlen) {
1078 qemu_sglist_add(sgl, curaddr, curlen);
1079 }
1080
1081 return 0;
1082 eio:
1083 ret = -EIO;
1084 err:
1085 qemu_sglist_destroy(sgl);
1086 return ret;
1087 }
1088
1089 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1090 uint32_t size, uint16_t pkt_type,
1091 uint32_t msglen, uint64_t transaction_id,
1092 bool need_comp)
1093 {
1094 VMBusChanReq *req;
1095 uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1096 uint32_t totlen = msgoff + msglen;
1097
1098 req = g_malloc0(totlen);
1099 req->chan = chan;
1100 req->pkt_type = pkt_type;
1101 req->msg = (void *)req + msgoff;
1102 req->msglen = msglen;
1103 req->transaction_id = transaction_id;
1104 req->need_comp = need_comp;
1105 return req;
1106 }
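
/*
 * Resulting allocation layout (illustrative): a device typically passes
 * size = sizeof() of its own request struct that embeds VMBusChanReq as the
 * first member (a hypothetical MyDevReq), giving a single buffer of
 *
 *     [ MyDevReq | padding up to @msgoff | message payload of @msglen bytes ]
 *
 * with req->msg pointing at the payload area.
 */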
1107
1108 int vmbus_channel_recv_start(VMBusChannel *chan)
1109 {
1110 VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1111 vmbus_ring_buffer *rb;
1112
1113 rb = ringbuf_map_hdr(&ringbuf->common);
1114 if (!rb) {
1115 return -EFAULT;
1116 }
1117 ringbuf->last_seen_wr_idx = rb->write_index;
1118 ringbuf_unmap_hdr(&ringbuf->common, rb, false);
1119
1120 if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
1121 return -EOVERFLOW;
1122 }
1123
1124 /* prevent reorder of the following data operation with write_index read */
1125 smp_mb(); /* barrier pair [C] */
1126 return 0;
1127 }
1128
1129 void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
1130 {
1131 VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1132 vmbus_packet_hdr hdr = {};
1133 VMBusChanReq *req;
1134 uint32_t avail;
1135 uint32_t totlen, pktlen, msglen, msgoff, desclen;
1136
1137 assert(size >= sizeof(*req));
1138
1139 /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
1140 avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
1141 ringbuf->common.len, true);
1142 if (avail < sizeof(hdr)) {
1143 return NULL;
1144 }
1145
1146 ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
1147 if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
1148 return NULL;
1149 }
1150
1151 pktlen = hdr.len_qwords * sizeof(uint64_t);
1152 totlen = pktlen + VMBUS_PKT_TRAILER;
1153 if (totlen > avail) {
1154 return NULL;
1155 }
1156
1157 msgoff = hdr.offset_qwords * sizeof(uint64_t);
1158 if (msgoff > pktlen || msgoff < sizeof(hdr)) {
1159 error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
1160 return NULL;
1161 }
1162
1163 msglen = pktlen - msgoff;
1164
1165 req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
1166 hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
1167
1168 switch (hdr.type) {
1169 case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
1170 desclen = msgoff - sizeof(hdr);
1171 if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
1172 desclen) < 0) {
1173 error_report("%s: failed to convert GPA ranges to SGL", __func__);
1174 goto free_req;
1175 }
1176 break;
1177 case VMBUS_PACKET_DATA_INBAND:
1178 case VMBUS_PACKET_COMP:
1179 break;
1180 default:
1181 error_report("%s: unexpected msg type: %x", __func__, hdr.type);
1182 goto free_req;
1183 }
1184
1185 ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
1186 if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
1187 goto free_req;
1188 }
1189 ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
1190
1191 return req;
1192 free_req:
1193 vmbus_free_req(req);
1194 return NULL;
1195 }
1196
1197 void vmbus_channel_recv_pop(VMBusChannel *chan)
1198 {
1199 VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1200 ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
1201 }
1202
1203 ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
1204 {
1205 VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1206 vmbus_ring_buffer *rb;
1207 uint32_t read;
1208
1209 read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
1210 ringbuf->common.len, true);
1211 if (!read) {
1212 return 0;
1213 }
1214
1215 rb = ringbuf_map_hdr(&ringbuf->common);
1216 if (!rb) {
1217 return -EFAULT;
1218 }
1219
1220 /* prevent reorder with the data operation and packet read */
1221 smp_mb(); /* barrier pair [B] */
1222 rb->read_index = ringbuf->rd_idx;
1223
1224 /* prevent reorder of the following pending_send_sz read */
1225 smp_mb(); /* barrier pair [A] */
1226
1227 if (rb->interrupt_mask) {
1228 goto out;
1229 }
1230
1231 if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
1232 uint32_t wr_idx, wr_avail;
1233 uint32_t wanted = rb->pending_send_sz;
1234
1235 if (!wanted) {
1236 goto out;
1237 }
1238
1239 /* prevent reorder with pending_send_sz read */
1240 smp_rmb(); /* barrier pair [D] */
1241 wr_idx = rb->write_index;
1242
1243 wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
1244 true);
1245
1246 /* the producer wasn't blocked on the consumer state */
1247 if (wr_avail >= read + wanted) {
1248 goto out;
1249 }
1250 /* there's not enough space for the producer to make progress */
1251 if (wr_avail < wanted) {
1252 goto out;
1253 }
1254 }
1255
1256 vmbus_channel_notify_guest(chan);
1257 out:
1258 ringbuf_unmap_hdr(&ringbuf->common, rb, true);
1259 ringbuf->last_rd_idx = ringbuf->rd_idx;
1260 return read;
1261 }
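
/*
 * Typical device worker receive loop (an illustrative sketch; the processing
 * step is hypothetical and the request is assumed to be handled
 * synchronously):
 *
 *     VMBusChanReq *req;
 *
 *     if (vmbus_channel_recv_start(chan)) {
 *         return;
 *     }
 *     while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) {
 *         ... process req->msg / req->sgl ...
 *         vmbus_channel_recv_pop(chan);
 *         vmbus_free_req(req);
 *     }
 *     vmbus_channel_recv_done(chan);
 */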
1262
1263 void vmbus_free_req(void *req)
1264 {
1265 VMBusChanReq *r = req;
1266
1267 if (!req) {
1268 return;
1269 }
1270
1271 if (r->sgl.dev) {
1272 qemu_sglist_destroy(&r->sgl);
1273 }
1274 g_free(req);
1275 }
1276
1277 static void channel_event_cb(EventNotifier *e)
1278 {
1279 VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
1280 if (event_notifier_test_and_clear(e)) {
1281 /*
1282 * All receives are supposed to happen within the device worker, so
1283 * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
1284 * potentially reuse the cached mapping throughout the worker.
1285 * Can't do this for sends as they may happen outside the device
1286 * worker.
1287 */
1288 VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1289 ringbuf_start_io(&ringbuf->common);
1290 chan->notify_cb(chan);
1291 ringbuf_end_io(&ringbuf->common);
1292
1293 }
1294 }
1295
1296 static int alloc_chan_id(VMBus *vmbus)
1297 {
1298 int ret;
1299
1300 ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
1301 if (ret == VMBUS_CHANID_COUNT) {
1302 return -ENOMEM;
1303 }
1304 return ret + VMBUS_FIRST_CHANID;
1305 }
1306
1307 static int register_chan_id(VMBusChannel *chan)
1308 {
1309 return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
1310 chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
1311 }
1312
1313 static void unregister_chan_id(VMBusChannel *chan)
1314 {
1315 clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
1316 }
1317
1318 static uint32_t chan_connection_id(VMBusChannel *chan)
1319 {
1320 return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
1321 }
1322
1323 static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
1324 VMBusChannel *chan, uint16_t idx, Error **errp)
1325 {
1326 int res;
1327
1328 chan->dev = dev;
1329 chan->notify_cb = vdc->chan_notify_cb;
1330 chan->subchan_idx = idx;
1331 chan->vmbus = vmbus;
1332
1333 res = alloc_chan_id(vmbus);
1334 if (res < 0) {
1335 error_setg(errp, "no spare channel id");
1336 return;
1337 }
1338 chan->id = res;
1339 register_chan_id(chan);
1340
1341 /*
1342 * The guest drivers depend on the device subchannels (idx #1+) to be
1343 * offered after the primary channel (idx #0) of that device. To ensure
1344 * that, record the channels on the channel list in the order they appear
1345 * within the device.
1346 */
1347 QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
1348 }
1349
1350 static void deinit_channel(VMBusChannel *chan)
1351 {
1352 assert(chan->state == VMCHAN_INIT);
1353 QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
1354 unregister_chan_id(chan);
1355 }
1356
1357 static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1358 {
1359 uint16_t i;
1360 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1361 Error *err = NULL;
1362
1363 dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1364 if (dev->num_channels < 1) {
1365 error_setg(errp, "invalid #channels: %u", dev->num_channels);
1366 return;
1367 }
1368
1369 dev->channels = g_new0(VMBusChannel, dev->num_channels);
1370 for (i = 0; i < dev->num_channels; i++) {
1371 init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1372 if (err) {
1373 goto err_init;
1374 }
1375 }
1376
1377 return;
1378
1379 err_init:
1380 while (i--) {
1381 deinit_channel(&dev->channels[i]);
1382 }
1383 error_propagate(errp, err);
1384 }
1385
1386 static void free_channels(VMBusDevice *dev)
1387 {
1388 uint16_t i;
1389 for (i = 0; i < dev->num_channels; i++) {
1390 deinit_channel(&dev->channels[i]);
1391 }
1392 g_free(dev->channels);
1393 }
1394
1395 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1396 {
1397 VMBusChannel *chan;
1398
1399 if (vp_index == vmbus->target_vp) {
1400 hyperv_sint_route_ref(vmbus->sint_route);
1401 return vmbus->sint_route;
1402 }
1403
1404 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1405 if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1406 hyperv_sint_route_ref(chan->notify_route);
1407 return chan->notify_route;
1408 }
1409 }
1410
1411 return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1412 }
1413
1414 static void open_channel(VMBusChannel *chan)
1415 {
1416 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1417
1418 chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
1419 if (!chan->gpadl) {
1420 return;
1421 }
1422
1423 if (ringbufs_init(chan)) {
1424 goto put_gpadl;
1425 }
1426
1427 if (event_notifier_init(&chan->notifier, 0)) {
1428 goto put_gpadl;
1429 }
1430
1431 event_notifier_set_handler(&chan->notifier, channel_event_cb);
1432
1433 if (hyperv_set_event_flag_handler(chan_connection_id(chan),
1434 &chan->notifier)) {
1435 goto cleanup_notifier;
1436 }
1437
1438 chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
1439 if (!chan->notify_route) {
1440 goto clear_event_flag_handler;
1441 }
1442
1443 if (vdc->open_channel && vdc->open_channel(chan)) {
1444 goto unref_sint_route;
1445 }
1446
1447 chan->is_open = true;
1448 return;
1449
1450 unref_sint_route:
1451 hyperv_sint_route_unref(chan->notify_route);
1452 clear_event_flag_handler:
1453 hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1454 cleanup_notifier:
1455 event_notifier_set_handler(&chan->notifier, NULL);
1456 event_notifier_cleanup(&chan->notifier);
1457 put_gpadl:
1458 vmbus_put_gpadl(chan->gpadl);
1459 }
1460
1461 static void close_channel(VMBusChannel *chan)
1462 {
1463 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1464
1465 if (!chan->is_open) {
1466 return;
1467 }
1468
1469 if (vdc->close_channel) {
1470 vdc->close_channel(chan);
1471 }
1472
1473 hyperv_sint_route_unref(chan->notify_route);
1474 hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1475 event_notifier_set_handler(&chan->notifier, NULL);
1476 event_notifier_cleanup(&chan->notifier);
1477 vmbus_put_gpadl(chan->gpadl);
1478 chan->is_open = false;
1479 }
1480
1481 static int channel_post_load(void *opaque, int version_id)
1482 {
1483 VMBusChannel *chan = opaque;
1484
1485 return register_chan_id(chan);
1486 }
1487
1488 static const VMStateDescription vmstate_channel = {
1489 .name = "vmbus/channel",
1490 .version_id = 0,
1491 .minimum_version_id = 0,
1492 .post_load = channel_post_load,
1493 .fields = (const VMStateField[]) {
1494 VMSTATE_UINT32(id, VMBusChannel),
1495 VMSTATE_UINT16(subchan_idx, VMBusChannel),
1496 VMSTATE_UINT32(open_id, VMBusChannel),
1497 VMSTATE_UINT32(target_vp, VMBusChannel),
1498 VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
1499 VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
1500 VMSTATE_UINT8(offer_state, VMBusChannel),
1501 VMSTATE_UINT8(state, VMBusChannel),
1502 VMSTATE_END_OF_LIST()
1503 }
1504 };
1505
1506 static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1507 {
1508 VMBusChannel *chan;
1509 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1510 if (chan->id == id) {
1511 return chan;
1512 }
1513 }
1514 return NULL;
1515 }
1516
1517 static int enqueue_incoming_message(VMBus *vmbus,
1518 const struct hyperv_post_message_input *msg)
1519 {
1520 int ret = 0;
1521 uint8_t idx, prev_size;
1522
1523 qemu_mutex_lock(&vmbus->rx_queue_lock);
1524
1525 if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
1526 ret = -ENOBUFS;
1527 goto out;
1528 }
1529
1530 prev_size = vmbus->rx_queue_size;
1531 idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
1532 memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
1533 vmbus->rx_queue_size++;
1534
1535 /* only need to resched if the queue was empty before */
1536 if (!prev_size) {
1537 vmbus_resched(vmbus);
1538 }
1539 out:
1540 qemu_mutex_unlock(&vmbus->rx_queue_lock);
1541 return ret;
1542 }
1543
1544 static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1545 void *data)
1546 {
1547 VMBus *vmbus = data;
1548 struct vmbus_message_header *vmbus_msg;
1549
1550 if (msg->message_type != HV_MESSAGE_VMBUS) {
1551 return HV_STATUS_INVALID_HYPERCALL_INPUT;
1552 }
1553
1554 if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1555 return HV_STATUS_INVALID_HYPERCALL_INPUT;
1556 }
1557
1558 vmbus_msg = (struct vmbus_message_header *)msg->payload;
1559
1560 trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1561
1562 if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1563 vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1564 error_report("vmbus: unknown message type %#x",
1565 vmbus_msg->message_type);
1566 return HV_STATUS_INVALID_HYPERCALL_INPUT;
1567 }
1568
1569 if (enqueue_incoming_message(vmbus, msg)) {
1570 return HV_STATUS_INSUFFICIENT_BUFFERS;
1571 }
1572 return HV_STATUS_SUCCESS;
1573 }
1574
1575 static bool vmbus_initialized(VMBus *vmbus)
1576 {
1577 return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
1578 }
1579
1580 static void vmbus_reset_all(VMBus *vmbus)
1581 {
1582 bus_cold_reset(BUS(vmbus));
1583 }
1584
1585 static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
1586 {
1587 int ret;
1588 struct hyperv_message msg = {
1589 .header.message_type = HV_MESSAGE_VMBUS,
1590 };
1591
1592 assert(!vmbus->msg_in_progress);
1593 assert(msglen <= sizeof(msg.payload));
1594 assert(msglen >= sizeof(struct vmbus_message_header));
1595
1596 vmbus->msg_in_progress = true;
1597
1598 trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
1599 msglen);
1600
1601 memcpy(msg.payload, msgdata, msglen);
1602 msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
1603
1604 ret = hyperv_post_msg(vmbus->sint_route, &msg);
1605 if (ret == 0 || ret == -EAGAIN) {
1606 return;
1607 }
1608
1609 error_report("message delivery fatal failure: %d; aborting vmbus", ret);
1610 vmbus_reset_all(vmbus);
1611 }
1612
1613 static int vmbus_init(VMBus *vmbus)
1614 {
1615 if (vmbus->target_vp != (uint32_t)-1) {
1616 vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
1617 vmbus_msg_cb, vmbus);
1618 if (!vmbus->sint_route) {
1619 error_report("failed to set up SINT route");
1620 return -ENOMEM;
1621 }
1622 }
1623 return 0;
1624 }
1625
1626 static void vmbus_deinit(VMBus *vmbus)
1627 {
1628 VMBusGpadl *gpadl, *tmp_gpadl;
1629 VMBusChannel *chan;
1630
1631 QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
1632 if (gpadl->state == VMGPADL_TORNDOWN) {
1633 continue;
1634 }
1635 vmbus_put_gpadl(gpadl);
1636 }
1637
1638 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1639 chan->offer_state = VMOFFER_INIT;
1640 }
1641
1642 hyperv_sint_route_unref(vmbus->sint_route);
1643 vmbus->sint_route = NULL;
1644 vmbus->int_page_gpa = 0;
1645 vmbus->target_vp = (uint32_t)-1;
1646 vmbus->version = 0;
1647 vmbus->state = VMBUS_LISTEN;
1648 vmbus->msg_in_progress = false;
1649 }
1650
1651 static void handle_initiate_contact(VMBus *vmbus,
1652 vmbus_message_initiate_contact *msg,
1653 uint32_t msglen)
1654 {
1655 if (msglen < sizeof(*msg)) {
1656 return;
1657 }
1658
1659 trace_vmbus_initiate_contact(msg->version_requested >> 16,
1660 msg->version_requested & 0xffff,
1661 msg->target_vcpu, msg->monitor_page1,
1662 msg->monitor_page2, msg->interrupt_page);
1663
1664 /*
1665 * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
1666 * Useful, in particular, with a vmbus-aware BIOS which can't shut vmbus down
1667 * before handing over to the OS loader.
1668 */
1669 vmbus_reset_all(vmbus);
1670
1671 vmbus->target_vp = msg->target_vcpu;
1672 vmbus->version = msg->version_requested;
1673 if (vmbus->version < VMBUS_VERSION_WIN8) {
1674 /* Linux passes the interrupt page even when it doesn't need it */
1675 vmbus->int_page_gpa = msg->interrupt_page;
1676 }
1677 vmbus->state = VMBUS_HANDSHAKE;
1678
1679 if (vmbus_init(vmbus)) {
1680 error_report("failed to init vmbus; aborting");
1681 vmbus_deinit(vmbus);
1682 return;
1683 }
1684 }
1685
1686 static void send_handshake(VMBus *vmbus)
1687 {
1688 struct vmbus_message_version_response msg = {
1689 .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
1690 .version_supported = vmbus_initialized(vmbus),
1691 };
1692
1693 post_msg(vmbus, &msg, sizeof(msg));
1694 }
1695
1696 static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
1697 {
1698 VMBusChannel *chan;
1699
1700 if (!vmbus_initialized(vmbus)) {
1701 return;
1702 }
1703
1704 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1705 if (chan->offer_state == VMOFFER_INIT) {
1706 chan->offer_state = VMOFFER_SENDING;
1707 break;
1708 }
1709 }
1710
1711 vmbus->state = VMBUS_OFFER;
1712 }
1713
1714 static void send_offer(VMBus *vmbus)
1715 {
1716 VMBusChannel *chan;
1717 struct vmbus_message_header alloffers_msg = {
1718 .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
1719 };
1720
1721 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1722 if (chan->offer_state == VMOFFER_SENDING) {
1723 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1724 /* Hyper-V wants LE GUIDs */
1725 QemuUUID classid = qemu_uuid_bswap(vdc->classid);
1726 QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
1727 struct vmbus_message_offer_channel msg = {
1728 .header.message_type = VMBUS_MSG_OFFERCHANNEL,
1729 .child_relid = chan->id,
1730 .connection_id = chan_connection_id(chan),
1731 .channel_flags = vdc->channel_flags,
1732 .mmio_size_mb = vdc->mmio_size_mb,
1733 .sub_channel_index = vmbus_channel_idx(chan),
1734 .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
1735 };
1736
1737 memcpy(msg.type_uuid, &classid, sizeof(classid));
1738 memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
1739
1740 trace_vmbus_send_offer(chan->id, chan->dev);
1741
1742 post_msg(vmbus, &msg, sizeof(msg));
1743 return;
1744 }
1745 }
1746
1747 /* no more offers, send terminator message */
1748 trace_vmbus_terminate_offers();
1749 post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
1750 }
1751
1752 static bool complete_offer(VMBus *vmbus)
1753 {
1754 VMBusChannel *chan;
1755
1756 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1757 if (chan->offer_state == VMOFFER_SENDING) {
1758 chan->offer_state = VMOFFER_SENT;
1759 goto next_offer;
1760 }
1761 }
1762 /*
1763 * no transitioning channels found so this is completing the terminator
1764 * message, and vmbus can move to the next state
1765 */
1766 return true;
1767
1768 next_offer:
1769 /* try to mark another channel for offering */
1770 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1771 if (chan->offer_state == VMOFFER_INIT) {
1772 chan->offer_state = VMOFFER_SENDING;
1773 break;
1774 }
1775 }
1776 /*
1777 * if an offer has been sent there are more offers or the terminator yet to
1778 * send, so no state transition for vmbus
1779 */
1780 return false;
1781 }
1782
1783
1784 static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
1785 uint32_t msglen)
1786 {
1787 VMBusGpadl *gpadl;
1788 uint32_t num_gfns, i;
1789
1790 /* must include at least one gpa range */
1791 if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
1792 !vmbus_initialized(vmbus)) {
1793 return;
1794 }
1795
1796 num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
1797 sizeof(msg->range[0].pfn_array[0]);
1798
1799 trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
1800
1801 /*
1802 * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
1803 * ranges each with arbitrary size and alignment. However in practice only
1804 * single-range page-aligned GPADLs have been observed so just ignore
1805 * anything else and simplify things greatly.
1806 */
1807 if (msg->rangecount != 1 || msg->range[0].byte_offset ||
1808 (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
1809 return;
1810 }
1811
1812 /* ignore requests to create already existing GPADLs */
1813 if (find_gpadl(vmbus, msg->gpadl_id)) {
1814 return;
1815 }
1816
1817 gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
1818
1819 for (i = 0; i < num_gfns &&
1820 (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
1821 i++) {
1822 gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
1823 }
1824
1825 if (gpadl_full(gpadl)) {
1826 vmbus->state = VMBUS_CREATE_GPADL;
1827 }
1828 }
1829
1830 static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
1831 uint32_t msglen)
1832 {
1833 VMBusGpadl *gpadl;
1834 uint32_t num_gfns_left, i;
1835
1836 if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1837 return;
1838 }
1839
1840 trace_vmbus_gpadl_body(msg->gpadl_id);
1841
1842 gpadl = find_gpadl(vmbus, msg->gpadl_id);
1843 if (!gpadl) {
1844 return;
1845 }
1846
1847 num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
1848 assert(num_gfns_left);
1849
1850 for (i = 0; i < num_gfns_left &&
1851 (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
1852 gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
1853 }
1854
1855 if (gpadl_full(gpadl)) {
1856 vmbus->state = VMBUS_CREATE_GPADL;
1857 }
1858 }
1859
1860 static void send_create_gpadl(VMBus *vmbus)
1861 {
1862 VMBusGpadl *gpadl;
1863
1864 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1865 if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1866 struct vmbus_message_gpadl_created msg = {
1867 .header.message_type = VMBUS_MSG_GPADL_CREATED,
1868 .gpadl_id = gpadl->id,
1869 .child_relid = gpadl->child_relid,
1870 };
1871
1872 trace_vmbus_gpadl_created(gpadl->id);
1873 post_msg(vmbus, &msg, sizeof(msg));
1874 return;
1875 }
1876 }
1877
1878 g_assert_not_reached();
1879 }
1880
1881 static bool complete_create_gpadl(VMBus *vmbus)
1882 {
1883 VMBusGpadl *gpadl;
1884
1885 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1886 if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1887 gpadl->state = VMGPADL_ALIVE;
1888
1889 return true;
1890 }
1891 }
1892
1893 g_assert_not_reached();
1894 }
1895
1896 static void handle_gpadl_teardown(VMBus *vmbus,
1897 vmbus_message_gpadl_teardown *msg,
1898 uint32_t msglen)
1899 {
1900 VMBusGpadl *gpadl;
1901
1902 if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1903 return;
1904 }
1905
1906 trace_vmbus_gpadl_teardown(msg->gpadl_id);
1907
1908 gpadl = find_gpadl(vmbus, msg->gpadl_id);
1909 if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
1910 return;
1911 }
1912
1913 gpadl->state = VMGPADL_TEARINGDOWN;
1914 vmbus->state = VMBUS_TEARDOWN_GPADL;
1915 }
1916
1917 static void send_teardown_gpadl(VMBus *vmbus)
1918 {
1919 VMBusGpadl *gpadl;
1920
1921 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1922 if (gpadl->state == VMGPADL_TEARINGDOWN) {
1923 struct vmbus_message_gpadl_torndown msg = {
1924 .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
1925 .gpadl_id = gpadl->id,
1926 };
1927
1928 trace_vmbus_gpadl_torndown(gpadl->id);
1929 post_msg(vmbus, &msg, sizeof(msg));
1930 return;
1931 }
1932 }
1933
1934 g_assert_not_reached();
1935 }
1936
1937 static bool complete_teardown_gpadl(VMBus *vmbus)
1938 {
1939 VMBusGpadl *gpadl;
1940
1941 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1942 if (gpadl->state == VMGPADL_TEARINGDOWN) {
1943 gpadl->state = VMGPADL_TORNDOWN;
1944 vmbus_put_gpadl(gpadl);
1945 return true;
1946 }
1947 }
1948
1949 g_assert_not_reached();
1950 }
1951
1952 static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
1953 uint32_t msglen)
1954 {
1955 VMBusChannel *chan;
1956
1957 if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1958 return;
1959 }
1960
1961 trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
1962 msg->target_vp);
1963 chan = find_channel(vmbus, msg->child_relid);
1964 if (!chan || chan->state != VMCHAN_INIT) {
1965 return;
1966 }
1967
1968 chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
1969 chan->ringbuf_send_offset = msg->ring_buffer_offset;
1970 chan->target_vp = msg->target_vp;
1971 chan->open_id = msg->open_id;
1972
1973 open_channel(chan);
1974
1975 chan->state = VMCHAN_OPENING;
1976 vmbus->state = VMBUS_OPEN_CHANNEL;
1977 }
1978
1979 static void send_open_channel(VMBus *vmbus)
1980 {
1981 VMBusChannel *chan;
1982
1983 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1984 if (chan->state == VMCHAN_OPENING) {
1985 struct vmbus_message_open_result msg = {
1986 .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
1987 .child_relid = chan->id,
1988 .open_id = chan->open_id,
1989 .status = !vmbus_channel_is_open(chan),
1990 };
1991
1992 trace_vmbus_channel_open(chan->id, msg.status);
1993 post_msg(vmbus, &msg, sizeof(msg));
1994 return;
1995 }
1996 }
1997
1998 g_assert_not_reached();
1999 }
2000
2001 static bool complete_open_channel(VMBus *vmbus)
2002 {
2003 VMBusChannel *chan;
2004
2005 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2006 if (chan->state == VMCHAN_OPENING) {
2007 if (vmbus_channel_is_open(chan)) {
2008 chan->state = VMCHAN_OPEN;
2009 /*
2010 * simulate guest notification of ringbuffer space made
2011 * available, for the channel protocols where the host
2012 * initiates the communication
2013 */
2014 vmbus_channel_notify_host(chan);
2015 } else {
2016 chan->state = VMCHAN_INIT;
2017 }
2018 return true;
2019 }
2020 }
2021
2022 g_assert_not_reached();
2023 }
2024
2025 static void vdev_reset_on_close(VMBusDevice *vdev)
2026 {
2027 uint16_t i;
2028
2029 for (i = 0; i < vdev->num_channels; i++) {
2030 if (vmbus_channel_is_open(&vdev->channels[i])) {
2031 return;
2032 }
2033 }
2034
2035 /* all channels closed -- reset device */
2036 device_cold_reset(DEVICE(vdev));
2037 }
2038
2039 static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
2040 uint32_t msglen)
2041 {
2042 VMBusChannel *chan;
2043
2044 if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2045 return;
2046 }
2047
2048 trace_vmbus_close_channel(msg->child_relid);
2049
2050 chan = find_channel(vmbus, msg->child_relid);
2051 if (!chan) {
2052 return;
2053 }
2054
2055 close_channel(chan);
2056 chan->state = VMCHAN_INIT;
2057
2058 vdev_reset_on_close(chan->dev);
2059 }
2060
2061 static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
2062 {
2063 vmbus->state = VMBUS_UNLOAD;
2064 }
2065
2066 static void send_unload(VMBus *vmbus)
2067 {
2068 vmbus_message_header msg = {
2069 .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
2070 };
2071
2072 qemu_mutex_lock(&vmbus->rx_queue_lock);
2073 vmbus->rx_queue_size = 0;
2074 qemu_mutex_unlock(&vmbus->rx_queue_lock);
2075
2076 post_msg(vmbus, &msg, sizeof(msg));
2077 }
2078
2079 static bool complete_unload(VMBus *vmbus)
2080 {
2081 vmbus_reset_all(vmbus);
2082 return true;
2083 }
2084
2085 static void process_message(VMBus *vmbus)
2086 {
2087 struct hyperv_post_message_input *hv_msg;
2088 struct vmbus_message_header *msg;
2089 void *msgdata;
2090 uint32_t msglen;
2091
2092 qemu_mutex_lock(&vmbus->rx_queue_lock);
2093
2094 if (!vmbus->rx_queue_size) {
2095 goto unlock;
2096 }
2097
2098 hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
2099 msglen = hv_msg->payload_size;
2100 if (msglen < sizeof(*msg)) {
2101 goto out;
2102 }
2103 msgdata = hv_msg->payload;
2104 msg = msgdata;
2105
2106 trace_vmbus_process_incoming_message(msg->message_type);
2107
2108 switch (msg->message_type) {
2109 case VMBUS_MSG_INITIATE_CONTACT:
2110 handle_initiate_contact(vmbus, msgdata, msglen);
2111 break;
2112 case VMBUS_MSG_REQUESTOFFERS:
2113 handle_request_offers(vmbus, msgdata, msglen);
2114 break;
2115 case VMBUS_MSG_GPADL_HEADER:
2116 handle_gpadl_header(vmbus, msgdata, msglen);
2117 break;
2118 case VMBUS_MSG_GPADL_BODY:
2119 handle_gpadl_body(vmbus, msgdata, msglen);
2120 break;
2121 case VMBUS_MSG_GPADL_TEARDOWN:
2122 handle_gpadl_teardown(vmbus, msgdata, msglen);
2123 break;
2124 case VMBUS_MSG_OPENCHANNEL:
2125 handle_open_channel(vmbus, msgdata, msglen);
2126 break;
2127 case VMBUS_MSG_CLOSECHANNEL:
2128 handle_close_channel(vmbus, msgdata, msglen);
2129 break;
2130 case VMBUS_MSG_UNLOAD:
2131 handle_unload(vmbus, msgdata, msglen);
2132 break;
2133 default:
2134 error_report("unknown message type %#x", msg->message_type);
2135 break;
2136 }
2137
2138 out:
2139 vmbus->rx_queue_size--;
2140 vmbus->rx_queue_head++;
2141 vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
2142
2143 vmbus_resched(vmbus);
2144 unlock:
2145 qemu_mutex_unlock(&vmbus->rx_queue_lock);
2146 }
2147
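/*
 * Table-driven message state machine: for each state, .run either consumes
 * the next queued guest request (VMBUS_LISTEN) or posts the corresponding
 * reply message to the guest, and .complete, invoked from vmbus_msg_cb()
 * once delivery succeeds, commits the resulting state change.  When
 * .complete is absent or returns true, the bus goes back to VMBUS_LISTEN
 * and picks up the next request.
 */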
2148 static const struct {
2149 void (*run)(VMBus *vmbus);
2150 bool (*complete)(VMBus *vmbus);
2151 } state_runner[] = {
2152 [VMBUS_LISTEN] = {process_message, NULL},
2153 [VMBUS_HANDSHAKE] = {send_handshake, NULL},
2154 [VMBUS_OFFER] = {send_offer, complete_offer},
2155 [VMBUS_CREATE_GPADL] = {send_create_gpadl, complete_create_gpadl},
2156 [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2157 [VMBUS_OPEN_CHANNEL] = {send_open_channel, complete_open_channel},
2158 [VMBUS_UNLOAD] = {send_unload, complete_unload},
2159 };
2160
2161 static void vmbus_do_run(VMBus *vmbus)
2162 {
2163 if (vmbus->msg_in_progress) {
2164 return;
2165 }
2166
2167 assert(vmbus->state < VMBUS_STATE_MAX);
2168 assert(state_runner[vmbus->state].run);
2169 state_runner[vmbus->state].run(vmbus);
2170 }
2171
2172 static void vmbus_run(void *opaque)
2173 {
2174 VMBus *vmbus = opaque;
2175
2176 /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2177 if (vmbus->in_progress) {
2178 return;
2179 }
2180
2181 vmbus->in_progress = true;
2182 /*
2183 * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2184 * should go *after* the code that can result in aio_poll; otherwise
2185 * reschedules can be missed. No idea how to enforce that.
2186 */
2187 vmbus_do_run(vmbus);
2188 vmbus->in_progress = false;
2189 }
2190
2191 static void vmbus_msg_cb(void *data, int status)
2192 {
2193 VMBus *vmbus = data;
2194 bool (*complete)(VMBus *vmbus);
2195
2196 assert(vmbus->msg_in_progress);
2197
2198 trace_vmbus_msg_cb(status);
2199
2200 if (status == -EAGAIN) {
2201 goto out;
2202 }
2203 if (status) {
2204 error_report("message delivery fatal failure: %d; aborting vmbus",
2205 status);
2206 vmbus_reset_all(vmbus);
2207 return;
2208 }
2209
2210 assert(vmbus->state < VMBUS_STATE_MAX);
2211 complete = state_runner[vmbus->state].complete;
2212 if (!complete || complete(vmbus)) {
2213 vmbus->state = VMBUS_LISTEN;
2214 }
2215 out:
2216 vmbus->msg_in_progress = false;
2217 vmbus_resched(vmbus);
2218 }
2219
2220 static void vmbus_resched(VMBus *vmbus)
2221 {
2222 aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
2223 }
2224
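/*
 * Guest-to-host signaling path: the guest sets the bit matching a channel's
 * relid in the second half of the interrupt page and signals
 * VMBUS_EVENT_CONNECTION_ID; the handler below scans that bitmap, atomically
 * clears the bits it consumed, and notifies the host side of every open
 * channel that was flagged.
 */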
2225 static void vmbus_signal_event(EventNotifier *e)
2226 {
2227 VMBusChannel *chan;
2228 VMBus *vmbus = container_of(e, VMBus, notifier);
2229 unsigned long *int_map;
2230 hwaddr addr, len;
2231 bool is_dirty = false;
2232
2233 if (!event_notifier_test_and_clear(e)) {
2234 return;
2235 }
2236
2237 trace_vmbus_signal_event();
2238
2239 if (!vmbus->int_page_gpa) {
2240 return;
2241 }
2242
2243 addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
2244 len = TARGET_PAGE_SIZE / 2;
2245 int_map = cpu_physical_memory_map(addr, &len, 1);
2246 if (len != TARGET_PAGE_SIZE / 2) {
2247 goto unmap;
2248 }
2249
2250 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2251 if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
2252 if (!vmbus_channel_is_open(chan)) {
2253 continue;
2254 }
2255 vmbus_channel_notify_host(chan);
2256 is_dirty = true;
2257 }
2258 }
2259
2260 unmap:
2261 cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
2262 }
2263
2264 static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2265 {
2266 VMBusDevice *vdev = VMBUS_DEVICE(dev);
2267 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2268 VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2269 BusChild *child;
2270 Error *err = NULL;
2271 char idstr[UUID_STR_LEN];
2272
2273 assert(!qemu_uuid_is_null(&vdev->instanceid));
2274
2275 if (!qemu_uuid_is_null(&vdc->instanceid)) {
2276 /* Class wants to only have a single instance with a fixed UUID */
2277 if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
2278 error_setg(&err, "instance id can't be changed");
2279 goto error_out;
2280 }
2281 }
2282
2283 /* Check for instance id collision for this class id */
2284 QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2285 VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2286
2287 if (child_dev == vdev) {
2288 continue;
2289 }
2290
2291 if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2292 qemu_uuid_unparse(&vdev->instanceid, idstr);
2293 error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2294 goto error_out;
2295 }
2296 }
2297
2298 vdev->dma_as = &address_space_memory;
2299
2300 create_channels(vmbus, vdev, &err);
2301 if (err) {
2302 goto error_out;
2303 }
2304
2305 if (vdc->vmdev_realize) {
2306 vdc->vmdev_realize(vdev, &err);
2307 if (err) {
2308 goto err_vdc_realize;
2309 }
2310 }
2311 return;
2312
2313 err_vdc_realize:
2314 free_channels(vdev);
2315 error_out:
2316 error_propagate(errp, err);
2317 }
2318
2319 static void vmbus_dev_reset(DeviceState *dev)
2320 {
2321 uint16_t i;
2322 VMBusDevice *vdev = VMBUS_DEVICE(dev);
2323 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2324
2325 if (vdev->channels) {
2326 for (i = 0; i < vdev->num_channels; i++) {
2327 VMBusChannel *chan = &vdev->channels[i];
2328 close_channel(chan);
2329 chan->state = VMCHAN_INIT;
2330 }
2331 }
2332
2333 if (vdc->vmdev_reset) {
2334 vdc->vmdev_reset(vdev);
2335 }
2336 }
2337
2338 static void vmbus_dev_unrealize(DeviceState *dev)
2339 {
2340 VMBusDevice *vdev = VMBUS_DEVICE(dev);
2341 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2342
2343 if (vdc->vmdev_unrealize) {
2344 vdc->vmdev_unrealize(vdev);
2345 }
2346 free_channels(vdev);
2347 }
2348
2349 static const Property vmbus_dev_props[] = {
2350 DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
2351 };
2352
2353
2354 static void vmbus_dev_class_init(ObjectClass *klass, const void *data)
2355 {
2356 DeviceClass *kdev = DEVICE_CLASS(klass);
2357 device_class_set_props(kdev, vmbus_dev_props);
2358 kdev->bus_type = TYPE_VMBUS;
2359 kdev->realize = vmbus_dev_realize;
2360 kdev->unrealize = vmbus_dev_unrealize;
2361 device_class_set_legacy_reset(kdev, vmbus_dev_reset);
2362 }
2363
2364 static void vmbus_dev_instance_init(Object *obj)
2365 {
2366 VMBusDevice *vdev = VMBUS_DEVICE(obj);
2367 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2368
2369 if (!qemu_uuid_is_null(&vdc->instanceid)) {
2370 /* Class wants to only have a single instance with a fixed UUID */
2371 vdev->instanceid = vdc->instanceid;
2372 }
2373 }
2374
2375 const VMStateDescription vmstate_vmbus_dev = {
2376 .name = TYPE_VMBUS_DEVICE,
2377 .version_id = 0,
2378 .minimum_version_id = 0,
2379 .fields = (const VMStateField[]) {
2380 VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
2381 VMSTATE_UINT16(num_channels, VMBusDevice),
2382 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
2383 num_channels, vmstate_channel,
2384 VMBusChannel),
2385 VMSTATE_END_OF_LIST()
2386 }
2387 };
2388
2389 /* vmbus generic device base */
2390 static const TypeInfo vmbus_dev_type_info = {
2391 .name = TYPE_VMBUS_DEVICE,
2392 .parent = TYPE_DEVICE,
2393 .abstract = true,
2394 .instance_size = sizeof(VMBusDevice),
2395 .class_size = sizeof(VMBusDeviceClass),
2396 .class_init = vmbus_dev_class_init,
2397 .instance_init = vmbus_dev_instance_init,
2398 };
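/*
 * A minimal sketch of how a concrete device might build on this base class
 * (illustrative only: "hv-foo" is a made-up device, and VMBUS_DEVICE_CLASS()
 * is assumed to follow the usual QOM cast-macro conventions):
 *
 *   typedef struct HvFoo { VMBusDevice parent; } HvFoo;
 *
 *   static void hv_foo_vmdev_realize(VMBusDevice *vdev, Error **errp)
 *   {
 *   }
 *
 *   static void hv_foo_class_init(ObjectClass *klass, const void *data)
 *   {
 *       VMBusDeviceClass *vdc = VMBUS_DEVICE_CLASS(klass);
 *       vdc->vmdev_realize = hv_foo_vmdev_realize;
 *   }
 *
 *   static const TypeInfo hv_foo_type_info = {
 *       .name          = "hv-foo",
 *       .parent        = TYPE_VMBUS_DEVICE,
 *       .instance_size = sizeof(HvFoo),
 *       .class_init    = hv_foo_class_init,
 *   };
 *
 * The instance would then be created with "-device hv-foo,instanceid=<uuid>",
 * relying on the "instanceid" property defined above.
 */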
2399
2400 static void vmbus_realize(BusState *bus, Error **errp)
2401 {
2402 int ret = 0;
2403 VMBus *vmbus = VMBUS(bus);
2404
2405 qemu_mutex_init(&vmbus->rx_queue_lock);
2406
2407 QTAILQ_INIT(&vmbus->gpadl_list);
2408 QTAILQ_INIT(&vmbus->channel_list);
2409
2410 ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2411 vmbus_recv_message, vmbus);
2412 if (ret != 0) {
2413 error_setg(errp, "hyperv set message handler failed: %d", ret);
2414 goto error_out;
2415 }
2416
2417 ret = event_notifier_init(&vmbus->notifier, 0);
2418 if (ret != 0) {
2419 error_setg(errp, "event notifier failed to init with %d", ret);
2420 goto remove_msg_handler;
2421 }
2422
2423 event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2424 ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2425 &vmbus->notifier);
2426 if (ret != 0) {
2427 error_setg(errp, "hyperv set event handler failed with %d", ret);
2428 goto clear_event_notifier;
2429 }
2430
2431 return;
2432
2433 clear_event_notifier:
2434 event_notifier_cleanup(&vmbus->notifier);
2435 remove_msg_handler:
2436 hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2437 error_out:
2438 qemu_mutex_destroy(&vmbus->rx_queue_lock);
2439 }
2440
2441 static void vmbus_unrealize(BusState *bus)
2442 {
2443 VMBus *vmbus = VMBUS(bus);
2444
2445 hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2446 hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
2447 event_notifier_cleanup(&vmbus->notifier);
2448
2449 qemu_mutex_destroy(&vmbus->rx_queue_lock);
2450 }
2451
2452 static void vmbus_reset_hold(Object *obj, ResetType type)
2453 {
2454 vmbus_deinit(VMBUS(obj));
2455 }
2456
2457 static char *vmbus_get_dev_path(DeviceState *dev)
2458 {
2459 BusState *bus = qdev_get_parent_bus(dev);
2460 return qdev_get_dev_path(bus->parent);
2461 }
2462
2463 static char *vmbus_get_fw_dev_path(DeviceState *dev)
2464 {
2465 VMBusDevice *vdev = VMBUS_DEVICE(dev);
2466 char uuid[UUID_STR_LEN];
2467
2468 qemu_uuid_unparse(&vdev->instanceid, uuid);
2469 return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
2470 }
2471
2472 static void vmbus_class_init(ObjectClass *klass, const void *data)
2473 {
2474 BusClass *k = BUS_CLASS(klass);
2475 ResettableClass *rc = RESETTABLE_CLASS(klass);
2476
2477 k->get_dev_path = vmbus_get_dev_path;
2478 k->get_fw_dev_path = vmbus_get_fw_dev_path;
2479 k->realize = vmbus_realize;
2480 k->unrealize = vmbus_unrealize;
2481 rc->phases.hold = vmbus_reset_hold;
2482 }
2483
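/*
 * Load side of migration: vmbus_pre_load() drops the channel IDs that were
 * allocated when the devices were realized on the destination, so that the
 * IDs carried in the migration stream can be reinstated as-is;
 * vmbus_post_load() then re-initializes the bus, fixes up the GPADL back
 * pointers and refcounts, reopens the channels that were open on the source,
 * and re-raises the notifications either side may have missed.
 */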
2484 static int vmbus_pre_load(void *opaque)
2485 {
2486 VMBusChannel *chan;
2487 VMBus *vmbus = VMBUS(opaque);
2488
2489 /*
2490 * channel IDs allocated by the source will come in the migration stream
2491 * for each channel, so clean up the ones allocated at realize
2492 */
2493 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2494 unregister_chan_id(chan);
2495 }
2496
2497 return 0;
2498 }
2499 static int vmbus_post_load(void *opaque, int version_id)
2500 {
2501 int ret;
2502 VMBus *vmbus = VMBUS(opaque);
2503 VMBusGpadl *gpadl;
2504 VMBusChannel *chan;
2505
2506 ret = vmbus_init(vmbus);
2507 if (ret) {
2508 return ret;
2509 }
2510
2511 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2512 gpadl->vmbus = vmbus;
2513 gpadl->refcount = 1;
2514 }
2515
2516 /*
2517 * reopening channels depends on initialized vmbus so it's done here
2518 * instead of channel_post_load()
2519 */
2520 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2521
2522 if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2523 open_channel(chan);
2524 }
2525
2526 if (chan->state != VMCHAN_OPEN) {
2527 continue;
2528 }
2529
2530 if (!vmbus_channel_is_open(chan)) {
2531 /* reopen failed, abort loading */
2532 return -1;
2533 }
2534
2535 /* resume processing on the guest side if it missed the notification */
2536 hyperv_sint_route_set_sint(chan->notify_route);
2537 /* ditto on the host side */
2538 vmbus_channel_notify_host(chan);
2539 }
2540
2541 vmbus_resched(vmbus);
2542 return 0;
2543 }
2544
2545 static const VMStateDescription vmstate_post_message_input = {
2546 .name = "vmbus/hyperv_post_message_input",
2547 .version_id = 0,
2548 .minimum_version_id = 0,
2549 .fields = (const VMStateField[]) {
2550 /*
2551 * skip connection_id and message_type as they are validated before
2552 * queueing and ignored on dequeueing
2553 */
2554 VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
2555 VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
2556 HV_MESSAGE_PAYLOAD_SIZE),
2557 VMSTATE_END_OF_LIST()
2558 }
2559 };
2560
2561 static bool vmbus_rx_queue_needed(void *opaque)
2562 {
2563 VMBus *vmbus = VMBUS(opaque);
2564 return vmbus->rx_queue_size;
2565 }
2566
2567 static const VMStateDescription vmstate_rx_queue = {
2568 .name = "vmbus/rx_queue",
2569 .version_id = 0,
2570 .minimum_version_id = 0,
2571 .needed = vmbus_rx_queue_needed,
2572 .fields = (const VMStateField[]) {
2573 VMSTATE_UINT8(rx_queue_head, VMBus),
2574 VMSTATE_UINT8(rx_queue_size, VMBus),
2575 VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
2576 HV_MSG_QUEUE_LEN, 0,
2577 vmstate_post_message_input,
2578 struct hyperv_post_message_input),
2579 VMSTATE_END_OF_LIST()
2580 }
2581 };
2582
2583 static const VMStateDescription vmstate_vmbus = {
2584 .name = TYPE_VMBUS,
2585 .version_id = 0,
2586 .minimum_version_id = 0,
2587 .pre_load = vmbus_pre_load,
2588 .post_load = vmbus_post_load,
2589 .fields = (const VMStateField[]) {
2590 VMSTATE_UINT8(state, VMBus),
2591 VMSTATE_UINT32(version, VMBus),
2592 VMSTATE_UINT32(target_vp, VMBus),
2593 VMSTATE_UINT64(int_page_gpa, VMBus),
2594 VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
2595 vmstate_gpadl, VMBusGpadl, link),
2596 VMSTATE_END_OF_LIST()
2597 },
2598 .subsections = (const VMStateDescription * const []) {
2599 &vmstate_rx_queue,
2600 NULL
2601 }
2602 };
2603
2604 static const TypeInfo vmbus_type_info = {
2605 .name = TYPE_VMBUS,
2606 .parent = TYPE_BUS,
2607 .instance_size = sizeof(VMBus),
2608 .class_init = vmbus_class_init,
2609 };
2610
2611 static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2612 {
2613 VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2614
2615 /*
2616 * here there's at least one vmbus bridge that is being realized, so
2617 * vmbus_bridge_find can only return NULL if it's not unique
2618 */
2619 if (!vmbus_bridge_find()) {
2620 error_setg(errp, "there can be at most one %s in the system",
2621 TYPE_VMBUS_BRIDGE);
2622 return;
2623 }
2624
2625 if (!hyperv_is_synic_enabled()) {
2626 error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
2627 return;
2628 }
2629
2630 if (!hyperv_are_vmbus_recommended_features_enabled()) {
2631 warn_report("VMBus enabled without the recommended set of Hyper-V features: "
2632 "hv-stimer, hv-vapic and hv-runtime. "
2633 "Some Windows versions might not boot or enable the VMBus device");
2634 }
2635
2636 bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
2637 }
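/*
 * A command line satisfying the checks above could look roughly like
 * (illustrative; the exact set of Hyper-V enlightenments depends on the
 * guest):
 *
 *   -cpu ...,hv-vpindex,hv-synic,hv-time,hv-stimer,hv-vapic,hv-runtime
 *   -device vmbus-bridge,irq=7
 *
 * with VMBus devices then plugged into the "vmbus" bus created above.
 */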
2638
2639 static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
2640 {
2641 /* there can be only one VMBus */
2642 return g_strdup("0");
2643 }
2644
2645 static const VMStateDescription vmstate_vmbus_bridge = {
2646 .name = TYPE_VMBUS_BRIDGE,
2647 .version_id = 0,
2648 .minimum_version_id = 0,
2649 .fields = (const VMStateField[]) {
2650 VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
2651 VMSTATE_END_OF_LIST()
2652 },
2653 };
2654
2655 static const Property vmbus_bridge_props[] = {
2656 DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
2657 };
2658
2659 static void vmbus_bridge_class_init(ObjectClass *klass, const void *data)
2660 {
2661 DeviceClass *k = DEVICE_CLASS(klass);
2662 SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
2663
2664 k->realize = vmbus_bridge_realize;
2665 k->fw_name = "vmbus";
2666 sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
2667 set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
2668 k->vmsd = &vmstate_vmbus_bridge;
2669 device_class_set_props(k, vmbus_bridge_props);
2670 /* override SysBusDevice's default */
2671 k->user_creatable = true;
2672 }
2673
2674 static const TypeInfo vmbus_bridge_type_info = {
2675 .name = TYPE_VMBUS_BRIDGE,
2676 .parent = TYPE_SYS_BUS_DEVICE,
2677 .instance_size = sizeof(VMBusBridge),
2678 .class_init = vmbus_bridge_class_init,
2679 };
2680
2681 static void vmbus_register_types(void)
2682 {
2683 type_register_static(&vmbus_bridge_type_info);
2684 type_register_static(&vmbus_dev_type_info);
2685 type_register_static(&vmbus_type_info);
2686 }
2687
2688 type_init(vmbus_register_types)
2689