1 /* 2 * QEMU Hyper-V VMBus 3 * 4 * Copyright (c) 2017-2018 Virtuozzo International GmbH. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/error-report.h" 12 #include "qemu/main-loop.h" 13 #include "qapi/error.h" 14 #include "migration/vmstate.h" 15 #include "hw/qdev-properties.h" 16 #include "hw/hyperv/hyperv.h" 17 #include "hw/hyperv/vmbus.h" 18 #include "hw/hyperv/vmbus-bridge.h" 19 #include "hw/sysbus.h" 20 #include "cpu.h" 21 #include "trace.h" 22 23 #define TYPE_VMBUS "vmbus" 24 #define VMBUS(obj) OBJECT_CHECK(VMBus, (obj), TYPE_VMBUS) 25 26 enum { 27 VMGPADL_INIT, 28 VMGPADL_ALIVE, 29 VMGPADL_TEARINGDOWN, 30 VMGPADL_TORNDOWN, 31 }; 32 33 struct VMBusGpadl { 34 /* GPADL id */ 35 uint32_t id; 36 /* associated channel id (rudimentary?) */ 37 uint32_t child_relid; 38 39 /* number of pages in the GPADL as declared in GPADL_HEADER message */ 40 uint32_t num_gfns; 41 /* 42 * Due to limited message size, GPADL may not fit fully in a single 43 * GPADL_HEADER message, and is further popluated using GPADL_BODY 44 * messages. @seen_gfns is the number of pages seen so far; once it 45 * reaches @num_gfns, the GPADL is ready to use. 46 */ 47 uint32_t seen_gfns; 48 /* array of GFNs (of size @num_gfns once allocated) */ 49 uint64_t *gfns; 50 51 uint8_t state; 52 53 QTAILQ_ENTRY(VMBusGpadl) link; 54 VMBus *vmbus; 55 unsigned refcount; 56 }; 57 58 /* 59 * Wrap sequential read from / write to GPADL. 60 */ 61 typedef struct GpadlIter { 62 VMBusGpadl *gpadl; 63 AddressSpace *as; 64 DMADirection dir; 65 /* offset into GPADL where the next i/o will be performed */ 66 uint32_t off; 67 /* 68 * Cached mapping of the currently accessed page, up to page boundary. 69 * Updated lazily on i/o. 70 * Note: MemoryRegionCache can not be used here because pages in the GPADL 71 * are non-contiguous and may belong to different memory regions. 
72 */ 73 void *map; 74 /* offset after last i/o (i.e. not affected by seek) */ 75 uint32_t last_off; 76 /* 77 * Indicator that the iterator is active and may have a cached mapping. 78 * Allows to enforce bracketing of all i/o (which may create cached 79 * mappings) and thus exclude mapping leaks. 80 */ 81 bool active; 82 } GpadlIter; 83 84 /* 85 * Ring buffer. There are two of them, sitting in the same GPADL, for each 86 * channel. 87 * Each ring buffer consists of a set of pages, with the first page containing 88 * the ring buffer header, and the remaining pages being for data packets. 89 */ 90 typedef struct VMBusRingBufCommon { 91 AddressSpace *as; 92 /* GPA of the ring buffer header */ 93 dma_addr_t rb_addr; 94 /* start and length of the ring buffer data area within GPADL */ 95 uint32_t base; 96 uint32_t len; 97 98 GpadlIter iter; 99 } VMBusRingBufCommon; 100 101 typedef struct VMBusSendRingBuf { 102 VMBusRingBufCommon common; 103 /* current write index, to be committed at the end of send */ 104 uint32_t wr_idx; 105 /* write index at the start of send */ 106 uint32_t last_wr_idx; 107 /* space to be requested from the guest */ 108 uint32_t wanted; 109 /* space reserved for planned sends */ 110 uint32_t reserved; 111 /* last seen read index */ 112 uint32_t last_seen_rd_idx; 113 } VMBusSendRingBuf; 114 115 typedef struct VMBusRecvRingBuf { 116 VMBusRingBufCommon common; 117 /* current read index, to be committed at the end of receive */ 118 uint32_t rd_idx; 119 /* read index at the start of receive */ 120 uint32_t last_rd_idx; 121 /* last seen write index */ 122 uint32_t last_seen_wr_idx; 123 } VMBusRecvRingBuf; 124 125 126 enum { 127 VMOFFER_INIT, 128 VMOFFER_SENDING, 129 VMOFFER_SENT, 130 }; 131 132 enum { 133 VMCHAN_INIT, 134 VMCHAN_OPENING, 135 VMCHAN_OPEN, 136 }; 137 138 struct VMBusChannel { 139 VMBusDevice *dev; 140 141 /* channel id */ 142 uint32_t id; 143 /* 144 * subchannel index within the device; subchannel #0 is "primary" and 145 * always exists 146 
*/ 147 uint16_t subchan_idx; 148 uint32_t open_id; 149 /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */ 150 uint32_t target_vp; 151 /* GPADL id to use for the ring buffers */ 152 uint32_t ringbuf_gpadl; 153 /* start (in pages) of the send ring buffer within @ringbuf_gpadl */ 154 uint32_t ringbuf_send_offset; 155 156 uint8_t offer_state; 157 uint8_t state; 158 bool is_open; 159 160 /* main device worker; copied from the device class */ 161 VMBusChannelNotifyCb notify_cb; 162 /* 163 * guest->host notifications, either sent directly or dispatched via 164 * interrupt page (older VMBus) 165 */ 166 EventNotifier notifier; 167 168 VMBus *vmbus; 169 /* 170 * SINT route to signal with host->guest notifications; may be shared with 171 * the main VMBus SINT route 172 */ 173 HvSintRoute *notify_route; 174 VMBusGpadl *gpadl; 175 176 VMBusSendRingBuf send_ringbuf; 177 VMBusRecvRingBuf recv_ringbuf; 178 179 QTAILQ_ENTRY(VMBusChannel) link; 180 }; 181 182 /* 183 * Hyper-V spec mandates that every message port has 16 buffers, which means 184 * that the guest can post up to this many messages without blocking. 185 * Therefore a queue for incoming messages has to be provided. 186 * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just 187 * doesn't transition to a new state until the message is known to have been 188 * successfully delivered to the respective SynIC message slot. 189 */ 190 #define HV_MSG_QUEUE_LEN 16 191 192 /* Hyper-V devices never use channel #0. Must be something special. */ 193 #define VMBUS_FIRST_CHANID 1 194 /* Each channel occupies one bit within a single event page sint slot. */ 195 #define VMBUS_CHANID_COUNT (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID) 196 /* Leave a few connection numbers for other purposes. 
*/ 197 #define VMBUS_CHAN_CONNECTION_OFFSET 16 198 199 /* 200 * Since the success or failure of sending a message is reported 201 * asynchronously, the VMBus state machine has effectively two entry points: 202 * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest 203 * message delivery status becomes known). Both are run as oneshot BHs on the 204 * main aio context, ensuring serialization. 205 */ 206 enum { 207 VMBUS_LISTEN, 208 VMBUS_HANDSHAKE, 209 VMBUS_OFFER, 210 VMBUS_CREATE_GPADL, 211 VMBUS_TEARDOWN_GPADL, 212 VMBUS_OPEN_CHANNEL, 213 VMBUS_UNLOAD, 214 VMBUS_STATE_MAX 215 }; 216 217 struct VMBus { 218 BusState parent; 219 220 uint8_t state; 221 /* protection against recursive aio_poll (see vmbus_run) */ 222 bool in_progress; 223 /* whether there's a message being delivered to the guest */ 224 bool msg_in_progress; 225 uint32_t version; 226 /* VP_INDEX of the vCPU to send messages and interrupts to */ 227 uint32_t target_vp; 228 HvSintRoute *sint_route; 229 /* 230 * interrupt page for older protocol versions; newer ones use SynIC event 231 * flags directly 232 */ 233 hwaddr int_page_gpa; 234 235 DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT); 236 237 /* incoming message queue */ 238 struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN]; 239 uint8_t rx_queue_head; 240 uint8_t rx_queue_size; 241 QemuMutex rx_queue_lock; 242 243 QTAILQ_HEAD(, VMBusGpadl) gpadl_list; 244 QTAILQ_HEAD(, VMBusChannel) channel_list; 245 246 /* 247 * guest->host notifications for older VMBus, to be dispatched via 248 * interrupt page 249 */ 250 EventNotifier notifier; 251 }; 252 253 static bool gpadl_full(VMBusGpadl *gpadl) 254 { 255 return gpadl->seen_gfns == gpadl->num_gfns; 256 } 257 258 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id, 259 uint32_t child_relid, uint32_t num_gfns) 260 { 261 VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1); 262 263 gpadl->id = id; 264 gpadl->child_relid = child_relid; 265 gpadl->num_gfns = num_gfns; 266 gpadl->gfns = 
g_new(uint64_t, num_gfns); 267 QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link); 268 gpadl->vmbus = vmbus; 269 gpadl->refcount = 1; 270 return gpadl; 271 } 272 273 static void free_gpadl(VMBusGpadl *gpadl) 274 { 275 QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link); 276 g_free(gpadl->gfns); 277 g_free(gpadl); 278 } 279 280 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id) 281 { 282 VMBusGpadl *gpadl; 283 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { 284 if (gpadl->id == gpadl_id) { 285 return gpadl; 286 } 287 } 288 return NULL; 289 } 290 291 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id) 292 { 293 VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id); 294 if (!gpadl || !gpadl_full(gpadl)) { 295 return NULL; 296 } 297 gpadl->refcount++; 298 return gpadl; 299 } 300 301 void vmbus_put_gpadl(VMBusGpadl *gpadl) 302 { 303 if (!gpadl) { 304 return; 305 } 306 if (--gpadl->refcount) { 307 return; 308 } 309 free_gpadl(gpadl); 310 } 311 312 uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl) 313 { 314 return gpadl->num_gfns * TARGET_PAGE_SIZE; 315 } 316 317 static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl, 318 AddressSpace *as, DMADirection dir) 319 { 320 iter->gpadl = gpadl; 321 iter->as = as; 322 iter->dir = dir; 323 iter->active = false; 324 } 325 326 static inline void gpadl_iter_cache_unmap(GpadlIter *iter) 327 { 328 uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK; 329 uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1; 330 331 /* mapping is only done to do non-zero amount of i/o */ 332 assert(iter->last_off > 0); 333 assert(map_start_in_page < io_end_in_page); 334 335 dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page, 336 iter->dir, io_end_in_page - map_start_in_page); 337 } 338 339 /* 340 * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf. 341 * The direction of the copy is determined by @iter->dir. 
342 * The caller must ensure the operation overflows neither @buf nor the GPADL 343 * (there's an assert for the latter). 344 * Reuse the currently mapped page in the GPADL if possible. 345 */ 346 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len) 347 { 348 ssize_t ret = len; 349 350 assert(iter->active); 351 352 while (len) { 353 uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK; 354 uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page; 355 uint32_t cplen = MIN(pgleft, len); 356 void *p; 357 358 /* try to reuse the cached mapping */ 359 if (iter->map) { 360 uint32_t map_start_in_page = 361 (uintptr_t)iter->map & ~TARGET_PAGE_MASK; 362 uint32_t off_base = iter->off & ~TARGET_PAGE_MASK; 363 uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK; 364 if (off_base != mapped_base || off_in_page < map_start_in_page) { 365 gpadl_iter_cache_unmap(iter); 366 iter->map = NULL; 367 } 368 } 369 370 if (!iter->map) { 371 dma_addr_t maddr; 372 dma_addr_t mlen = pgleft; 373 uint32_t idx = iter->off >> TARGET_PAGE_BITS; 374 assert(idx < iter->gpadl->num_gfns); 375 376 maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page; 377 378 iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir); 379 if (mlen != pgleft) { 380 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0); 381 iter->map = NULL; 382 return -EFAULT; 383 } 384 } 385 386 p = (void *)(((uintptr_t)iter->map & TARGET_PAGE_MASK) | off_in_page); 387 if (iter->dir == DMA_DIRECTION_FROM_DEVICE) { 388 memcpy(p, buf, cplen); 389 } else { 390 memcpy(buf, p, cplen); 391 } 392 393 buf += cplen; 394 len -= cplen; 395 iter->off += cplen; 396 iter->last_off = iter->off; 397 } 398 399 return ret; 400 } 401 402 /* 403 * Position the iterator @iter at new offset @new_off. 404 * If this results in the cached mapping being unusable with the new offset, 405 * unmap it. 
406 */ 407 static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off) 408 { 409 assert(iter->active); 410 iter->off = new_off; 411 } 412 413 /* 414 * Start a series of i/o on the GPADL. 415 * After this i/o and seek operations on @iter become legal. 416 */ 417 static inline void gpadl_iter_start_io(GpadlIter *iter) 418 { 419 assert(!iter->active); 420 /* mapping is cached lazily on i/o */ 421 iter->map = NULL; 422 iter->active = true; 423 } 424 425 /* 426 * End the eariler started series of i/o on the GPADL and release the cached 427 * mapping if any. 428 */ 429 static inline void gpadl_iter_end_io(GpadlIter *iter) 430 { 431 assert(iter->active); 432 433 if (iter->map) { 434 gpadl_iter_cache_unmap(iter); 435 } 436 437 iter->active = false; 438 } 439 440 static void vmbus_resched(VMBus *vmbus); 441 static void vmbus_msg_cb(void *data, int status); 442 443 ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off, 444 const struct iovec *iov, size_t iov_cnt) 445 { 446 GpadlIter iter; 447 size_t i; 448 ssize_t ret = 0; 449 450 gpadl_iter_init(&iter, gpadl, chan->dev->dma_as, 451 DMA_DIRECTION_FROM_DEVICE); 452 gpadl_iter_start_io(&iter); 453 gpadl_iter_seek(&iter, off); 454 for (i = 0; i < iov_cnt; i++) { 455 ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len); 456 if (ret < 0) { 457 goto out; 458 } 459 } 460 out: 461 gpadl_iter_end_io(&iter); 462 return ret; 463 } 464 465 int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, 466 unsigned iov_cnt, size_t len, size_t off) 467 { 468 int ret_cnt = 0, ret; 469 unsigned i; 470 QEMUSGList *sgl = &req->sgl; 471 ScatterGatherEntry *sg = sgl->sg; 472 473 for (i = 0; i < sgl->nsg; i++) { 474 if (sg[i].len > off) { 475 break; 476 } 477 off -= sg[i].len; 478 } 479 for (; len && i < sgl->nsg; i++) { 480 dma_addr_t mlen = MIN(sg[i].len - off, len); 481 dma_addr_t addr = sg[i].base + off; 482 len -= mlen; 483 off = 0; 484 485 for (; mlen; ret_cnt++) { 486 dma_addr_t l = 
mlen; 487 dma_addr_t a = addr; 488 489 if (ret_cnt == iov_cnt) { 490 ret = -ENOBUFS; 491 goto err; 492 } 493 494 iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir); 495 if (!l) { 496 ret = -EFAULT; 497 goto err; 498 } 499 iov[ret_cnt].iov_len = l; 500 addr += l; 501 mlen -= l; 502 } 503 } 504 505 return ret_cnt; 506 err: 507 vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0); 508 return ret; 509 } 510 511 void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, 512 unsigned iov_cnt, size_t accessed) 513 { 514 QEMUSGList *sgl = &req->sgl; 515 unsigned i; 516 517 for (i = 0; i < iov_cnt; i++) { 518 size_t acsd = MIN(accessed, iov[i].iov_len); 519 dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd); 520 accessed -= acsd; 521 } 522 } 523 524 static const VMStateDescription vmstate_gpadl = { 525 .name = "vmbus/gpadl", 526 .version_id = 0, 527 .minimum_version_id = 0, 528 .fields = (VMStateField[]) { 529 VMSTATE_UINT32(id, VMBusGpadl), 530 VMSTATE_UINT32(child_relid, VMBusGpadl), 531 VMSTATE_UINT32(num_gfns, VMBusGpadl), 532 VMSTATE_UINT32(seen_gfns, VMBusGpadl), 533 VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0, 534 vmstate_info_uint64, uint64_t), 535 VMSTATE_UINT8(state, VMBusGpadl), 536 VMSTATE_END_OF_LIST() 537 } 538 }; 539 540 /* 541 * Wrap the index into a ring buffer of @len bytes. 542 * @idx is assumed not to exceed twice the size of the ringbuffer, so only 543 * single wraparound is considered. 544 */ 545 static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len) 546 { 547 if (idx >= len) { 548 idx -= len; 549 } 550 return idx; 551 } 552 553 /* 554 * Circular difference between two indices into a ring buffer of @len bytes. 555 * @allow_catchup - whether @idx1 may catch up @idx2; e.g. read index may catch 556 * up write index but not vice versa. 
557 */ 558 static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len, 559 bool allow_catchup) 560 { 561 return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len); 562 } 563 564 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf) 565 { 566 vmbus_ring_buffer *rb; 567 dma_addr_t mlen = sizeof(*rb); 568 569 rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen, 570 DMA_DIRECTION_FROM_DEVICE); 571 if (mlen != sizeof(*rb)) { 572 dma_memory_unmap(ringbuf->as, rb, mlen, 573 DMA_DIRECTION_FROM_DEVICE, 0); 574 return NULL; 575 } 576 return rb; 577 } 578 579 static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf, 580 vmbus_ring_buffer *rb, bool dirty) 581 { 582 assert(rb); 583 584 dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE, 585 dirty ? sizeof(*rb) : 0); 586 } 587 588 static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl, 589 AddressSpace *as, DMADirection dir, 590 uint32_t begin, uint32_t end) 591 { 592 ringbuf->as = as; 593 ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS; 594 ringbuf->base = (begin + 1) << TARGET_PAGE_BITS; 595 ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS; 596 gpadl_iter_init(&ringbuf->iter, gpadl, as, dir); 597 } 598 599 static int ringbufs_init(VMBusChannel *chan) 600 { 601 vmbus_ring_buffer *rb; 602 VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf; 603 VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf; 604 605 if (chan->ringbuf_send_offset <= 1 || 606 chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) { 607 return -EINVAL; 608 } 609 610 ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as, 611 DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset); 612 ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as, 613 DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset, 614 chan->gpadl->num_gfns); 615 send_ringbuf->wanted = 0; 616 send_ringbuf->reserved = 0; 617 618 rb = 
ringbuf_map_hdr(&recv_ringbuf->common); 619 if (!rb) { 620 return -EFAULT; 621 } 622 recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index; 623 ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false); 624 625 rb = ringbuf_map_hdr(&send_ringbuf->common); 626 if (!rb) { 627 return -EFAULT; 628 } 629 send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index; 630 send_ringbuf->last_seen_rd_idx = rb->read_index; 631 rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ; 632 ringbuf_unmap_hdr(&send_ringbuf->common, rb, true); 633 634 if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len || 635 send_ringbuf->wr_idx >= send_ringbuf->common.len) { 636 return -EOVERFLOW; 637 } 638 639 return 0; 640 } 641 642 /* 643 * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping 644 * around if needed. 645 * @len is assumed not to exceed the size of the ringbuffer, so only single 646 * wraparound is considered. 647 */ 648 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len) 649 { 650 ssize_t ret1 = 0, ret2 = 0; 651 uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off; 652 653 if (len >= remain) { 654 ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain); 655 if (ret1 < 0) { 656 return ret1; 657 } 658 gpadl_iter_seek(&ringbuf->iter, ringbuf->base); 659 buf += remain; 660 len -= remain; 661 } 662 ret2 = gpadl_iter_io(&ringbuf->iter, buf, len); 663 if (ret2 < 0) { 664 return ret2; 665 } 666 return ret1 + ret2; 667 } 668 669 /* 670 * Position the circular iterator within @ringbuf to offset @new_off, wrapping 671 * around if needed. 672 * @new_off is assumed not to exceed twice the size of the ringbuffer, so only 673 * single wraparound is considered. 
674 */ 675 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off) 676 { 677 gpadl_iter_seek(&ringbuf->iter, 678 ringbuf->base + rb_idx_wrap(new_off, ringbuf->len)); 679 } 680 681 static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf) 682 { 683 return ringbuf->iter.off - ringbuf->base; 684 } 685 686 static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf) 687 { 688 gpadl_iter_start_io(&ringbuf->iter); 689 } 690 691 static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf) 692 { 693 gpadl_iter_end_io(&ringbuf->iter); 694 } 695 696 VMBusDevice *vmbus_channel_device(VMBusChannel *chan) 697 { 698 return chan->dev; 699 } 700 701 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx) 702 { 703 if (chan_idx >= dev->num_channels) { 704 return NULL; 705 } 706 return &dev->channels[chan_idx]; 707 } 708 709 uint32_t vmbus_channel_idx(VMBusChannel *chan) 710 { 711 return chan - chan->dev->channels; 712 } 713 714 void vmbus_channel_notify_host(VMBusChannel *chan) 715 { 716 event_notifier_set(&chan->notifier); 717 } 718 719 bool vmbus_channel_is_open(VMBusChannel *chan) 720 { 721 return chan->is_open; 722 } 723 724 /* 725 * Notify the guest side about the data to work on in the channel ring buffer. 726 * The notification is done by signaling a dedicated per-channel SynIC event 727 * flag (more recent guests) or setting a bit in the interrupt page and firing 728 * the VMBus SINT (older guests). 
729 */ 730 static int vmbus_channel_notify_guest(VMBusChannel *chan) 731 { 732 int res = 0; 733 unsigned long *int_map, mask; 734 unsigned idx; 735 hwaddr addr = chan->vmbus->int_page_gpa; 736 hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0; 737 738 trace_vmbus_channel_notify_guest(chan->id); 739 740 if (!addr) { 741 return hyperv_set_event_flag(chan->notify_route, chan->id); 742 } 743 744 int_map = cpu_physical_memory_map(addr, &len, 1); 745 if (len != TARGET_PAGE_SIZE / 2) { 746 res = -ENXIO; 747 goto unmap; 748 } 749 750 idx = BIT_WORD(chan->id); 751 mask = BIT_MASK(chan->id); 752 if ((atomic_fetch_or(&int_map[idx], mask) & mask) != mask) { 753 res = hyperv_sint_route_set_sint(chan->notify_route); 754 dirty = len; 755 } 756 757 unmap: 758 cpu_physical_memory_unmap(int_map, len, 1, dirty); 759 return res; 760 } 761 762 #define VMBUS_PKT_TRAILER sizeof(uint64_t) 763 764 static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr, 765 uint32_t desclen, uint32_t msglen) 766 { 767 hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) + 768 DIV_ROUND_UP(desclen, sizeof(uint64_t)); 769 hdr->len_qwords = hdr->offset_qwords + 770 DIV_ROUND_UP(msglen, sizeof(uint64_t)); 771 return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER; 772 } 773 774 /* 775 * Simplified ring buffer operation with paired barriers annotations in the 776 * producer and consumer loops: 777 * 778 * producer * consumer 779 * ~~~~~~~~ * ~~~~~~~~ 780 * write pending_send_sz * read write_index 781 * smp_mb [A] * smp_mb [C] 782 * read read_index * read packet 783 * smp_mb [B] * read/write out-of-band data 784 * read/write out-of-band data * smp_mb [B] 785 * write packet * write read_index 786 * smp_mb [C] * smp_mb [A] 787 * write write_index * read pending_send_sz 788 * smp_wmb [D] * smp_rmb [D] 789 * write pending_send_sz * read write_index 790 * ... * ... 
791 */ 792 793 static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf) 794 { 795 /* don't trust guest data */ 796 if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) { 797 return 0; 798 } 799 return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx, 800 ringbuf->common.len, false); 801 } 802 803 static ssize_t ringbuf_send_update_idx(VMBusChannel *chan) 804 { 805 VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; 806 vmbus_ring_buffer *rb; 807 uint32_t written; 808 809 written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx, 810 ringbuf->common.len, true); 811 if (!written) { 812 return 0; 813 } 814 815 rb = ringbuf_map_hdr(&ringbuf->common); 816 if (!rb) { 817 return -EFAULT; 818 } 819 820 ringbuf->reserved -= written; 821 822 /* prevent reorder with the data operation and packet write */ 823 smp_mb(); /* barrier pair [C] */ 824 rb->write_index = ringbuf->wr_idx; 825 826 /* 827 * If the producer earlier indicated that it wants to be notified when the 828 * consumer frees certain amount of space in the ring buffer, that amount 829 * is reduced by the size of the completed write. 830 */ 831 if (ringbuf->wanted) { 832 /* otherwise reservation would fail */ 833 assert(ringbuf->wanted < written); 834 ringbuf->wanted -= written; 835 /* prevent reorder with write_index write */ 836 smp_wmb(); /* barrier pair [D] */ 837 rb->pending_send_sz = ringbuf->wanted; 838 } 839 840 /* prevent reorder with write_index or pending_send_sz write */ 841 smp_mb(); /* barrier pair [A] */ 842 ringbuf->last_seen_rd_idx = rb->read_index; 843 844 /* 845 * The consumer may have missed the reduction of pending_send_sz and skip 846 * notification, so re-check the blocking condition, and, if it's no longer 847 * true, ensure processing another iteration by simulating consumer's 848 * notification. 
849 */ 850 if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) { 851 vmbus_channel_notify_host(chan); 852 } 853 854 /* skip notification by consumer's request */ 855 if (rb->interrupt_mask) { 856 goto out; 857 } 858 859 /* 860 * The consumer hasn't caught up with the producer's previous state so it's 861 * not blocked. 862 * (last_seen_rd_idx comes from the guest but it's safe to use w/o 863 * validation here as it only affects notification.) 864 */ 865 if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx, 866 ringbuf->common.len, true) > written) { 867 goto out; 868 } 869 870 vmbus_channel_notify_guest(chan); 871 out: 872 ringbuf_unmap_hdr(&ringbuf->common, rb, true); 873 ringbuf->last_wr_idx = ringbuf->wr_idx; 874 return written; 875 } 876 877 int vmbus_channel_reserve(VMBusChannel *chan, 878 uint32_t desclen, uint32_t msglen) 879 { 880 VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; 881 vmbus_ring_buffer *rb = NULL; 882 vmbus_packet_hdr hdr; 883 uint32_t needed = ringbuf->reserved + 884 vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen); 885 886 /* avoid touching the guest memory if possible */ 887 if (likely(needed <= ringbuf_send_avail(ringbuf))) { 888 goto success; 889 } 890 891 rb = ringbuf_map_hdr(&ringbuf->common); 892 if (!rb) { 893 return -EFAULT; 894 } 895 896 /* fetch read index from guest memory and try again */ 897 ringbuf->last_seen_rd_idx = rb->read_index; 898 899 if (likely(needed <= ringbuf_send_avail(ringbuf))) { 900 goto success; 901 } 902 903 rb->pending_send_sz = needed; 904 905 /* 906 * The consumer may have made progress and freed up some space before 907 * seeing updated pending_send_sz, so re-read read_index (preventing 908 * reorder with the pending_send_sz write) and try again. 
909 */ 910 smp_mb(); /* barrier pair [A] */ 911 ringbuf->last_seen_rd_idx = rb->read_index; 912 913 if (needed > ringbuf_send_avail(ringbuf)) { 914 goto out; 915 } 916 917 success: 918 ringbuf->reserved = needed; 919 needed = 0; 920 921 /* clear pending_send_sz if it was set */ 922 if (ringbuf->wanted) { 923 if (!rb) { 924 rb = ringbuf_map_hdr(&ringbuf->common); 925 if (!rb) { 926 /* failure to clear pending_send_sz is non-fatal */ 927 goto out; 928 } 929 } 930 931 rb->pending_send_sz = 0; 932 } 933 934 /* prevent reorder of the following data operation with read_index read */ 935 smp_mb(); /* barrier pair [B] */ 936 937 out: 938 if (rb) { 939 ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed); 940 } 941 ringbuf->wanted = needed; 942 return needed ? -ENOSPC : 0; 943 } 944 945 ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type, 946 void *desc, uint32_t desclen, 947 void *msg, uint32_t msglen, 948 bool need_comp, uint64_t transaction_id) 949 { 950 ssize_t ret = 0; 951 vmbus_packet_hdr hdr; 952 uint32_t totlen; 953 VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; 954 955 if (!vmbus_channel_is_open(chan)) { 956 return -EINVAL; 957 } 958 959 totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen); 960 hdr.type = pkt_type; 961 hdr.flags = need_comp ? 
VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0; 962 hdr.transaction_id = transaction_id; 963 964 assert(totlen <= ringbuf->reserved); 965 966 ringbuf_start_io(&ringbuf->common); 967 ringbuf_seek(&ringbuf->common, ringbuf->wr_idx); 968 ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)); 969 if (ret < 0) { 970 goto out; 971 } 972 if (desclen) { 973 assert(desc); 974 ret = ringbuf_io(&ringbuf->common, desc, desclen); 975 if (ret < 0) { 976 goto out; 977 } 978 ringbuf_seek(&ringbuf->common, 979 ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t)); 980 } 981 ret = ringbuf_io(&ringbuf->common, msg, msglen); 982 if (ret < 0) { 983 goto out; 984 } 985 ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen); 986 ringbuf->wr_idx = ringbuf_tell(&ringbuf->common); 987 ret = 0; 988 out: 989 ringbuf_end_io(&ringbuf->common); 990 if (ret) { 991 return ret; 992 } 993 return ringbuf_send_update_idx(chan); 994 } 995 996 ssize_t vmbus_channel_send_completion(VMBusChanReq *req, 997 void *msg, uint32_t msglen) 998 { 999 assert(req->need_comp); 1000 return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0, 1001 msg, msglen, false, req->transaction_id); 1002 } 1003 1004 static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev, 1005 VMBusRingBufCommon *ringbuf, uint32_t len) 1006 { 1007 int ret; 1008 vmbus_pkt_gpa_direct hdr; 1009 hwaddr curaddr = 0; 1010 hwaddr curlen = 0; 1011 int num; 1012 1013 if (len < sizeof(hdr)) { 1014 return -EIO; 1015 } 1016 ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr)); 1017 if (ret < 0) { 1018 return ret; 1019 } 1020 len -= sizeof(hdr); 1021 1022 num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t); 1023 if (num < 0) { 1024 return -EIO; 1025 } 1026 qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as); 1027 1028 for (; hdr.rangecount; hdr.rangecount--) { 1029 vmbus_gpa_range range; 1030 1031 if (len < sizeof(range)) { 1032 goto eio; 1033 } 1034 ret = ringbuf_io(ringbuf, &range, sizeof(range)); 1035 if (ret < 0) { 1036 goto 
err; 1037 } 1038 len -= sizeof(range); 1039 1040 if (range.byte_offset & TARGET_PAGE_MASK) { 1041 goto eio; 1042 } 1043 1044 for (; range.byte_count; range.byte_offset = 0) { 1045 uint64_t paddr; 1046 uint32_t plen = MIN(range.byte_count, 1047 TARGET_PAGE_SIZE - range.byte_offset); 1048 1049 if (len < sizeof(uint64_t)) { 1050 goto eio; 1051 } 1052 ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr)); 1053 if (ret < 0) { 1054 goto err; 1055 } 1056 len -= sizeof(uint64_t); 1057 paddr <<= TARGET_PAGE_BITS; 1058 paddr |= range.byte_offset; 1059 range.byte_count -= plen; 1060 1061 if (curaddr + curlen == paddr) { 1062 /* consecutive fragments - join */ 1063 curlen += plen; 1064 } else { 1065 if (curlen) { 1066 qemu_sglist_add(sgl, curaddr, curlen); 1067 } 1068 1069 curaddr = paddr; 1070 curlen = plen; 1071 } 1072 } 1073 } 1074 1075 if (curlen) { 1076 qemu_sglist_add(sgl, curaddr, curlen); 1077 } 1078 1079 return 0; 1080 eio: 1081 ret = -EIO; 1082 err: 1083 qemu_sglist_destroy(sgl); 1084 return ret; 1085 } 1086 1087 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan, 1088 uint32_t size, uint16_t pkt_type, 1089 uint32_t msglen, uint64_t transaction_id, 1090 bool need_comp) 1091 { 1092 VMBusChanReq *req; 1093 uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg)); 1094 uint32_t totlen = msgoff + msglen; 1095 1096 req = g_malloc0(totlen); 1097 req->chan = chan; 1098 req->pkt_type = pkt_type; 1099 req->msg = (void *)req + msgoff; 1100 req->msglen = msglen; 1101 req->transaction_id = transaction_id; 1102 req->need_comp = need_comp; 1103 return req; 1104 } 1105 1106 int vmbus_channel_recv_start(VMBusChannel *chan) 1107 { 1108 VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; 1109 vmbus_ring_buffer *rb; 1110 1111 rb = ringbuf_map_hdr(&ringbuf->common); 1112 if (!rb) { 1113 return -EFAULT; 1114 } 1115 ringbuf->last_seen_wr_idx = rb->write_index; 1116 ringbuf_unmap_hdr(&ringbuf->common, rb, false); 1117 1118 if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) { 1119 
return -EOVERFLOW; 1120 } 1121 1122 /* prevent reorder of the following data operation with write_index read */ 1123 smp_mb(); /* barrier pair [C] */ 1124 return 0; 1125 } 1126 1127 void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size) 1128 { 1129 VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; 1130 vmbus_packet_hdr hdr = {}; 1131 VMBusChanReq *req; 1132 uint32_t avail; 1133 uint32_t totlen, pktlen, msglen, msgoff, desclen; 1134 1135 assert(size >= sizeof(*req)); 1136 1137 /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */ 1138 avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx, 1139 ringbuf->common.len, true); 1140 if (avail < sizeof(hdr)) { 1141 return NULL; 1142 } 1143 1144 ringbuf_seek(&ringbuf->common, ringbuf->rd_idx); 1145 if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) { 1146 return NULL; 1147 } 1148 1149 pktlen = hdr.len_qwords * sizeof(uint64_t); 1150 totlen = pktlen + VMBUS_PKT_TRAILER; 1151 if (totlen > avail) { 1152 return NULL; 1153 } 1154 1155 msgoff = hdr.offset_qwords * sizeof(uint64_t); 1156 if (msgoff > pktlen || msgoff < sizeof(hdr)) { 1157 error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen); 1158 return NULL; 1159 } 1160 1161 msglen = pktlen - msgoff; 1162 1163 req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id, 1164 hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION); 1165 1166 switch (hdr.type) { 1167 case VMBUS_PACKET_DATA_USING_GPA_DIRECT: 1168 desclen = msgoff - sizeof(hdr); 1169 if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common, 1170 desclen) < 0) { 1171 error_report("%s: failed to convert GPA ranges to SGL", __func__); 1172 goto free_req; 1173 } 1174 break; 1175 case VMBUS_PACKET_DATA_INBAND: 1176 case VMBUS_PACKET_COMP: 1177 break; 1178 default: 1179 error_report("%s: unexpected msg type: %x", __func__, hdr.type); 1180 goto free_req; 1181 } 1182 1183 ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff); 1184 if 
(ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
        goto free_req;
    }
    /* leave the iterator just past the packet (including the trailer) */
    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);

    return req;
free_req:
    vmbus_free_req(req);
    return NULL;
}

/*
 * Consume the packet last positioned over: set rd_idx to the current ring
 * buffer iterator position.
 */
void vmbus_channel_recv_pop(VMBusChannel *chan)
{
    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
    ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
}

/*
 * Finish a receive pass: store the new read index into the ring buffer header
 * and notify the guest unless the header state shows this is unnecessary.
 * Returns the distance the read index advanced since the previous call (0 if
 * it did not move), or -EFAULT if the header cannot be mapped.
 */
ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
{
    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
    vmbus_ring_buffer *rb;
    uint32_t read;

    read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
                        ringbuf->common.len, true);
    if (!read) {
        return 0;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    /* prevent reorder with the data operation and packet read */
    smp_mb();                   /* barrier pair [B] */
    rb->read_index = ringbuf->rd_idx;

    /* prevent reorder of the following pending_send_sz read */
    smp_mb();                   /* barrier pair [A] */

    /* no notification when interrupt_mask is set in the header */
    if (rb->interrupt_mask) {
        goto out;
    }

    if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
        uint32_t wr_idx, wr_avail;
        uint32_t wanted = rb->pending_send_sz;

        if (!wanted) {
            goto out;
        }

        /* prevent reorder with pending_send_sz read */
        smp_rmb();              /* barrier pair [D] */
        wr_idx = rb->write_index;

        wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
                                true);

        /* the producer wasn't blocked on the consumer state */
        if (wr_avail >= read + wanted) {
            goto out;
        }
        /* there's not enough space for the producer to make progress */
        if (wr_avail < wanted) {
            goto out;
        }
    }

    vmbus_channel_notify_guest(chan);
out:
    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
    ringbuf->last_rd_idx = ringbuf->rd_idx;
    return read;
}

void
vmbus_free_req(void *req)
{
    /*
     * Release a request; @req is treated as a VMBusChanReq.  NULL is
     * accepted and ignored.
     */
    VMBusChanReq *r = req;

    if (!req) {
        return;
    }

    /* the scatter-gather list is only destroyed if it has a device set */
    if (r->sgl.dev) {
        qemu_sglist_destroy(&r->sgl);
    }
    g_free(req);
}

/*
 * Event notifier handler of a channel: when the notifier was set, run the
 * device's notify callback for the channel.
 */
static void channel_event_cb(EventNotifier *e)
{
    VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
    if (event_notifier_test_and_clear(e)) {
        /*
         * All receives are supposed to happen within the device worker, so
         * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
         * potentially reuse the cached mapping throughout the worker.
         * Can't do this for sends as they may happen outside the device
         * worker.
         */
        VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
        ringbuf_start_io(&ringbuf->common);
        chan->notify_cb(chan);
        ringbuf_end_io(&ringbuf->common);

    }
}

/*
 * Pick the lowest channel id whose bit is clear in the bus's chanid_bitmap.
 * The bit is not set here.  Returns the id (bit index + VMBUS_FIRST_CHANID)
 * or -ENOMEM if all VMBUS_CHANID_COUNT bits are taken.
 */
static int alloc_chan_id(VMBus *vmbus)
{
    int ret;

    ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
    if (ret == VMBUS_CHANID_COUNT) {
        return -ENOMEM;
    }
    return ret + VMBUS_FIRST_CHANID;
}

/*
 * Mark the id of @chan as used in the bus's chanid_bitmap.
 * Returns 0 on success or -EEXIST if the bit was already set.
 */
static int register_chan_id(VMBusChannel *chan)
{
    return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
                            chan->vmbus->chanid_bitmap) ?
-EEXIST : 0;
}

/* Clear the bit of @chan's id in the bus's chanid_bitmap. */
static void unregister_chan_id(VMBusChannel *chan)
{
    clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
}

/* Connection id of a channel: its id shifted by VMBUS_CHAN_CONNECTION_OFFSET. */
static uint32_t chan_connection_id(VMBusChannel *chan)
{
    return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
}

/*
 * Initialize channel @chan as subchannel @idx of @dev: assign a free channel
 * id, register it, and append the channel to the bus's channel list.
 * Sets @errp and leaves the channel off the list if no id is available.
 */
static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
                         VMBusChannel *chan, uint16_t idx, Error **errp)
{
    int res;

    chan->dev = dev;
    chan->notify_cb = vdc->chan_notify_cb;
    chan->subchan_idx = idx;
    chan->vmbus = vmbus;

    res = alloc_chan_id(vmbus);
    if (res < 0) {
        error_setg(errp, "no spare channel id");
        return;
    }
    chan->id = res;
    /* the id was just found free, so the return value is not checked */
    register_chan_id(chan);

    /*
     * The guest drivers depend on the device subchannels (idx #1+) to be
     * offered after the primary channel (idx #0) of that device.  To ensure
     * that, record the channels on the channel list in the order they appear
     * within the device.
     */
    QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
}

/*
 * Undo init_channel(): take the channel off the bus's channel list and
 * release its id.  The channel must be in VMCHAN_INIT state.
 */
static void deinit_channel(VMBusChannel *chan)
{
    assert(chan->state == VMCHAN_INIT);
    QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
    unregister_chan_id(chan);
}

/*
 * Allocate and initialize the channels of @dev.  The number of channels is
 * obtained from the device class's num_channels hook, defaulting to 1 when
 * the hook is not provided.
 */
static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
{
    uint16_t i;
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    dev->num_channels = vdc->num_channels ?
vdc->num_channels(dev) : 1; 1362 if (dev->num_channels < 1) { 1363 error_setg(&err, "invalid #channels: %u", dev->num_channels); 1364 goto error_out; 1365 } 1366 1367 dev->channels = g_new0(VMBusChannel, dev->num_channels); 1368 for (i = 0; i < dev->num_channels; i++) { 1369 init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err); 1370 if (err) { 1371 goto err_init; 1372 } 1373 } 1374 1375 return; 1376 1377 err_init: 1378 while (i--) { 1379 deinit_channel(&dev->channels[i]); 1380 } 1381 error_out: 1382 error_propagate(errp, err); 1383 } 1384 1385 static void free_channels(VMBusDevice *dev) 1386 { 1387 uint16_t i; 1388 for (i = 0; i < dev->num_channels; i++) { 1389 deinit_channel(&dev->channels[i]); 1390 } 1391 g_free(dev->channels); 1392 } 1393 1394 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index) 1395 { 1396 VMBusChannel *chan; 1397 1398 if (vp_index == vmbus->target_vp) { 1399 hyperv_sint_route_ref(vmbus->sint_route); 1400 return vmbus->sint_route; 1401 } 1402 1403 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 1404 if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) { 1405 hyperv_sint_route_ref(chan->notify_route); 1406 return chan->notify_route; 1407 } 1408 } 1409 1410 return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL); 1411 } 1412 1413 static void open_channel(VMBusChannel *chan) 1414 { 1415 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); 1416 1417 chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl); 1418 if (!chan->gpadl) { 1419 return; 1420 } 1421 1422 if (ringbufs_init(chan)) { 1423 goto put_gpadl; 1424 } 1425 1426 if (event_notifier_init(&chan->notifier, 0)) { 1427 goto put_gpadl; 1428 } 1429 1430 event_notifier_set_handler(&chan->notifier, channel_event_cb); 1431 1432 if (hyperv_set_event_flag_handler(chan_connection_id(chan), 1433 &chan->notifier)) { 1434 goto cleanup_notifier; 1435 } 1436 1437 chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp); 1438 if 
(!chan->notify_route) {
        goto clear_event_flag_handler;
    }

    /* give the device class a chance to veto/prepare the open */
    if (vdc->open_channel && vdc->open_channel(chan)) {
        goto unref_sint_route;
    }

    chan->is_open = true;
    return;

    /* error unwinding, in reverse order of acquisition */
unref_sint_route:
    hyperv_sint_route_unref(chan->notify_route);
clear_event_flag_handler:
    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
cleanup_notifier:
    event_notifier_set_handler(&chan->notifier, NULL);
    event_notifier_cleanup(&chan->notifier);
put_gpadl:
    vmbus_put_gpadl(chan->gpadl);
}

/*
 * Close @chan if it is open: call the device class's close hook (if any) and
 * release the resources acquired by open_channel().  No-op when the channel
 * is not open.
 */
static void close_channel(VMBusChannel *chan)
{
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);

    if (!chan->is_open) {
        return;
    }

    if (vdc->close_channel) {
        vdc->close_channel(chan);
    }

    hyperv_sint_route_unref(chan->notify_route);
    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
    event_notifier_set_handler(&chan->notifier, NULL);
    event_notifier_cleanup(&chan->notifier);
    vmbus_put_gpadl(chan->gpadl);
    chan->is_open = false;
}

/*
 * Migration post-load hook of a channel: register the loaded channel id in
 * the bus's id bitmap.  Fails with -EEXIST if that id is already in use.
 */
static int channel_post_load(void *opaque, int version_id)
{
    VMBusChannel *chan = opaque;

    return register_chan_id(chan);
}

/* Migration description of a single channel. */
static const VMStateDescription vmstate_channel = {
    .name = "vmbus/channel",
    .version_id = 0,
    .minimum_version_id = 0,
    .post_load = channel_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VMBusChannel),
        VMSTATE_UINT16(subchan_idx, VMBusChannel),
        VMSTATE_UINT32(open_id, VMBusChannel),
        VMSTATE_UINT32(target_vp, VMBusChannel),
        VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
        VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
        VMSTATE_UINT8(offer_state, VMBusChannel),
        VMSTATE_UINT8(state, VMBusChannel),
        VMSTATE_END_OF_LIST()
    }
};

/* Look up a channel on the bus by its id; returns NULL if there is none. */
static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
{
    VMBusChannel
*chan;
    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->id == id) {
            return chan;
        }
    }
    return NULL;
}

/*
 * Copy @msg into the bus's rx queue under rx_queue_lock.
 * Returns 0 on success or -ENOBUFS when the queue already holds
 * HV_MSG_QUEUE_LEN messages.
 */
static int enqueue_incoming_message(VMBus *vmbus,
                                    const struct hyperv_post_message_input *msg)
{
    int ret = 0;
    uint8_t idx, prev_size;

    qemu_mutex_lock(&vmbus->rx_queue_lock);

    if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
        ret = -ENOBUFS;
        goto out;
    }

    prev_size = vmbus->rx_queue_size;
    /* the queue is a ring: the new slot follows the last occupied one */
    idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
    memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
    vmbus->rx_queue_size++;

    /* only need to resched if the queue was empty before */
    if (!prev_size) {
        vmbus_resched(vmbus);
    }
out:
    qemu_mutex_unlock(&vmbus->rx_queue_lock);
    return ret;
}

/*
 * Message handler registered with hyperv_set_msg_handler(); @data is the
 * VMBus.  Validates the message type and size and queues the message for
 * later processing.  Returns a HV_STATUS_* code.
 */
static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
                                   void *data)
{
    VMBus *vmbus = data;
    struct vmbus_message_header *vmbus_msg;

    if (msg->message_type != HV_MESSAGE_VMBUS) {
        return HV_STATUS_INVALID_HYPERCALL_INPUT;
    }

    /* the payload must at least hold a vmbus message header */
    if (msg->payload_size < sizeof(struct vmbus_message_header)) {
        return HV_STATUS_INVALID_HYPERCALL_INPUT;
    }

    vmbus_msg = (struct vmbus_message_header *)msg->payload;

    trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);

    if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
        vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
        error_report("vmbus: unknown message type %#x",
                     vmbus_msg->message_type);
        return HV_STATUS_INVALID_HYPERCALL_INPUT;
    }

    if (enqueue_incoming_message(vmbus, msg)) {
        return HV_STATUS_INSUFFICIENT_BUFFERS;
    }
    return HV_STATUS_SUCCESS;
}

/*
 * The bus counts as initialized when its version field is non-zero and not
 * above VMBUS_VERSION_CURRENT.
 */
static bool vmbus_initialized(VMBus *vmbus)
{
    return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
}

static void
vmbus_reset_all(VMBus *vmbus)
{
    /* reset the bus object via the generic qbus reset */
    qbus_reset_all(BUS(vmbus));
}

/*
 * Post a vmbus message of @msglen bytes at @msgdata over the bus's SINT
 * route and mark a message as being in progress.  Only one message may be in
 * flight at a time (asserted).  Any delivery result other than 0 or -EAGAIN
 * is treated as fatal and resets the bus.
 */
static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
{
    int ret;
    struct hyperv_message msg = {
        .header.message_type = HV_MESSAGE_VMBUS,
    };

    assert(!vmbus->msg_in_progress);
    assert(msglen <= sizeof(msg.payload));
    assert(msglen >= sizeof(struct vmbus_message_header));

    vmbus->msg_in_progress = true;

    trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
                         msglen);

    memcpy(msg.payload, msgdata, msglen);
    msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);

    ret = hyperv_post_msg(vmbus->sint_route, &msg);
    if (ret == 0 || ret == -EAGAIN) {
        return;
    }

    error_report("message delivery fatal failure: %d; aborting vmbus", ret);
    vmbus_reset_all(vmbus);
}

/*
 * Set up the bus's SINT route for target_vp, with vmbus_msg_cb as its
 * callback.  Nothing is done while target_vp is still (uint32_t)-1.
 * Returns 0 on success or -ENOMEM if the route cannot be created.
 */
static int vmbus_init(VMBus *vmbus)
{
    if (vmbus->target_vp != (uint32_t)-1) {
        vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
                                                  vmbus_msg_cb, vmbus);
        if (!vmbus->sint_route) {
            error_report("failed to set up SINT route");
            return -ENOMEM;
        }
    }
    return 0;
}

/*
 * Return the bus to its initial, listening state: drop the bus's reference
 * on every GPADL not yet torn down, mark all channels as not offered,
 * release the SINT route and clear the connection parameters.
 */
static void vmbus_deinit(VMBus *vmbus)
{
    VMBusGpadl *gpadl, *tmp_gpadl;
    VMBusChannel *chan;

    QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
        if (gpadl->state == VMGPADL_TORNDOWN) {
            continue;
        }
        vmbus_put_gpadl(gpadl);
    }

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        chan->offer_state = VMOFFER_INIT;
    }

    hyperv_sint_route_unref(vmbus->sint_route);
    vmbus->sint_route = NULL;
    vmbus->int_page_gpa = 0;
    vmbus->target_vp = (uint32_t)-1;
    vmbus->version = 0;
    vmbus->state = VMBUS_LISTEN;
    vmbus->msg_in_progress = false;
}

/*
 * Handle INITIATE_CONTACT: reset the bus, record the parameters requested by
 * the guest and move to the handshake state.
 */
static void handle_initiate_contact(VMBus *vmbus,
vmbus_message_initiate_contact *msg,
                                    uint32_t msglen)
{
    if (msglen < sizeof(*msg)) {
        return;
    }

    trace_vmbus_initiate_contact(msg->version_requested >> 16,
                                 msg->version_requested & 0xffff,
                                 msg->target_vcpu, msg->monitor_page1,
                                 msg->monitor_page2, msg->interrupt_page);

    /*
     * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
     * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down
     * before handing over to OS loader.
     */
    vmbus_reset_all(vmbus);

    vmbus->target_vp = msg->target_vcpu;
    vmbus->version = msg->version_requested;
    if (vmbus->version < VMBUS_VERSION_WIN8) {
        /* linux passes interrupt page even when it doesn't need it */
        vmbus->int_page_gpa = msg->interrupt_page;
    }
    vmbus->state = VMBUS_HANDSHAKE;

    if (vmbus_init(vmbus)) {
        error_report("failed to init vmbus; aborting");
        vmbus_deinit(vmbus);
        return;
    }
}

/*
 * Post the VERSION_RESPONSE message; version_supported reflects whether the
 * requested version left the bus in the initialized state.
 */
static void send_handshake(VMBus *vmbus)
{
    struct vmbus_message_version_response msg = {
        .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
        .version_supported = vmbus_initialized(vmbus),
    };

    post_msg(vmbus, &msg, sizeof(msg));
}

/*
 * Handle REQUESTOFFERS: mark the first not-yet-offered channel (if any) as
 * being offered and move the bus to the offer state.  Ignored unless the bus
 * is initialized.  @msgdata and @msglen are unused.
 */
static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
{
    VMBusChannel *chan;

    if (!vmbus_initialized(vmbus)) {
        return;
    }

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->offer_state == VMOFFER_INIT) {
            chan->offer_state = VMOFFER_SENDING;
            break;
        }
    }

    vmbus->state = VMBUS_OFFER;
}

/*
 * Post the OFFERCHANNEL message for the channel currently marked as being
 * offered, or the ALLOFFERS_DELIVERED terminator if there is no such channel.
 */
static void send_offer(VMBus *vmbus)
{
    VMBusChannel *chan;
    struct vmbus_message_header alloffers_msg = {
        .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
    };

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->offer_state == VMOFFER_SENDING) {
VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
            /* Hyper-V wants LE GUIDs */
            QemuUUID classid = qemu_uuid_bswap(vdc->classid);
            QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
            struct vmbus_message_offer_channel msg = {
                .header.message_type = VMBUS_MSG_OFFERCHANNEL,
                .child_relid = chan->id,
                .connection_id = chan_connection_id(chan),
                .channel_flags = vdc->channel_flags,
                .mmio_size_mb = vdc->mmio_size_mb,
                .sub_channel_index = vmbus_channel_idx(chan),
                .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
            };

            memcpy(msg.type_uuid, &classid, sizeof(classid));
            memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));

            trace_vmbus_send_offer(chan->id, chan->dev);

            /* one offer per message; the next one is sent on completion */
            post_msg(vmbus, &msg, sizeof(msg));
            return;
        }
    }

    /* no more offers, send terminator message */
    trace_vmbus_terminate_offers();
    post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
}

/*
 * Completion of a message posted by send_offer(): mark the channel whose
 * offer was in flight as offered and pick the next channel to offer.
 * Returns true when the bus may leave the offer state, false otherwise.
 */
static bool complete_offer(VMBus *vmbus)
{
    VMBusChannel *chan;

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->offer_state == VMOFFER_SENDING) {
            chan->offer_state = VMOFFER_SENT;
            goto next_offer;
        }
    }
    /*
     * no transitioning channels found so this is completing the terminator
     * message, and vmbus can move to the next state
     */
    return true;

next_offer:
    /* try to mark another channel for offering */
    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->offer_state == VMOFFER_INIT) {
            chan->offer_state = VMOFFER_SENDING;
            break;
        }
    }
    /*
     * if an offer has been sent there are more offers or the terminator yet to
     * send, so no state transition for vmbus
     */
    return false;
}


/*
 * Handle GPADL_HEADER: create a new GPADL from the message and store the
 * page frame numbers carried in this message.  Once all of the GPADL's pages
 * have been seen, the bus moves to the create-GPADL state.
 */
static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
                                uint32_t msglen)
{
    VMBusGpadl *gpadl;
uint32_t num_gfns, i; 1788 1789 /* must include at least one gpa range */ 1790 if (msglen < sizeof(*msg) + sizeof(msg->range[0]) || 1791 !vmbus_initialized(vmbus)) { 1792 return; 1793 } 1794 1795 num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) / 1796 sizeof(msg->range[0].pfn_array[0]); 1797 1798 trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns); 1799 1800 /* 1801 * In theory the GPADL_HEADER message can define a GPADL with multiple GPA 1802 * ranges each with arbitrary size and alignment. However in practice only 1803 * single-range page-aligned GPADLs have been observed so just ignore 1804 * anything else and simplify things greatly. 1805 */ 1806 if (msg->rangecount != 1 || msg->range[0].byte_offset || 1807 (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) { 1808 return; 1809 } 1810 1811 /* ignore requests to create already existing GPADLs */ 1812 if (find_gpadl(vmbus, msg->gpadl_id)) { 1813 return; 1814 } 1815 1816 gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns); 1817 1818 for (i = 0; i < num_gfns && 1819 (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen; 1820 i++) { 1821 gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i]; 1822 } 1823 1824 if (gpadl_full(gpadl)) { 1825 vmbus->state = VMBUS_CREATE_GPADL; 1826 } 1827 } 1828 1829 static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg, 1830 uint32_t msglen) 1831 { 1832 VMBusGpadl *gpadl; 1833 uint32_t num_gfns_left, i; 1834 1835 if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { 1836 return; 1837 } 1838 1839 trace_vmbus_gpadl_body(msg->gpadl_id); 1840 1841 gpadl = find_gpadl(vmbus, msg->gpadl_id); 1842 if (!gpadl) { 1843 return; 1844 } 1845 1846 num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns; 1847 assert(num_gfns_left); 1848 1849 for (i = 0; i < num_gfns_left && 1850 (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) { 1851 gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i]; 1852 } 1853 1854 if 
(gpadl_full(gpadl)) {
        vmbus->state = VMBUS_CREATE_GPADL;
    }
}

/*
 * Post the GPADL_CREATED message for the first GPADL that is complete but
 * still in VMGPADL_INIT state.  Such a GPADL must exist (asserted).
 */
static void send_create_gpadl(VMBus *vmbus)
{
    VMBusGpadl *gpadl;

    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
        if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
            struct vmbus_message_gpadl_created msg = {
                .header.message_type = VMBUS_MSG_GPADL_CREATED,
                .gpadl_id = gpadl->id,
                .child_relid = gpadl->child_relid,
            };

            trace_vmbus_gpadl_created(gpadl->id);
            post_msg(vmbus, &msg, sizeof(msg));
            return;
        }
    }

    assert(false);
}

/*
 * Completion of the GPADL_CREATED message: mark the first complete GPADL
 * still in VMGPADL_INIT state as alive.  Returns true so that the bus leaves
 * the create-GPADL state; not finding such a GPADL is asserted against.
 */
static bool complete_create_gpadl(VMBus *vmbus)
{
    VMBusGpadl *gpadl;

    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
        if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
            gpadl->state = VMGPADL_ALIVE;

            return true;
        }
    }

    assert(false);
    return false;
}

/*
 * Handle GPADL_TEARDOWN: mark the named GPADL as being torn down and move the
 * bus to the teardown state.  Ignored for short messages, an uninitialized
 * bus, or a GPADL that is unknown or already torn down.
 */
static void handle_gpadl_teardown(VMBus *vmbus,
                                  vmbus_message_gpadl_teardown *msg,
                                  uint32_t msglen)
{
    VMBusGpadl *gpadl;

    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_gpadl_teardown(msg->gpadl_id);

    gpadl = find_gpadl(vmbus, msg->gpadl_id);
    if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
        return;
    }

    gpadl->state = VMGPADL_TEARINGDOWN;
    vmbus->state = VMBUS_TEARDOWN_GPADL;
}

/*
 * Post the GPADL_TORNDOWN message for the first GPADL in
 * VMGPADL_TEARINGDOWN state.  Such a GPADL must exist (asserted).
 */
static void send_teardown_gpadl(VMBus *vmbus)
{
    VMBusGpadl *gpadl;

    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
        if (gpadl->state == VMGPADL_TEARINGDOWN) {
            struct vmbus_message_gpadl_torndown msg = {
                .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
                .gpadl_id = gpadl->id,
            };

            trace_vmbus_gpadl_torndown(gpadl->id);
            post_msg(vmbus, &msg, sizeof(msg));
            return;
        }
    }

    assert(false);
}

/*
 * Completion of the GPADL_TORNDOWN message: mark the GPADL being torn down
 * as torn down and drop a reference on it.
 */
static bool complete_teardown_gpadl(VMBus *vmbus)
{
    VMBusGpadl *gpadl;

    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
        if (gpadl->state == VMGPADL_TEARINGDOWN) {
            gpadl->state = VMGPADL_TORNDOWN;
            vmbus_put_gpadl(gpadl);
            return true;
        }
    }

    assert(false);
    return false;
}

/*
 * Handle OPENCHANNEL: store the open parameters from the message in the
 * channel, attempt to open it, and move the bus to the open-channel state so
 * that the result gets reported.  Ignored for short messages, an
 * uninitialized bus, an unknown channel, or a channel not in VMCHAN_INIT
 * state.
 */
static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
                                uint32_t msglen)
{
    VMBusChannel *chan;

    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
                             msg->target_vp);
    chan = find_channel(vmbus, msg->child_relid);
    if (!chan || chan->state != VMCHAN_INIT) {
        return;
    }

    chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
    chan->ringbuf_send_offset = msg->ring_buffer_offset;
    chan->target_vp = msg->target_vp;
    chan->open_id = msg->open_id;

    open_channel(chan);

    /* success or failure of the open is reported by send_open_channel() */
    chan->state = VMCHAN_OPENING;
    vmbus->state = VMBUS_OPEN_CHANNEL;
}

/*
 * Post the OPENCHANNEL_RESULT message for the first channel in
 * VMCHAN_OPENING state; status is 0 if the channel is open and 1 otherwise.
 * Such a channel must exist (asserted).
 */
static void send_open_channel(VMBus *vmbus)
{
    VMBusChannel *chan;

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->state == VMCHAN_OPENING) {
            struct vmbus_message_open_result msg = {
                .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
                .child_relid = chan->id,
                .open_id = chan->open_id,
                .status = !vmbus_channel_is_open(chan),
            };

            trace_vmbus_channel_open(chan->id, msg.status);
            post_msg(vmbus, &msg, sizeof(msg));
            return;
        }
    }

    assert(false);
}

/*
 * Completion of the OPENCHANNEL_RESULT message: finalize the state of the
 * channel that was being opened, depending on whether the open succeeded.
 */
static bool complete_open_channel(VMBus *vmbus)
{
    VMBusChannel *chan;

    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
        if (chan->state == VMCHAN_OPENING) {
            if (vmbus_channel_is_open(chan)) {
                chan->state = VMCHAN_OPEN;
                /*
                 * simulate guest notification of ringbuffer space made
                 * available, for the channel protocols where
the host
                 * initiates the communication
                 */
                vmbus_channel_notify_host(chan);
            } else {
                /* the open failed: the channel may be opened again later */
                chan->state = VMCHAN_INIT;
            }
            return true;
        }
    }

    assert(false);
    return false;
}

/* Reset the device once none of its channels is open any more. */
static void vdev_reset_on_close(VMBusDevice *vdev)
{
    uint16_t i;

    for (i = 0; i < vdev->num_channels; i++) {
        if (vmbus_channel_is_open(&vdev->channels[i])) {
            return;
        }
    }

    /* all channels closed -- reset device */
    qdev_reset_all(DEVICE(vdev));
}

/*
 * Handle CLOSECHANNEL: close the named channel, put it back into
 * VMCHAN_INIT state, and reset its device if that was the last open channel.
 * Ignored for short messages, an uninitialized bus, or an unknown channel.
 */
static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
                                 uint32_t msglen)
{
    VMBusChannel *chan;

    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
        return;
    }

    trace_vmbus_close_channel(msg->child_relid);

    chan = find_channel(vmbus, msg->child_relid);
    if (!chan) {
        return;
    }

    close_channel(chan);
    chan->state = VMCHAN_INIT;

    vdev_reset_on_close(chan->dev);
}

/* Handle UNLOAD: move the bus to the unload state.  @msg/@msglen are unused. */
static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
{
    vmbus->state = VMBUS_UNLOAD;
}

/*
 * Drop all messages still sitting in the rx queue and post the
 * UNLOAD_RESPONSE message.
 */
static void send_unload(VMBus *vmbus)
{
    vmbus_message_header msg = {
        .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
    };

    qemu_mutex_lock(&vmbus->rx_queue_lock);
    vmbus->rx_queue_size = 0;
    qemu_mutex_unlock(&vmbus->rx_queue_lock);

    post_msg(vmbus, &msg, sizeof(msg));
    return;
}

/* Completion of the UNLOAD_RESPONSE message: reset the whole bus. */
static bool complete_unload(VMBus *vmbus)
{
    vmbus_reset_all(vmbus);
    return true;
}

/*
 * Take the message at the head of the rx queue (if any), dispatch it to the
 * handler for its type, and remove it from the queue.  Runs with
 * rx_queue_lock held throughout.
 */
static void process_message(VMBus *vmbus)
{
    struct hyperv_post_message_input *hv_msg;
    struct vmbus_message_header *msg;
    void *msgdata;
    uint32_t msglen;

    qemu_mutex_lock(&vmbus->rx_queue_lock);

    if (!vmbus->rx_queue_size) {
        goto unlock;
    }

    hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
    msglen =
hv_msg->payload_size;
    /* too short to even hold a header: just drop it from the queue */
    if (msglen < sizeof(*msg)) {
        goto out;
    }
    msgdata = hv_msg->payload;
    msg = (struct vmbus_message_header *)msgdata;

    trace_vmbus_process_incoming_message(msg->message_type);

    switch (msg->message_type) {
    case VMBUS_MSG_INITIATE_CONTACT:
        handle_initiate_contact(vmbus, msgdata, msglen);
        break;
    case VMBUS_MSG_REQUESTOFFERS:
        handle_request_offers(vmbus, msgdata, msglen);
        break;
    case VMBUS_MSG_GPADL_HEADER:
        handle_gpadl_header(vmbus, msgdata, msglen);
        break;
    case VMBUS_MSG_GPADL_BODY:
        handle_gpadl_body(vmbus, msgdata, msglen);
        break;
    case VMBUS_MSG_GPADL_TEARDOWN:
        handle_gpadl_teardown(vmbus, msgdata, msglen);
        break;
    case VMBUS_MSG_OPENCHANNEL:
        handle_open_channel(vmbus, msgdata, msglen);
        break;
    case VMBUS_MSG_CLOSECHANNEL:
        handle_close_channel(vmbus, msgdata, msglen);
        break;
    case VMBUS_MSG_UNLOAD:
        handle_unload(vmbus, msgdata, msglen);
        break;
    default:
        error_report("unknown message type %#x", msg->message_type);
        break;
    }

out:
    /* pop the message off the head of the ring-shaped queue */
    vmbus->rx_queue_size--;
    vmbus->rx_queue_head++;
    vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;

    vmbus_resched(vmbus);
unlock:
    qemu_mutex_unlock(&vmbus->rx_queue_lock);
}

/*
 * Per-state actions, indexed by the bus state.  @run is invoked by
 * vmbus_do_run() while no message is in flight; @complete (optional) is
 * invoked by vmbus_msg_cb() on successful delivery of the posted message and
 * returns true when the bus should go back to VMBUS_LISTEN.  A NULL
 * @complete is treated like one returning true.
 */
static const struct {
    void (*run)(VMBus *vmbus);
    bool (*complete)(VMBus *vmbus);
} state_runner[] = {
    [VMBUS_LISTEN]         = {process_message, NULL},
    [VMBUS_HANDSHAKE]      = {send_handshake, NULL},
    [VMBUS_OFFER]          = {send_offer, complete_offer},
    [VMBUS_CREATE_GPADL]   = {send_create_gpadl, complete_create_gpadl},
    [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
    [VMBUS_OPEN_CHANNEL]   = {send_open_channel, complete_open_channel},
    [VMBUS_UNLOAD]         = {send_unload, complete_unload},
};

/*
 * Run the action of the current bus state, unless a posted message is still
 * awaiting delivery.
 */
static void vmbus_do_run(VMBus *vmbus)
{
    if (vmbus->msg_in_progress)
{
        return;
    }

    assert(vmbus->state < VMBUS_STATE_MAX);
    assert(state_runner[vmbus->state].run);
    state_runner[vmbus->state].run(vmbus);
}

/*
 * Bottom-half entry point scheduled by vmbus_resched(); @opaque is the VMBus.
 * Runs one step of the bus state machine, guarding against re-entry.
 */
static void vmbus_run(void *opaque)
{
    VMBus *vmbus = opaque;

    /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
    if (vmbus->in_progress) {
        return;
    }

    vmbus->in_progress = true;
    /*
     * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
     * should go *after* the code that can result in aio_poll; otherwise
     * reschedules can be missed.  No idea how to enforce that.
     */
    vmbus_do_run(vmbus);
    vmbus->in_progress = false;
}

/*
 * Delivery callback of the bus's SINT route (registered in vmbus_init());
 * @data is the VMBus and @status the delivery result.
 * -EAGAIN keeps the current state so the message is posted again; any other
 * non-zero status resets the bus; on success the state's completion handler
 * decides whether the bus returns to VMBUS_LISTEN.
 */
static void vmbus_msg_cb(void *data, int status)
{
    VMBus *vmbus = data;
    bool (*complete)(VMBus *vmbus);

    assert(vmbus->msg_in_progress);

    trace_vmbus_msg_cb(status);

    if (status == -EAGAIN) {
        goto out;
    }
    if (status) {
        error_report("message delivery fatal failure: %d; aborting vmbus",
                     status);
        vmbus_reset_all(vmbus);
        return;
    }

    assert(vmbus->state < VMBUS_STATE_MAX);
    complete = state_runner[vmbus->state].complete;
    if (!complete || complete(vmbus)) {
        vmbus->state = VMBUS_LISTEN;
    }
out:
    vmbus->msg_in_progress = false;
    vmbus_resched(vmbus);
}

/* Schedule vmbus_run() as a one-shot bottom half in the main AioContext. */
static void vmbus_resched(VMBus *vmbus)
{
    aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
}

/*
 * Handler of the bus-wide event notifier.  When an interrupt page is
 * configured, scan the bitmap in the second half of that page and notify
 * every open channel whose bit (indexed by channel id) was set, clearing the
 * bit in the process.
 */
static void vmbus_signal_event(EventNotifier *e)
{
    VMBusChannel *chan;
    VMBus *vmbus = container_of(e, VMBus, notifier);
    unsigned long *int_map;
    hwaddr addr, len;
    bool is_dirty = false;

    if (!event_notifier_test_and_clear(e)) {
        return;
    }

    trace_vmbus_signal_event();

    if (!vmbus->int_page_gpa) {
        return;
    }

    addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
    len =
TARGET_PAGE_SIZE / 2; 2248 int_map = cpu_physical_memory_map(addr, &len, 1); 2249 if (len != TARGET_PAGE_SIZE / 2) { 2250 goto unmap; 2251 } 2252 2253 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 2254 if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) { 2255 if (!vmbus_channel_is_open(chan)) { 2256 continue; 2257 } 2258 vmbus_channel_notify_host(chan); 2259 is_dirty = true; 2260 } 2261 } 2262 2263 unmap: 2264 cpu_physical_memory_unmap(int_map, len, 1, is_dirty); 2265 } 2266 2267 static void vmbus_dev_realize(DeviceState *dev, Error **errp) 2268 { 2269 VMBusDevice *vdev = VMBUS_DEVICE(dev); 2270 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); 2271 VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev)); 2272 BusChild *child; 2273 Error *err = NULL; 2274 char idstr[UUID_FMT_LEN + 1]; 2275 2276 assert(!qemu_uuid_is_null(&vdev->instanceid)); 2277 2278 /* Check for instance id collision for this class id */ 2279 QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) { 2280 VMBusDevice *child_dev = VMBUS_DEVICE(child->child); 2281 2282 if (child_dev == vdev) { 2283 continue; 2284 } 2285 2286 if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) { 2287 qemu_uuid_unparse(&vdev->instanceid, idstr); 2288 error_setg(&err, "duplicate vmbus device instance id %s", idstr); 2289 goto error_out; 2290 } 2291 } 2292 2293 vdev->dma_as = &address_space_memory; 2294 2295 create_channels(vmbus, vdev, &err); 2296 if (err) { 2297 goto error_out; 2298 } 2299 2300 if (vdc->vmdev_realize) { 2301 vdc->vmdev_realize(vdev, &err); 2302 if (err) { 2303 goto err_vdc_realize; 2304 } 2305 } 2306 return; 2307 2308 err_vdc_realize: 2309 free_channels(vdev); 2310 error_out: 2311 error_propagate(errp, err); 2312 } 2313 2314 static void vmbus_dev_reset(DeviceState *dev) 2315 { 2316 uint16_t i; 2317 VMBusDevice *vdev = VMBUS_DEVICE(dev); 2318 VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); 2319 2320 if (vdev->channels) { 2321 for (i = 0; i < vdev->num_channels; i++) { 2322 
VMBusChannel *chan = &vdev->channels[i];
            close_channel(chan);
            chan->state = VMCHAN_INIT;
        }
    }

    if (vdc->vmdev_reset) {
        vdc->vmdev_reset(vdev);
    }
}

/*
 * Unrealize a vmbus device: invoke the device class's vmdev_unrealize hook
 * (if any), then release the device's channels.
 */
static void vmbus_dev_unrealize(DeviceState *dev)
{
    VMBusDevice *vdev = VMBUS_DEVICE(dev);
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);

    if (vdc->vmdev_unrealize) {
        vdc->vmdev_unrealize(vdev);
    }
    free_channels(vdev);
}

/* Class initializer of the abstract vmbus device type. */
static void vmbus_dev_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *kdev = DEVICE_CLASS(klass);
    kdev->bus_type = TYPE_VMBUS;
    kdev->realize = vmbus_dev_realize;
    kdev->unrealize = vmbus_dev_unrealize;
    kdev->reset = vmbus_dev_reset;
}

/* "instanceid" property, added only to devices without a class-fixed id */
static Property vmbus_dev_instanceid =
    DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid);

/*
 * Instance initializer: take the instance id from the class if the class
 * defines one, otherwise expose it as a per-device property.
 */
static void vmbus_dev_instance_init(Object *obj)
{
    VMBusDevice *vdev = VMBUS_DEVICE(obj);
    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);

    if (!qemu_uuid_is_null(&vdc->instanceid)) {
        /* Class wants to only have a single instance with a fixed UUID */
        vdev->instanceid = vdc->instanceid;
    } else {
        qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid);
    }
}

/* Migration description of a vmbus device: instance id plus its channels. */
const VMStateDescription vmstate_vmbus_dev = {
    .name = TYPE_VMBUS_DEVICE,
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
        VMSTATE_UINT16(num_channels, VMBusDevice),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
                                             num_channels, vmstate_channel,
                                             VMBusChannel),
        VMSTATE_END_OF_LIST()
    }
};

/* vmbus generic device base */
static const TypeInfo vmbus_dev_type_info = {
    .name = TYPE_VMBUS_DEVICE,
    .parent = TYPE_DEVICE,
    .abstract = true,
    .instance_size = sizeof(VMBusDevice),
    .class_size =
sizeof(VMBusDeviceClass), 2390 .class_init = vmbus_dev_class_init, 2391 .instance_init = vmbus_dev_instance_init, 2392 }; 2393 2394 static void vmbus_realize(BusState *bus, Error **errp) 2395 { 2396 int ret = 0; 2397 Error *local_err = NULL; 2398 VMBus *vmbus = VMBUS(bus); 2399 2400 qemu_mutex_init(&vmbus->rx_queue_lock); 2401 2402 QTAILQ_INIT(&vmbus->gpadl_list); 2403 QTAILQ_INIT(&vmbus->channel_list); 2404 2405 ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, 2406 vmbus_recv_message, vmbus); 2407 if (ret != 0) { 2408 error_setg(&local_err, "hyperv set message handler failed: %d", ret); 2409 goto error_out; 2410 } 2411 2412 ret = event_notifier_init(&vmbus->notifier, 0); 2413 if (ret != 0) { 2414 error_setg(&local_err, "event notifier failed to init with %d", ret); 2415 goto remove_msg_handler; 2416 } 2417 2418 event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event); 2419 ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, 2420 &vmbus->notifier); 2421 if (ret != 0) { 2422 error_setg(&local_err, "hyperv set event handler failed with %d", ret); 2423 goto clear_event_notifier; 2424 } 2425 2426 return; 2427 2428 clear_event_notifier: 2429 event_notifier_cleanup(&vmbus->notifier); 2430 remove_msg_handler: 2431 hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL); 2432 error_out: 2433 qemu_mutex_destroy(&vmbus->rx_queue_lock); 2434 error_propagate(errp, local_err); 2435 } 2436 2437 static void vmbus_unrealize(BusState *bus) 2438 { 2439 VMBus *vmbus = VMBUS(bus); 2440 2441 hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL); 2442 hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL); 2443 event_notifier_cleanup(&vmbus->notifier); 2444 2445 qemu_mutex_destroy(&vmbus->rx_queue_lock); 2446 } 2447 2448 static void vmbus_reset(BusState *bus) 2449 { 2450 vmbus_deinit(VMBUS(bus)); 2451 } 2452 2453 static char *vmbus_get_dev_path(DeviceState *dev) 2454 { 2455 BusState *bus = qdev_get_parent_bus(dev); 2456 return 
qdev_get_dev_path(bus->parent); 2457 } 2458 2459 static char *vmbus_get_fw_dev_path(DeviceState *dev) 2460 { 2461 VMBusDevice *vdev = VMBUS_DEVICE(dev); 2462 char uuid[UUID_FMT_LEN + 1]; 2463 2464 qemu_uuid_unparse(&vdev->instanceid, uuid); 2465 return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid); 2466 } 2467 2468 static void vmbus_class_init(ObjectClass *klass, void *data) 2469 { 2470 BusClass *k = BUS_CLASS(klass); 2471 2472 k->get_dev_path = vmbus_get_dev_path; 2473 k->get_fw_dev_path = vmbus_get_fw_dev_path; 2474 k->realize = vmbus_realize; 2475 k->unrealize = vmbus_unrealize; 2476 k->reset = vmbus_reset; 2477 } 2478 2479 static int vmbus_pre_load(void *opaque) 2480 { 2481 VMBusChannel *chan; 2482 VMBus *vmbus = VMBUS(opaque); 2483 2484 /* 2485 * channel IDs allocated by the source will come in the migration stream 2486 * for each channel, so clean up the ones allocated at realize 2487 */ 2488 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 2489 unregister_chan_id(chan); 2490 } 2491 2492 return 0; 2493 } 2494 static int vmbus_post_load(void *opaque, int version_id) 2495 { 2496 int ret; 2497 VMBus *vmbus = VMBUS(opaque); 2498 VMBusGpadl *gpadl; 2499 VMBusChannel *chan; 2500 2501 ret = vmbus_init(vmbus); 2502 if (ret) { 2503 return ret; 2504 } 2505 2506 QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { 2507 gpadl->vmbus = vmbus; 2508 gpadl->refcount = 1; 2509 } 2510 2511 /* 2512 * reopening channels depends on initialized vmbus so it's done here 2513 * instead of channel_post_load() 2514 */ 2515 QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 2516 2517 if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) { 2518 open_channel(chan); 2519 } 2520 2521 if (chan->state != VMCHAN_OPEN) { 2522 continue; 2523 } 2524 2525 if (!vmbus_channel_is_open(chan)) { 2526 /* reopen failed, abort loading */ 2527 return -1; 2528 } 2529 2530 /* resume processing on the guest side if it missed the notification */ 2531 
hyperv_sint_route_set_sint(chan->notify_route); 2532 /* ditto on the host side */ 2533 vmbus_channel_notify_host(chan); 2534 } 2535 2536 vmbus_resched(vmbus); 2537 return 0; 2538 } 2539 2540 static const VMStateDescription vmstate_post_message_input = { 2541 .name = "vmbus/hyperv_post_message_input", 2542 .version_id = 0, 2543 .minimum_version_id = 0, 2544 .fields = (VMStateField[]) { 2545 /* 2546 * skip connection_id and message_type as they are validated before 2547 * queueing and ignored on dequeueing 2548 */ 2549 VMSTATE_UINT32(payload_size, struct hyperv_post_message_input), 2550 VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input, 2551 HV_MESSAGE_PAYLOAD_SIZE), 2552 VMSTATE_END_OF_LIST() 2553 } 2554 }; 2555 2556 static bool vmbus_rx_queue_needed(void *opaque) 2557 { 2558 VMBus *vmbus = VMBUS(opaque); 2559 return vmbus->rx_queue_size; 2560 } 2561 2562 static const VMStateDescription vmstate_rx_queue = { 2563 .name = "vmbus/rx_queue", 2564 .version_id = 0, 2565 .minimum_version_id = 0, 2566 .needed = vmbus_rx_queue_needed, 2567 .fields = (VMStateField[]) { 2568 VMSTATE_UINT8(rx_queue_head, VMBus), 2569 VMSTATE_UINT8(rx_queue_size, VMBus), 2570 VMSTATE_STRUCT_ARRAY(rx_queue, VMBus, 2571 HV_MSG_QUEUE_LEN, 0, 2572 vmstate_post_message_input, 2573 struct hyperv_post_message_input), 2574 VMSTATE_END_OF_LIST() 2575 } 2576 }; 2577 2578 static const VMStateDescription vmstate_vmbus = { 2579 .name = TYPE_VMBUS, 2580 .version_id = 0, 2581 .minimum_version_id = 0, 2582 .pre_load = vmbus_pre_load, 2583 .post_load = vmbus_post_load, 2584 .fields = (VMStateField[]) { 2585 VMSTATE_UINT8(state, VMBus), 2586 VMSTATE_UINT32(version, VMBus), 2587 VMSTATE_UINT32(target_vp, VMBus), 2588 VMSTATE_UINT64(int_page_gpa, VMBus), 2589 VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0, 2590 vmstate_gpadl, VMBusGpadl, link), 2591 VMSTATE_END_OF_LIST() 2592 }, 2593 .subsections = (const VMStateDescription * []) { 2594 &vmstate_rx_queue, 2595 NULL 2596 } 2597 }; 2598 2599 static const TypeInfo 
vmbus_type_info = { 2600 .name = TYPE_VMBUS, 2601 .parent = TYPE_BUS, 2602 .instance_size = sizeof(VMBus), 2603 .class_init = vmbus_class_init, 2604 }; 2605 2606 static void vmbus_bridge_realize(DeviceState *dev, Error **errp) 2607 { 2608 VMBusBridge *bridge = VMBUS_BRIDGE(dev); 2609 2610 /* 2611 * here there's at least one vmbus bridge that is being realized, so 2612 * vmbus_bridge_find can only return NULL if it's not unique 2613 */ 2614 if (!vmbus_bridge_find()) { 2615 error_setg(errp, "there can be at most one %s in the system", 2616 TYPE_VMBUS_BRIDGE); 2617 return; 2618 } 2619 2620 if (!hyperv_is_synic_enabled()) { 2621 error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX"); 2622 return; 2623 } 2624 2625 bridge->bus = VMBUS(qbus_create(TYPE_VMBUS, dev, "vmbus")); 2626 } 2627 2628 static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev) 2629 { 2630 /* there can be only one VMBus */ 2631 return g_strdup("0"); 2632 } 2633 2634 static const VMStateDescription vmstate_vmbus_bridge = { 2635 .name = TYPE_VMBUS_BRIDGE, 2636 .version_id = 0, 2637 .minimum_version_id = 0, 2638 .fields = (VMStateField[]) { 2639 VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus), 2640 VMSTATE_END_OF_LIST() 2641 }, 2642 }; 2643 2644 static void vmbus_bridge_class_init(ObjectClass *klass, void *data) 2645 { 2646 DeviceClass *k = DEVICE_CLASS(klass); 2647 SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass); 2648 2649 k->realize = vmbus_bridge_realize; 2650 k->fw_name = "vmbus"; 2651 sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address; 2652 set_bit(DEVICE_CATEGORY_BRIDGE, k->categories); 2653 k->vmsd = &vmstate_vmbus_bridge; 2654 /* override SysBusDevice's default */ 2655 k->user_creatable = true; 2656 } 2657 2658 static const TypeInfo vmbus_bridge_type_info = { 2659 .name = TYPE_VMBUS_BRIDGE, 2660 .parent = TYPE_SYS_BUS_DEVICE, 2661 .instance_size = sizeof(VMBusBridge), 2662 .class_init = vmbus_bridge_class_init, 2663 }; 2664 2665 static void 
vmbus_register_types(void) 2666 { 2667 type_register_static(&vmbus_bridge_type_info); 2668 type_register_static(&vmbus_dev_type_info); 2669 type_register_static(&vmbus_type_info); 2670 } 2671 2672 type_init(vmbus_register_types) 2673