1 /* 2 * Hyper-V guest/hypervisor interaction 3 * 4 * Copyright (c) 2015-2018 Virtuozzo International GmbH. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/main-loop.h" 12 #include "qemu/module.h" 13 #include "qapi/error.h" 14 #include "exec/address-spaces.h" 15 #include "sysemu/kvm.h" 16 #include "qemu/bitops.h" 17 #include "qemu/error-report.h" 18 #include "qemu/lockable.h" 19 #include "qemu/queue.h" 20 #include "qemu/rcu.h" 21 #include "qemu/rcu_queue.h" 22 #include "hw/hyperv/hyperv.h" 23 #include "qom/object.h" 24 25 struct SynICState { 26 DeviceState parent_obj; 27 28 CPUState *cs; 29 30 bool enabled; 31 hwaddr msg_page_addr; 32 hwaddr event_page_addr; 33 MemoryRegion msg_page_mr; 34 MemoryRegion event_page_mr; 35 struct hyperv_message_page *msg_page; 36 struct hyperv_event_flags_page *event_page; 37 }; 38 typedef struct SynICState SynICState; 39 40 #define TYPE_SYNIC "hyperv-synic" 41 #define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC) 42 43 static bool synic_enabled; 44 45 bool hyperv_is_synic_enabled(void) 46 { 47 return synic_enabled; 48 } 49 50 static SynICState *get_synic(CPUState *cs) 51 { 52 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); 53 } 54 55 static void synic_update(SynICState *synic, bool enable, 56 hwaddr msg_page_addr, hwaddr event_page_addr) 57 { 58 59 synic->enabled = enable; 60 if (synic->msg_page_addr != msg_page_addr) { 61 if (synic->msg_page_addr) { 62 memory_region_del_subregion(get_system_memory(), 63 &synic->msg_page_mr); 64 } 65 if (msg_page_addr) { 66 memory_region_add_subregion(get_system_memory(), msg_page_addr, 67 &synic->msg_page_mr); 68 } 69 synic->msg_page_addr = msg_page_addr; 70 } 71 if (synic->event_page_addr != event_page_addr) { 72 if (synic->event_page_addr) { 73 memory_region_del_subregion(get_system_memory(), 74 &synic->event_page_mr); 75 } 76 if (event_page_addr) { 77 memory_region_add_subregion(get_system_memory(), event_page_addr, 78 &synic->event_page_mr); 79 } 80 synic->event_page_addr = event_page_addr; 81 } 82 } 83 84 void hyperv_synic_update(CPUState *cs, bool enable, 85 hwaddr msg_page_addr, hwaddr event_page_addr) 86 { 87 SynICState *synic = get_synic(cs); 88 89 if (!synic) { 90 return; 91 } 92 93 synic_update(synic, enable, msg_page_addr, event_page_addr); 94 } 95 96 static void synic_realize(DeviceState *dev, Error **errp) 97 { 98 Object *obj = OBJECT(dev); 99 SynICState *synic = SYNIC(dev); 100 char *msgp_name, *eventp_name; 101 uint32_t vp_index; 102 103 /* memory region names have to be globally unique */ 104 vp_index = hyperv_vp_index(synic->cs); 105 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index); 106 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index); 107 108 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name, 109 sizeof(*synic->msg_page), &error_abort); 110 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name, 111 sizeof(*synic->event_page), &error_abort); 112 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr); 113 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr); 114 115 g_free(msgp_name); 116 g_free(eventp_name); 117 } 118 static void synic_reset(DeviceState *dev) 119 { 120 SynICState *synic = SYNIC(dev); 121 memset(synic->msg_page, 0, sizeof(*synic->msg_page)); 122 memset(synic->event_page, 0, sizeof(*synic->event_page)); 123 synic_update(synic, false, 0, 0); 124 } 125 126 static void synic_class_init(ObjectClass *klass, void *data) 127 { 128 DeviceClass *dc = DEVICE_CLASS(klass); 129 130 dc->realize = synic_realize; 131 dc->reset = synic_reset; 132 dc->user_creatable = false; 133 } 134 135 void hyperv_synic_add(CPUState *cs) 136 { 137 Object *obj; 138 SynICState *synic; 139 140 obj = object_new(TYPE_SYNIC); 141 synic = SYNIC(obj); 142 synic->cs = cs; 143 object_property_add_child(OBJECT(cs), "synic", obj); 144 object_unref(obj); 145 qdev_realize(DEVICE(obj), NULL, &error_abort); 146 synic_enabled = true; 147 } 148 149 void hyperv_synic_reset(CPUState *cs) 150 { 151 SynICState *synic = get_synic(cs); 152 153 if (synic) { 154 device_legacy_reset(DEVICE(synic)); 155 } 156 } 157 158 static const TypeInfo synic_type_info = { 159 .name = TYPE_SYNIC, 160 .parent = TYPE_DEVICE, 161 .instance_size = sizeof(SynICState), 162 .class_init = synic_class_init, 163 }; 164 165 static void synic_register_types(void) 166 { 167 type_register_static(&synic_type_info); 168 } 169 170 type_init(synic_register_types) 171 172 /* 173 * KVM has its own message producers (SynIC timers). To guarantee 174 * serialization with both KVM vcpu and the guest cpu, the messages are first 175 * staged in an intermediate area and then posted to the SynIC message page in 176 * the vcpu thread. 177 */ 178 typedef struct HvSintStagedMessage { 179 /* message content staged by hyperv_post_msg */ 180 struct hyperv_message msg; 181 /* callback + data (r/o) to complete the processing in a BH */ 182 HvSintMsgCb cb; 183 void *cb_data; 184 /* message posting status filled by cpu_post_msg */ 185 int status; 186 /* passing the buck: */ 187 enum { 188 /* initial state */ 189 HV_STAGED_MSG_FREE, 190 /* 191 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE -> 192 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu 193 */ 194 HV_STAGED_MSG_BUSY, 195 /* 196 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, 197 * notify the guest, records the status, marks the posting done (BUSY 198 * -> POSTED), and schedules sint_msg_bh BH 199 */ 200 HV_STAGED_MSG_POSTED, 201 /* 202 * sint_msg_bh (BH) verifies that the posting is done, runs the 203 * callback, and starts over (POSTED -> FREE) 204 */ 205 } state; 206 } HvSintStagedMessage; 207 208 struct HvSintRoute { 209 uint32_t sint; 210 SynICState *synic; 211 int gsi; 212 EventNotifier sint_set_notifier; 213 EventNotifier sint_ack_notifier; 214 215 HvSintStagedMessage *staged_msg; 216 217 unsigned refcount; 218 }; 219 220 static CPUState *hyperv_find_vcpu(uint32_t vp_index) 221 { 222 CPUState *cs = qemu_get_cpu(vp_index); 223 assert(hyperv_vp_index(cs) == vp_index); 224 return cs; 225 } 226 227 /* 228 * BH to complete the processing of a staged message. 229 */ 230 static void sint_msg_bh(void *opaque) 231 { 232 HvSintRoute *sint_route = opaque; 233 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 234 235 if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) { 236 /* status nor ready yet (spurious ack from guest?), ignore */ 237 return; 238 } 239 240 staged_msg->cb(staged_msg->cb_data, staged_msg->status); 241 staged_msg->status = 0; 242 243 /* staged message processing finished, ready to start over */ 244 atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE); 245 /* drop the reference taken in hyperv_post_msg */ 246 hyperv_sint_route_unref(sint_route); 247 } 248 249 /* 250 * Worker to transfer the message from the staging area into the SynIC message 251 * page in vcpu context. 252 */ 253 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data) 254 { 255 HvSintRoute *sint_route = data.host_ptr; 256 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 257 SynICState *synic = sint_route->synic; 258 struct hyperv_message *dst_msg; 259 bool wait_for_sint_ack = false; 260 261 assert(staged_msg->state == HV_STAGED_MSG_BUSY); 262 263 if (!synic->enabled || !synic->msg_page_addr) { 264 staged_msg->status = -ENXIO; 265 goto posted; 266 } 267 268 dst_msg = &synic->msg_page->slot[sint_route->sint]; 269 270 if (dst_msg->header.message_type != HV_MESSAGE_NONE) { 271 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING; 272 staged_msg->status = -EAGAIN; 273 wait_for_sint_ack = true; 274 } else { 275 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg)); 276 staged_msg->status = hyperv_sint_route_set_sint(sint_route); 277 } 278 279 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page)); 280 281 posted: 282 atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED); 283 /* 284 * Notify the msg originator of the progress made; if the slot was busy we 285 * set msg_pending flag in it so it will be the guest who will do EOM and 286 * trigger the notification from KVM via sint_ack_notifier 287 */ 288 if (!wait_for_sint_ack) { 289 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, 290 sint_route); 291 } 292 } 293 294 /* 295 * Post a Hyper-V message to the staging area, for delivery to guest in the 296 * vcpu thread. 297 */ 298 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg) 299 { 300 HvSintStagedMessage *staged_msg = sint_route->staged_msg; 301 302 assert(staged_msg); 303 304 /* grab the staging area */ 305 if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE, 306 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) { 307 return -EAGAIN; 308 } 309 310 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg)); 311 312 /* hold a reference on sint_route until the callback is finished */ 313 hyperv_sint_route_ref(sint_route); 314 315 /* schedule message posting attempt in vcpu thread */ 316 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg, 317 RUN_ON_CPU_HOST_PTR(sint_route)); 318 return 0; 319 } 320 321 static void sint_ack_handler(EventNotifier *notifier) 322 { 323 HvSintRoute *sint_route = container_of(notifier, HvSintRoute, 324 sint_ack_notifier); 325 event_notifier_test_and_clear(notifier); 326 327 /* 328 * the guest consumed the previous message so complete the current one with 329 * -EAGAIN and let the msg originator retry 330 */ 331 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); 332 } 333 334 /* 335 * Set given event flag for a given sint on a given vcpu, and signal the sint. 336 */ 337 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) 338 { 339 int ret; 340 SynICState *synic = sint_route->synic; 341 unsigned long *flags, set_mask; 342 unsigned set_idx; 343 344 if (eventno > HV_EVENT_FLAGS_COUNT) { 345 return -EINVAL; 346 } 347 if (!synic->enabled || !synic->event_page_addr) { 348 return -ENXIO; 349 } 350 351 set_idx = BIT_WORD(eventno); 352 set_mask = BIT_MASK(eventno); 353 flags = synic->event_page->slot[sint_route->sint].flags; 354 355 if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { 356 memory_region_set_dirty(&synic->event_page_mr, 0, 357 sizeof(*synic->event_page)); 358 ret = hyperv_sint_route_set_sint(sint_route); 359 } else { 360 ret = 0; 361 } 362 return ret; 363 } 364 365 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, 366 HvSintMsgCb cb, void *cb_data) 367 { 368 HvSintRoute *sint_route; 369 EventNotifier *ack_notifier; 370 int r, gsi; 371 CPUState *cs; 372 SynICState *synic; 373 374 cs = hyperv_find_vcpu(vp_index); 375 if (!cs) { 376 return NULL; 377 } 378 379 synic = get_synic(cs); 380 if (!synic) { 381 return NULL; 382 } 383 384 sint_route = g_new0(HvSintRoute, 1); 385 r = event_notifier_init(&sint_route->sint_set_notifier, false); 386 if (r) { 387 goto err; 388 } 389 390 391 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL; 392 if (ack_notifier) { 393 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); 394 sint_route->staged_msg->cb = cb; 395 sint_route->staged_msg->cb_data = cb_data; 396 397 r = event_notifier_init(ack_notifier, false); 398 if (r) { 399 goto err_sint_set_notifier; 400 } 401 402 event_notifier_set_handler(ack_notifier, sint_ack_handler); 403 } 404 405 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); 406 if (gsi < 0) { 407 goto err_gsi; 408 } 409 410 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, 411 &sint_route->sint_set_notifier, 412 ack_notifier, gsi); 413 if (r) { 414 goto err_irqfd; 415 } 416 sint_route->gsi = gsi; 417 sint_route->synic = synic; 418 sint_route->sint = sint; 419 sint_route->refcount = 1; 420 421 return sint_route; 422 423 err_irqfd: 424 kvm_irqchip_release_virq(kvm_state, gsi); 425 err_gsi: 426 if (ack_notifier) { 427 event_notifier_set_handler(ack_notifier, NULL); 428 event_notifier_cleanup(ack_notifier); 429 g_free(sint_route->staged_msg); 430 } 431 err_sint_set_notifier: 432 event_notifier_cleanup(&sint_route->sint_set_notifier); 433 err: 434 g_free(sint_route); 435 436 return NULL; 437 } 438 439 void hyperv_sint_route_ref(HvSintRoute *sint_route) 440 { 441 sint_route->refcount++; 442 } 443 444 void hyperv_sint_route_unref(HvSintRoute *sint_route) 445 { 446 if (!sint_route) { 447 return; 448 } 449 450 assert(sint_route->refcount > 0); 451 452 if (--sint_route->refcount) { 453 return; 454 } 455 456 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, 457 &sint_route->sint_set_notifier, 458 sint_route->gsi); 459 kvm_irqchip_release_virq(kvm_state, sint_route->gsi); 460 if (sint_route->staged_msg) { 461 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); 462 event_notifier_cleanup(&sint_route->sint_ack_notifier); 463 g_free(sint_route->staged_msg); 464 } 465 event_notifier_cleanup(&sint_route->sint_set_notifier); 466 g_free(sint_route); 467 } 468 469 int hyperv_sint_route_set_sint(HvSintRoute *sint_route) 470 { 471 return event_notifier_set(&sint_route->sint_set_notifier); 472 } 473 474 typedef struct MsgHandler { 475 struct rcu_head rcu; 476 QLIST_ENTRY(MsgHandler) link; 477 uint32_t conn_id; 478 HvMsgHandler handler; 479 void *data; 480 } MsgHandler; 481 482 typedef struct EventFlagHandler { 483 struct rcu_head rcu; 484 QLIST_ENTRY(EventFlagHandler) link; 485 uint32_t conn_id; 486 EventNotifier *notifier; 487 } EventFlagHandler; 488 489 static QLIST_HEAD(, MsgHandler) msg_handlers; 490 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; 491 static QemuMutex handlers_mutex; 492 493 static void __attribute__((constructor)) hv_init(void) 494 { 495 QLIST_INIT(&msg_handlers); 496 QLIST_INIT(&event_flag_handlers); 497 qemu_mutex_init(&handlers_mutex); 498 } 499 500 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) 501 { 502 int ret; 503 MsgHandler *mh; 504 505 QEMU_LOCK_GUARD(&handlers_mutex); 506 QLIST_FOREACH(mh, &msg_handlers, link) { 507 if (mh->conn_id == conn_id) { 508 if (handler) { 509 ret = -EEXIST; 510 } else { 511 QLIST_REMOVE_RCU(mh, link); 512 g_free_rcu(mh, rcu); 513 ret = 0; 514 } 515 return ret; 516 } 517 } 518 519 if (handler) { 520 mh = g_new(MsgHandler, 1); 521 mh->conn_id = conn_id; 522 mh->handler = handler; 523 mh->data = data; 524 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); 525 ret = 0; 526 } else { 527 ret = -ENOENT; 528 } 529 530 return ret; 531 } 532 533 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) 534 { 535 uint16_t ret; 536 hwaddr len; 537 struct hyperv_post_message_input *msg; 538 MsgHandler *mh; 539 540 if (fast) { 541 return HV_STATUS_INVALID_HYPERCALL_CODE; 542 } 543 if (param & (__alignof__(*msg) - 1)) { 544 return HV_STATUS_INVALID_ALIGNMENT; 545 } 546 547 len = sizeof(*msg); 548 msg = cpu_physical_memory_map(param, &len, 0); 549 if (len < sizeof(*msg)) { 550 ret = HV_STATUS_INSUFFICIENT_MEMORY; 551 goto unmap; 552 } 553 if (msg->payload_size > sizeof(msg->payload)) { 554 ret = HV_STATUS_INVALID_HYPERCALL_INPUT; 555 goto unmap; 556 } 557 558 ret = HV_STATUS_INVALID_CONNECTION_ID; 559 WITH_RCU_READ_LOCK_GUARD() { 560 QLIST_FOREACH_RCU(mh, &msg_handlers, link) { 561 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { 562 ret = mh->handler(msg, mh->data); 563 break; 564 } 565 } 566 } 567 568 unmap: 569 cpu_physical_memory_unmap(msg, len, 0, 0); 570 return ret; 571 } 572 573 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 574 { 575 int ret; 576 EventFlagHandler *handler; 577 578 QEMU_LOCK_GUARD(&handlers_mutex); 579 QLIST_FOREACH(handler, &event_flag_handlers, link) { 580 if (handler->conn_id == conn_id) { 581 if (notifier) { 582 ret = -EEXIST; 583 } else { 584 QLIST_REMOVE_RCU(handler, link); 585 g_free_rcu(handler, rcu); 586 ret = 0; 587 } 588 return ret; 589 } 590 } 591 592 if (notifier) { 593 handler = g_new(EventFlagHandler, 1); 594 handler->conn_id = conn_id; 595 handler->notifier = notifier; 596 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); 597 ret = 0; 598 } else { 599 ret = -ENOENT; 600 } 601 602 return ret; 603 } 604 605 static bool process_event_flags_userspace; 606 607 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) 608 { 609 if (!process_event_flags_userspace && 610 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { 611 process_event_flags_userspace = true; 612 613 warn_report("Hyper-V event signaling is not supported by this kernel; " 614 "using slower userspace hypercall processing"); 615 } 616 617 if (!process_event_flags_userspace) { 618 struct kvm_hyperv_eventfd hvevfd = { 619 .conn_id = conn_id, 620 .fd = notifier ? event_notifier_get_fd(notifier) : -1, 621 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, 622 }; 623 624 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); 625 } 626 return set_event_flag_handler(conn_id, notifier); 627 } 628 629 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) 630 { 631 EventFlagHandler *handler; 632 633 if (unlikely(!fast)) { 634 hwaddr addr = param; 635 636 if (addr & (__alignof__(addr) - 1)) { 637 return HV_STATUS_INVALID_ALIGNMENT; 638 } 639 640 param = ldq_phys(&address_space_memory, addr); 641 } 642 643 /* 644 * Per spec, bits 32-47 contain the extra "flag number". However, we 645 * have no use for it, and in all known usecases it is zero, so just 646 * report lookup failure if it isn't. 647 */ 648 if (param & 0xffff00000000ULL) { 649 return HV_STATUS_INVALID_PORT_ID; 650 } 651 /* remaining bits are reserved-zero */ 652 if (param & ~HV_CONNECTION_ID_MASK) { 653 return HV_STATUS_INVALID_HYPERCALL_INPUT; 654 } 655 656 RCU_READ_LOCK_GUARD(); 657 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { 658 if (handler->conn_id == param) { 659 event_notifier_set(handler->notifier); 660 return 0; 661 } 662 } 663 return HV_STATUS_INVALID_CONNECTION_ID; 664 } 665