xref: /qemu/hw/hyperv/hyperv.c (revision ccbdf5e81b502b238748ab64366bba5bf4c056d3)
1 /*
2  * Hyper-V guest/hypervisor interaction
3  *
4  * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "exec/address-spaces.h"
15 #include "sysemu/kvm.h"
16 #include "qemu/bitops.h"
17 #include "qemu/error-report.h"
18 #include "qemu/lockable.h"
19 #include "qemu/queue.h"
20 #include "qemu/rcu.h"
21 #include "qemu/rcu_queue.h"
22 #include "hw/hyperv/hyperv.h"
23 #include "qom/object.h"
24 
/*
 * State of one Hyper-V Synthetic Interrupt Controller (SynIC) device.
 * An instance is created per vCPU by hyperv_synic_add() and attached to the
 * CPU object as its "synic" child.
 */
struct SynICState {
    DeviceState parent_obj;

    /* the vCPU this SynIC belongs to; assigned in hyperv_synic_add() */
    CPUState *cs;

    /* last sctl_enable value handed to synic_update() */
    bool sctl_enabled;
    /*
     * guest-physical addresses at which the message / event pages are
     * currently mapped into system memory; 0 means "not mapped"
     */
    hwaddr msg_page_addr;
    hwaddr event_page_addr;
    /* RAM regions backing the two pages, created in synic_realize() */
    MemoryRegion msg_page_mr;
    MemoryRegion event_page_mr;
    /* host pointers into the RAM of the regions above */
    struct hyperv_message_page *msg_page;
    struct hyperv_event_flags_page *event_page;

    /* taken around insertions into / removals from sint_routes */
    QemuMutex sint_routes_mutex;
    /* SINT routes created against this SynIC by hyperv_sint_route_new() */
    QLIST_HEAD(, HvSintRoute) sint_routes;
};
41 
/* QOM type name under which the SynIC device is registered */
#define TYPE_SYNIC "hyperv-synic"
OBJECT_DECLARE_SIMPLE_TYPE(SynICState, SYNIC)

/* set by hyperv_synic_add(); nothing in this file clears it again */
static bool synic_enabled;
46 
47 bool hyperv_is_synic_enabled(void)
48 {
49     return synic_enabled;
50 }
51 
52 static SynICState *get_synic(CPUState *cs)
53 {
54     return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
55 }
56 
57 static void synic_update(SynICState *synic, bool sctl_enable,
58                          hwaddr msg_page_addr, hwaddr event_page_addr)
59 {
60 
61     synic->sctl_enabled = sctl_enable;
62     if (synic->msg_page_addr != msg_page_addr) {
63         if (synic->msg_page_addr) {
64             memory_region_del_subregion(get_system_memory(),
65                                         &synic->msg_page_mr);
66         }
67         if (msg_page_addr) {
68             memory_region_add_subregion(get_system_memory(), msg_page_addr,
69                                         &synic->msg_page_mr);
70         }
71         synic->msg_page_addr = msg_page_addr;
72     }
73     if (synic->event_page_addr != event_page_addr) {
74         if (synic->event_page_addr) {
75             memory_region_del_subregion(get_system_memory(),
76                                         &synic->event_page_mr);
77         }
78         if (event_page_addr) {
79             memory_region_add_subregion(get_system_memory(), event_page_addr,
80                                         &synic->event_page_mr);
81         }
82         synic->event_page_addr = event_page_addr;
83     }
84 }
85 
86 void hyperv_synic_update(CPUState *cs, bool sctl_enable,
87                          hwaddr msg_page_addr, hwaddr event_page_addr)
88 {
89     SynICState *synic = get_synic(cs);
90 
91     if (!synic) {
92         return;
93     }
94 
95     synic_update(synic, sctl_enable, msg_page_addr, event_page_addr);
96 }
97 
98 static void synic_realize(DeviceState *dev, Error **errp)
99 {
100     Object *obj = OBJECT(dev);
101     SynICState *synic = SYNIC(dev);
102     char *msgp_name, *eventp_name;
103     uint32_t vp_index;
104 
105     /* memory region names have to be globally unique */
106     vp_index = hyperv_vp_index(synic->cs);
107     msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
108     eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
109 
110     memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
111                            sizeof(*synic->msg_page), &error_abort);
112     memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
113                            sizeof(*synic->event_page), &error_abort);
114     synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
115     synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
116     qemu_mutex_init(&synic->sint_routes_mutex);
117     QLIST_INIT(&synic->sint_routes);
118 
119     g_free(msgp_name);
120     g_free(eventp_name);
121 }
122 
123 static void synic_reset(DeviceState *dev)
124 {
125     SynICState *synic = SYNIC(dev);
126     memset(synic->msg_page, 0, sizeof(*synic->msg_page));
127     memset(synic->event_page, 0, sizeof(*synic->event_page));
128     synic_update(synic, false, 0, 0);
129     assert(QLIST_EMPTY(&synic->sint_routes));
130 }
131 
132 static void synic_class_init(ObjectClass *klass, void *data)
133 {
134     DeviceClass *dc = DEVICE_CLASS(klass);
135 
136     dc->realize = synic_realize;
137     dc->reset = synic_reset;
138     dc->user_creatable = false;
139 }
140 
141 void hyperv_synic_add(CPUState *cs)
142 {
143     Object *obj;
144     SynICState *synic;
145 
146     obj = object_new(TYPE_SYNIC);
147     synic = SYNIC(obj);
148     synic->cs = cs;
149     object_property_add_child(OBJECT(cs), "synic", obj);
150     object_unref(obj);
151     qdev_realize(DEVICE(obj), NULL, &error_abort);
152     synic_enabled = true;
153 }
154 
155 void hyperv_synic_reset(CPUState *cs)
156 {
157     SynICState *synic = get_synic(cs);
158 
159     if (synic) {
160         device_legacy_reset(DEVICE(synic));
161     }
162 }
163 
/* QOM type description of the SynIC device: a plain TYPE_DEVICE subclass */
static const TypeInfo synic_type_info = {
    .name = TYPE_SYNIC,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(SynICState),
    .class_init = synic_class_init,
};

/* Register the SynIC type; hooked up through type_init() below. */
static void synic_register_types(void)
{
    type_register_static(&synic_type_info);
}

type_init(synic_register_types)
177 
/*
 * KVM has its own message producers (SynIC timers).  To guarantee
 * serialization with both KVM vcpu and the guest cpu, the messages are first
 * staged in an intermediate area and then posted to the SynIC message page in
 * the vcpu thread.
 *
 * One staging area exists per SINT route that was created with a callback
 * (see hyperv_sint_route_new()); its ownership is handed from one stage to
 * the next through the 'state' field.
 */
typedef struct HvSintStagedMessage {
    /* message content staged by hyperv_post_msg */
    struct hyperv_message msg;
    /* callback + data (r/o) to complete the processing in a BH */
    HvSintMsgCb cb;
    void *cb_data;
    /* message posting status filled by cpu_post_msg */
    int status;
    /* passing the buck: which stage currently owns the staging area */
    enum {
        /* initial state */
        HV_STAGED_MSG_FREE,
        /*
         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
         */
        HV_STAGED_MSG_BUSY,
        /*
         * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
         * notify the guest, records the status, marks the posting done (BUSY
         * -> POSTED), and schedules sint_msg_bh BH
         */
        HV_STAGED_MSG_POSTED,
        /*
         * sint_msg_bh (BH) verifies that the posting is done, runs the
         * callback, and starts over (POSTED -> FREE)
         */
    } state;
} HvSintStagedMessage;
213 
/*
 * A route for raising one synthetic interrupt (SINT) on one SynIC.
 * Created by hyperv_sint_route_new(), reference-counted through
 * hyperv_sint_route_ref() / hyperv_sint_route_unref().
 */
struct HvSintRoute {
    /* SINT number; also used as the slot index into the SynIC pages */
    uint32_t sint;
    /* SynIC this route delivers to */
    SynICState *synic;
    /*
     * GSI obtained from KVM for this route; left at 0 when no GSI was set
     * up, and the rest of the file treats 0 as "no GSI"
     */
    int gsi;
    /* set to raise the SINT (only initialised when a GSI was set up) */
    EventNotifier sint_set_notifier;
    /* handled by sint_ack_handler (only initialised when a callback exists) */
    EventNotifier sint_ack_notifier;

    /* message staging area; NULL when the route was created without callback */
    HvSintStagedMessage *staged_msg;

    /* number of references held; the route is destroyed when it drops to 0 */
    unsigned refcount;
    /* linkage in SynICState.sint_routes */
    QLIST_ENTRY(HvSintRoute) link;
};
226 
227 static CPUState *hyperv_find_vcpu(uint32_t vp_index)
228 {
229     CPUState *cs = qemu_get_cpu(vp_index);
230     assert(hyperv_vp_index(cs) == vp_index);
231     return cs;
232 }
233 
234 /*
235  * BH to complete the processing of a staged message.
236  */
237 static void sint_msg_bh(void *opaque)
238 {
239     HvSintRoute *sint_route = opaque;
240     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
241 
242     if (qatomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
243         /* status nor ready yet (spurious ack from guest?), ignore */
244         return;
245     }
246 
247     staged_msg->cb(staged_msg->cb_data, staged_msg->status);
248     staged_msg->status = 0;
249 
250     /* staged message processing finished, ready to start over */
251     qatomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
252     /* drop the reference taken in hyperv_post_msg */
253     hyperv_sint_route_unref(sint_route);
254 }
255 
256 /*
257  * Worker to transfer the message from the staging area into the SynIC message
258  * page in vcpu context.
259  */
260 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
261 {
262     HvSintRoute *sint_route = data.host_ptr;
263     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
264     SynICState *synic = sint_route->synic;
265     struct hyperv_message *dst_msg;
266     bool wait_for_sint_ack = false;
267 
268     assert(staged_msg->state == HV_STAGED_MSG_BUSY);
269 
270     if (!synic->msg_page_addr) {
271         staged_msg->status = -ENXIO;
272         goto posted;
273     }
274 
275     dst_msg = &synic->msg_page->slot[sint_route->sint];
276 
277     if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
278         dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
279         staged_msg->status = -EAGAIN;
280         wait_for_sint_ack = true;
281     } else {
282         memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
283         staged_msg->status = hyperv_sint_route_set_sint(sint_route);
284     }
285 
286     memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
287 
288 posted:
289     qatomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
290     /*
291      * Notify the msg originator of the progress made; if the slot was busy we
292      * set msg_pending flag in it so it will be the guest who will do EOM and
293      * trigger the notification from KVM via sint_ack_notifier
294      */
295     if (!wait_for_sint_ack) {
296         aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
297                                 sint_route);
298     }
299 }
300 
301 /*
302  * Post a Hyper-V message to the staging area, for delivery to guest in the
303  * vcpu thread.
304  */
305 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
306 {
307     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
308 
309     assert(staged_msg);
310 
311     /* grab the staging area */
312     if (qatomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
313                        HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
314         return -EAGAIN;
315     }
316 
317     memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
318 
319     /* hold a reference on sint_route until the callback is finished */
320     hyperv_sint_route_ref(sint_route);
321 
322     /* schedule message posting attempt in vcpu thread */
323     async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
324                      RUN_ON_CPU_HOST_PTR(sint_route));
325     return 0;
326 }
327 
328 static void sint_ack_handler(EventNotifier *notifier)
329 {
330     HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
331                                            sint_ack_notifier);
332     event_notifier_test_and_clear(notifier);
333 
334     /*
335      * the guest consumed the previous message so complete the current one with
336      * -EAGAIN and let the msg originator retry
337      */
338     aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
339 }
340 
341 /*
342  * Set given event flag for a given sint on a given vcpu, and signal the sint.
343  */
344 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
345 {
346     int ret;
347     SynICState *synic = sint_route->synic;
348     unsigned long *flags, set_mask;
349     unsigned set_idx;
350 
351     if (eventno > HV_EVENT_FLAGS_COUNT) {
352         return -EINVAL;
353     }
354     if (!synic->sctl_enabled || !synic->event_page_addr) {
355         return -ENXIO;
356     }
357 
358     set_idx = BIT_WORD(eventno);
359     set_mask = BIT_MASK(eventno);
360     flags = synic->event_page->slot[sint_route->sint].flags;
361 
362     if ((qatomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
363         memory_region_set_dirty(&synic->event_page_mr, 0,
364                                 sizeof(*synic->event_page));
365         ret = hyperv_sint_route_set_sint(sint_route);
366     } else {
367         ret = 0;
368     }
369     return ret;
370 }
371 
/*
 * Create a SINT route for SINT number @sint on the vcpu with VP index
 * @vp_index.
 *
 * When @cb is non-NULL a message staging area is allocated and an ack
 * notifier is set up, so the route can be used with hyperv_post_msg(); @cb is
 * later invoked with @cb_data and the posting status.
 *
 * A KVM GSI/irqfd is only set up when the SynIC is currently enabled
 * (sctl_enabled); otherwise the route is created with gsi left at 0.
 *
 * Returns the new route (refcount 1, linked into the SynIC's route list), or
 * NULL on failure, in which case everything set up so far is torn down.
 */
HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
                                   HvSintMsgCb cb, void *cb_data)
{
    HvSintRoute *sint_route = NULL;
    EventNotifier *ack_notifier = NULL;
    int r, gsi;
    CPUState *cs;
    SynICState *synic;
    bool ack_event_initialized = false;

    cs = hyperv_find_vcpu(vp_index);
    if (!cs) {
        return NULL;
    }

    synic = get_synic(cs);
    if (!synic) {
        return NULL;
    }

    /*
     * NOTE(review): GLib documents g_new0() as aborting on allocation
     * failure, so this NULL check is presumably dead code - confirm.
     */
    sint_route = g_new0(HvSintRoute, 1);
    if (!sint_route) {
        return NULL;
    }

    sint_route->synic = synic;
    sint_route->sint = sint;
    sint_route->refcount = 1;

    /* the ack notifier (and the staging area) only exist with a callback */
    ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
    if (ack_notifier) {
        sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
        /* NOTE(review): same remark as above about g_new0() - confirm */
        if (!sint_route->staged_msg) {
            goto cleanup_err_sint;
        }
        sint_route->staged_msg->cb = cb;
        sint_route->staged_msg->cb_data = cb_data;

        r = event_notifier_init(ack_notifier, false);
        if (r) {
            goto cleanup_err_sint;
        }
        event_notifier_set_handler(ack_notifier, sint_ack_handler);
        /* remember that the error path has to undo the notifier setup */
        ack_event_initialized = true;
    }

    /* See if we are done or we need to setup a GSI for this SintRoute */
    if (!synic->sctl_enabled) {
        /* despite its name, the 'cleanup' label is the success path */
        goto cleanup;
    }

    /* We need to setup a GSI for this SintRoute */
    r = event_notifier_init(&sint_route->sint_set_notifier, false);
    if (r) {
        goto cleanup_err_sint;
    }

    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
    if (gsi < 0) {
        goto cleanup_err_sint_notifier;
    }

    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
                                           &sint_route->sint_set_notifier,
                                           ack_notifier, gsi);
    if (r) {
        goto cleanup_err_irqfd;
    }
    sint_route->gsi = gsi;
cleanup:
    /* success: publish the route on the SynIC's list */
    qemu_mutex_lock(&synic->sint_routes_mutex);
    QLIST_INSERT_HEAD(&synic->sint_routes, sint_route, link);
    qemu_mutex_unlock(&synic->sint_routes_mutex);
    return sint_route;

    /*
     * Error unwinding: each label undoes one setup step and falls through to
     * the labels for the steps that preceded it.
     */
cleanup_err_irqfd:
    kvm_irqchip_release_virq(kvm_state, gsi);

cleanup_err_sint_notifier:
    event_notifier_cleanup(&sint_route->sint_set_notifier);

cleanup_err_sint:
    if (ack_notifier) {
        if (ack_event_initialized) {
            event_notifier_set_handler(ack_notifier, NULL);
            event_notifier_cleanup(ack_notifier);
        }

        g_free(sint_route->staged_msg);
    }

    g_free(sint_route);
    return NULL;
}
466 
467 void hyperv_sint_route_ref(HvSintRoute *sint_route)
468 {
469     sint_route->refcount++;
470 }
471 
472 void hyperv_sint_route_unref(HvSintRoute *sint_route)
473 {
474     SynICState *synic;
475 
476     if (!sint_route) {
477         return;
478     }
479 
480     assert(sint_route->refcount > 0);
481 
482     if (--sint_route->refcount) {
483         return;
484     }
485 
486     synic = sint_route->synic;
487     qemu_mutex_lock(&synic->sint_routes_mutex);
488     QLIST_REMOVE(sint_route, link);
489     qemu_mutex_unlock(&synic->sint_routes_mutex);
490 
491     if (sint_route->gsi) {
492         kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
493                                               &sint_route->sint_set_notifier,
494                                               sint_route->gsi);
495         kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
496         event_notifier_cleanup(&sint_route->sint_set_notifier);
497     }
498 
499     if (sint_route->staged_msg) {
500         event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
501         event_notifier_cleanup(&sint_route->sint_ack_notifier);
502         g_free(sint_route->staged_msg);
503     }
504     g_free(sint_route);
505 }
506 
507 int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
508 {
509     if (!sint_route->gsi) {
510         return 0;
511     }
512 
513     return event_notifier_set(&sint_route->sint_set_notifier);
514 }
515 
/*
 * Registration of a message handler for one connection id; an entry of the
 * msg_handlers list.
 */
typedef struct MsgHandler {
    /* used by g_free_rcu() when the entry is removed */
    struct rcu_head rcu;
    QLIST_ENTRY(MsgHandler) link;
    /* connection id this handler is registered for */
    uint32_t conn_id;
    /* callback invoked by hyperv_hcall_post_message(), with 'data' below */
    HvMsgHandler handler;
    void *data;
} MsgHandler;
523 
/*
 * Registration of an event notifier for one connection id; an entry of the
 * event_flag_handlers list.
 */
typedef struct EventFlagHandler {
    /* used by g_free_rcu() when the entry is removed */
    struct rcu_head rcu;
    QLIST_ENTRY(EventFlagHandler) link;
    /* connection id this notifier is registered for */
    uint32_t conn_id;
    /* notifier set by hyperv_hcall_signal_event() for a matching conn_id */
    EventNotifier *notifier;
} EventFlagHandler;
530 
/*
 * Registered handlers.  The lists are modified with the RCU list macros
 * while holding handlers_mutex, and traversed under the RCU read lock by the
 * hypercall paths.
 */
static QLIST_HEAD(, MsgHandler) msg_handlers;
static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
static QemuMutex handlers_mutex;
534 
535 static void __attribute__((constructor)) hv_init(void)
536 {
537     QLIST_INIT(&msg_handlers);
538     QLIST_INIT(&event_flag_handlers);
539     qemu_mutex_init(&handlers_mutex);
540 }
541 
542 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
543 {
544     int ret;
545     MsgHandler *mh;
546 
547     QEMU_LOCK_GUARD(&handlers_mutex);
548     QLIST_FOREACH(mh, &msg_handlers, link) {
549         if (mh->conn_id == conn_id) {
550             if (handler) {
551                 ret = -EEXIST;
552             } else {
553                 QLIST_REMOVE_RCU(mh, link);
554                 g_free_rcu(mh, rcu);
555                 ret = 0;
556             }
557             return ret;
558         }
559     }
560 
561     if (handler) {
562         mh = g_new(MsgHandler, 1);
563         mh->conn_id = conn_id;
564         mh->handler = handler;
565         mh->data = data;
566         QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
567         ret = 0;
568     } else {
569         ret = -ENOENT;
570     }
571 
572     return ret;
573 }
574 
575 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
576 {
577     uint16_t ret;
578     hwaddr len;
579     struct hyperv_post_message_input *msg;
580     MsgHandler *mh;
581 
582     if (fast) {
583         return HV_STATUS_INVALID_HYPERCALL_CODE;
584     }
585     if (param & (__alignof__(*msg) - 1)) {
586         return HV_STATUS_INVALID_ALIGNMENT;
587     }
588 
589     len = sizeof(*msg);
590     msg = cpu_physical_memory_map(param, &len, 0);
591     if (len < sizeof(*msg)) {
592         ret = HV_STATUS_INSUFFICIENT_MEMORY;
593         goto unmap;
594     }
595     if (msg->payload_size > sizeof(msg->payload)) {
596         ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
597         goto unmap;
598     }
599 
600     ret = HV_STATUS_INVALID_CONNECTION_ID;
601     WITH_RCU_READ_LOCK_GUARD() {
602         QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
603             if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
604                 ret = mh->handler(msg, mh->data);
605                 break;
606             }
607         }
608     }
609 
610 unmap:
611     cpu_physical_memory_unmap(msg, len, 0, 0);
612     return ret;
613 }
614 
615 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
616 {
617     int ret;
618     EventFlagHandler *handler;
619 
620     QEMU_LOCK_GUARD(&handlers_mutex);
621     QLIST_FOREACH(handler, &event_flag_handlers, link) {
622         if (handler->conn_id == conn_id) {
623             if (notifier) {
624                 ret = -EEXIST;
625             } else {
626                 QLIST_REMOVE_RCU(handler, link);
627                 g_free_rcu(handler, rcu);
628                 ret = 0;
629             }
630             return ret;
631         }
632     }
633 
634     if (notifier) {
635         handler = g_new(EventFlagHandler, 1);
636         handler->conn_id = conn_id;
637         handler->notifier = notifier;
638         QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
639         ret = 0;
640     } else {
641         ret = -ENOENT;
642     }
643 
644     return ret;
645 }
646 
647 static bool process_event_flags_userspace;
648 
649 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
650 {
651     if (!process_event_flags_userspace &&
652         !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
653         process_event_flags_userspace = true;
654 
655         warn_report("Hyper-V event signaling is not supported by this kernel; "
656                     "using slower userspace hypercall processing");
657     }
658 
659     if (!process_event_flags_userspace) {
660         struct kvm_hyperv_eventfd hvevfd = {
661             .conn_id = conn_id,
662             .fd = notifier ? event_notifier_get_fd(notifier) : -1,
663             .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
664         };
665 
666         return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
667     }
668     return set_event_flag_handler(conn_id, notifier);
669 }
670 
671 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
672 {
673     EventFlagHandler *handler;
674 
675     if (unlikely(!fast)) {
676         hwaddr addr = param;
677 
678         if (addr & (__alignof__(addr) - 1)) {
679             return HV_STATUS_INVALID_ALIGNMENT;
680         }
681 
682         param = ldq_phys(&address_space_memory, addr);
683     }
684 
685     /*
686      * Per spec, bits 32-47 contain the extra "flag number".  However, we
687      * have no use for it, and in all known usecases it is zero, so just
688      * report lookup failure if it isn't.
689      */
690     if (param & 0xffff00000000ULL) {
691         return HV_STATUS_INVALID_PORT_ID;
692     }
693     /* remaining bits are reserved-zero */
694     if (param & ~HV_CONNECTION_ID_MASK) {
695         return HV_STATUS_INVALID_HYPERCALL_INPUT;
696     }
697 
698     RCU_READ_LOCK_GUARD();
699     QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
700         if (handler->conn_id == param) {
701             event_notifier_set(handler->notifier);
702             return 0;
703         }
704     }
705     return HV_STATUS_INVALID_CONNECTION_ID;
706 }
707