// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * mshv_root module's main interrupt handler and associated functionality.
 *
 * Authors: Microsoft Linux virtualization team
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/random.h>
#include <asm/mshyperv.h>

#include "mshv_eventfd.h"
#include "mshv.h"

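/*
 * Pop the next queued port id from this CPU's SynIC event ring for the
 * given SINT. The per-CPU hv_synic_eventring_tail array tracks the next
 * slot to consume, while the ring's ring_full and signal_masked fields
 * form the handshake with the hypervisor-side producer. Returns 0 when
 * the ring is empty.
 */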
static u32 synic_event_ring_get_queued_port(u32 sint_index)
{
        struct hv_synic_event_ring_page **event_ring_page;
        volatile struct hv_synic_event_ring *ring;
        struct hv_synic_pages *spages;
        u8 **synic_eventring_tail;
        u32 message;
        u8 tail;

        spages = this_cpu_ptr(mshv_root.synic_pages);
        event_ring_page = &spages->synic_event_ring_page;
        synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);

        if (unlikely(!*synic_eventring_tail)) {
                pr_debug("Missing synic event ring tail!\n");
                return 0;
        }
        tail = (*synic_eventring_tail)[sint_index];

        if (unlikely(!*event_ring_page)) {
                pr_debug("Missing synic event ring page!\n");
                return 0;
        }

        ring = &(*event_ring_page)->sint_event_ring[sint_index];
        /* Read the message at the current tail slot. */
        message = ring->data[tail];

        if (!message) {
                if (ring->ring_full) {
                        /*
                         * The ring is marked full even though we have
                         * consumed all the messages. Notify the hypervisor
                         * that the ring is now empty and check again.
                         */
                        ring->ring_full = 0;
                        hv_call_notify_port_ring_empty(sint_index);
                        message = ring->data[tail];
                }

                if (!message) {
                        /*
                         * Unmask the signal and sync with the hypervisor
                         * before one last check for any message.
                         */
                        ring->signal_masked = 0;
                        mb();
                        message = ring->data[tail];

                        /* Still nothing; bail out. */
                        if (!message)
                                return 0;
                }

                ring->signal_masked = 1;
        }

        /* Consume the message: clear the slot and advance the tail. */
        ring->data[tail] = 0;

        if (++tail == HV_SYNIC_EVENT_RING_MESSAGE_COUNT)
                tail = 0;

        (*synic_eventring_tail)[sint_index] = tail;

        return message;
}

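/*
 * Handle a doorbell SINT message: drain every port id queued on the
 * doorbell event ring and invoke the callback registered for each
 * doorbell port. Returns true if the message was consumed here.
 */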
static bool
mshv_doorbell_isr(struct hv_message *msg)
{
        struct hv_notification_message_payload *notification;
        u32 port;

        if (msg->header.message_type != HVMSG_SYNIC_SINT_INTERCEPT)
                return false;

        notification = (struct hv_notification_message_payload *)msg->u.payload;
        if (notification->sint_index != HV_SYNIC_DOORBELL_SINT_INDEX)
                return false;

        while ((port = synic_event_ring_get_queued_port(HV_SYNIC_DOORBELL_SINT_INDEX))) {
                struct port_table_info ptinfo = { 0 };

                if (mshv_portid_lookup(port, &ptinfo)) {
                        pr_debug("Failed to get port info from port_table!\n");
                        continue;
                }

                if (ptinfo.hv_port_type != HV_PORT_TYPE_DOORBELL) {
                        pr_debug("Not a doorbell port! port: %d, port_type: %d\n",
                                 port, ptinfo.hv_port_type);
                        continue;
                }

                /* Invoke the callback */
                ptinfo.hv_port_doorbell.doorbell_cb(port,
                                                    ptinfo.hv_port_doorbell.data);
        }

        return true;
}

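/*
 * Complete a pending async hypercall: record the completion status in the
 * target partition and wake the waiter blocked on
 * partition->async_hypercall.
 */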
static bool mshv_async_call_completion_isr(struct hv_message *msg)
{
        bool handled = false;
        struct hv_async_completion_message_payload *async_msg;
        struct mshv_partition *partition;
        u64 partition_id;

        if (msg->header.message_type != HVMSG_ASYNC_CALL_COMPLETION)
                goto out;

        async_msg =
                (struct hv_async_completion_message_payload *)msg->u.payload;

        partition_id = async_msg->partition_id;

        /*
         * Hold the RCU read lock for the rest of the isr, because the
         * partition could be released at any time; e.g. the MSHV_RUN_VP
         * thread could wake on another cpu and release the partition
         * unless we hold this.
         */
        rcu_read_lock();

        partition = mshv_partition_find(partition_id);

        if (unlikely(!partition)) {
                pr_debug("failed to find partition %llu\n", partition_id);
                goto unlock_out;
        }

        partition->async_hypercall_status = async_msg->status;
        complete(&partition->async_hypercall);

        handled = true;

unlock_out:
        rcu_read_unlock();
out:
        return handled;
}

static void kick_vp(struct mshv_vp *vp)
{
        atomic64_inc(&vp->run.vp_signaled_count);
        vp->run.kicked_by_hv = 1;
        wake_up(&vp->run.vp_suspend_queue);
}

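/*
 * The bitset scheduler message carries a sparse HV_GENERIC_SET_SPARSE_4K
 * VP set: each set bit in valid_bank_mask corresponds to the next 64-bit
 * bank in bank_contents, and each set bit within a bank names one VP, so
 * vp_index = bank_idx * 64 + bit (e.g. bit 3 of bank 2 is VP 131). Kick
 * every VP named in the set.
 */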
static void
handle_bitset_message(const struct hv_vp_signal_bitset_scheduler_message *msg)
{
        int bank_idx, vps_signaled = 0, bank_mask_size;
        struct mshv_partition *partition;
        const struct hv_vpset *vpset;
        const u64 *bank_contents;
        u64 partition_id = msg->partition_id;

        if (msg->vp_bitset.bitset.format != HV_GENERIC_SET_SPARSE_4K) {
                pr_debug("scheduler message format is not HV_GENERIC_SET_SPARSE_4K\n");
                return;
        }

        if (msg->vp_count == 0) {
                pr_debug("scheduler message with no VP specified\n");
                return;
        }

        rcu_read_lock();

        partition = mshv_partition_find(partition_id);
        if (unlikely(!partition)) {
                pr_debug("failed to find partition %llu\n", partition_id);
                goto unlock_out;
        }

        vpset = &msg->vp_bitset.bitset;

        bank_idx = -1;
        bank_contents = vpset->bank_contents;
        bank_mask_size = sizeof(vpset->valid_bank_mask) * BITS_PER_BYTE;

        while (true) {
                int vp_bank_idx = -1;
                int vp_bank_size = sizeof(*bank_contents) * BITS_PER_BYTE;
                int vp_index;

                bank_idx = find_next_bit((unsigned long *)&vpset->valid_bank_mask,
                                         bank_mask_size, bank_idx + 1);
                if (bank_idx == bank_mask_size)
                        break;

                while (true) {
                        struct mshv_vp *vp;

                        vp_bank_idx = find_next_bit((unsigned long *)bank_contents,
                                                    vp_bank_size, vp_bank_idx + 1);
                        if (vp_bank_idx == vp_bank_size)
                                break;

                        vp_index = (bank_idx * vp_bank_size) + vp_bank_idx;

                        /* This shouldn't happen, but just in case. */
                        if (unlikely(vp_index >= MSHV_MAX_VPS)) {
                                pr_debug("VP index %u out of bounds\n",
                                         vp_index);
                                goto unlock_out;
                        }

                        vp = partition->pt_vp_array[vp_index];
                        if (unlikely(!vp)) {
                                pr_debug("failed to find VP %u\n", vp_index);
                                goto unlock_out;
                        }

                        kick_vp(vp);
                        vps_signaled++;
                }

                bank_contents++;
        }

unlock_out:
        rcu_read_unlock();

        if (vps_signaled != msg->vp_count)
                pr_debug("asked to signal %u VPs but only did %u\n",
                         msg->vp_count, vps_signaled);
}

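/*
 * The pair scheduler message carries parallel arrays of partition ids and
 * VP indexes. Kick each (partition, VP) pair, reusing the previous
 * partition lookup when consecutive entries target the same partition.
 */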
static void
handle_pair_message(const struct hv_vp_signal_pair_scheduler_message *msg)
{
        struct mshv_partition *partition = NULL;
        struct mshv_vp *vp;
        int idx;

        rcu_read_lock();

        for (idx = 0; idx < msg->vp_count; idx++) {
                u64 partition_id = msg->partition_ids[idx];
                u32 vp_index = msg->vp_indexes[idx];

                if (idx == 0 || partition->pt_id != partition_id) {
                        partition = mshv_partition_find(partition_id);
                        if (unlikely(!partition)) {
                                pr_debug("failed to find partition %llu\n",
                                         partition_id);
                                break;
                        }
                }

                /* This shouldn't happen, but just in case. */
                if (unlikely(vp_index >= MSHV_MAX_VPS)) {
                        pr_debug("VP index %u out of bounds\n", vp_index);
                        break;
                }

                vp = partition->pt_vp_array[vp_index];
                if (!vp) {
                        pr_debug("failed to find VP %u\n", vp_index);
                        break;
                }

                kick_vp(vp);
        }

        rcu_read_unlock();
}

static bool
mshv_scheduler_isr(struct hv_message *msg)
{
        if (msg->header.message_type != HVMSG_SCHEDULER_VP_SIGNAL_BITSET &&
            msg->header.message_type != HVMSG_SCHEDULER_VP_SIGNAL_PAIR)
                return false;

        if (msg->header.message_type == HVMSG_SCHEDULER_VP_SIGNAL_BITSET)
                handle_bitset_message((struct hv_vp_signal_bitset_scheduler_message *)
                                      msg->u.payload);
        else
                handle_pair_message((struct hv_vp_signal_pair_scheduler_message *)
                                    msg->u.payload);

        return true;
}

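/*
 * Handle an intercept message from a guest partition; msg->header.sender
 * carries the partition id. An APIC EOI may be consumed entirely by a
 * registered GSI ack notifier; every other intercept arrives as an opaque
 * message and simply kicks the target VP, which reads the details from
 * its mapped intercept message page.
 */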
static bool
mshv_intercept_isr(struct hv_message *msg)
{
        struct mshv_partition *partition;
        bool handled = false;
        struct mshv_vp *vp;
        u64 partition_id;
        u32 vp_index;

        partition_id = msg->header.sender;

        rcu_read_lock();

        partition = mshv_partition_find(partition_id);
        if (unlikely(!partition)) {
                pr_debug("failed to find partition %llu\n",
                         partition_id);
                goto unlock_out;
        }

        if (msg->header.message_type == HVMSG_X64_APIC_EOI) {
                /*
                 * Check if this GSI is registered in the ack_notifier list
                 * and invoke the callback if so. When a notifier exists,
                 * the ack callback handles the VMEXIT, so the message need
                 * not be passed to the vcpu thread.
                 */
                struct hv_x64_apic_eoi_message *eoi_msg =
                        (struct hv_x64_apic_eoi_message *)&msg->u.payload[0];

                if (mshv_notify_acked_gsi(partition, eoi_msg->interrupt_vector)) {
                        handled = true;
                        goto unlock_out;
                }
        }

        /*
         * Since the mapped VP intercept message page is in use, every other
         * intercept arrives here as an opaque intercept message; the actual
         * intercept message has already been placed in that page. Make sure
         * the message type matches this expectation.
         */
        if (msg->header.message_type != HVMSG_OPAQUE_INTERCEPT) {
                pr_debug("wrong message type %d\n", msg->header.message_type);
                goto unlock_out;
        }

        /*
         * Since we directly index the vp, and it has to exist for us to be
         * here (because the vp is only deleted when the partition is), no
         * additional locking is needed.
         */
        vp_index =
                ((struct hv_opaque_intercept_message *)msg->u.payload)->vp_index;
        vp = partition->pt_vp_array[vp_index];
        if (unlikely(!vp)) {
                pr_debug("failed to find VP %u\n", vp_index);
                goto unlock_out;
        }

        kick_vp(vp);

        handled = true;

unlock_out:
        rcu_read_unlock();

        return handled;
}

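/*
 * Top-level SynIC interrupt handler for the root partition. Reads this
 * CPU's message slot for the interception SINT and offers the message to
 * each handler in turn: doorbell, scheduler, async hypercall completion,
 * and finally intercept.
 */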
void mshv_isr(void)
{
        struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
        struct hv_message_page **msg_page = &spages->synic_message_page;
        struct hv_message *msg;
        bool handled;

        if (unlikely(!(*msg_page))) {
                pr_debug("Missing synic page!\n");
                return;
        }

        msg = &((*msg_page)->sint_message[HV_SYNIC_INTERCEPTION_SINT_INDEX]);

        /*
         * If the type isn't set, there isn't really a message;
         * it may be some other Hyper-V interrupt.
         */
        if (msg->header.message_type == HVMSG_NONE)
                return;

        handled = mshv_doorbell_isr(msg);

        if (!handled)
                handled = mshv_scheduler_isr(msg);

        if (!handled)
                handled = mshv_async_call_completion_isr(msg);

        if (!handled)
                handled = mshv_intercept_isr(msg);

        if (handled) {
                /*
                 * Acknowledge the message by clearing its type, then ask
                 * the hypervisor (via the EOM MSR) to deliver the next
                 * message if one is pending.
                 */
                msg->header.message_type = HVMSG_NONE;
                /*
                 * Ensure the write is complete so the hypervisor will
                 * deliver the next message if available.
                 */
                mb();
                if (msg->header.message_flags.msg_pending)
                        hv_set_non_nested_msr(HV_MSR_EOM, 0);

#ifdef HYPERVISOR_CALLBACK_VECTOR
                add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR);
#endif
        } else {
                pr_warn_once("%s: unknown message type 0x%x\n", __func__,
                             msg->header.message_type);
        }
}

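/*
 * Per-CPU SynIC setup: map and enable the message (SIMP), event flags
 * (SIEFP) and event ring (SIRBP) pages, unmask the interception and
 * doorbell SINTs, then set the global enable bit in SCONTROL. Each page
 * is mapped before its enable bit is written back, so the hypervisor
 * never delivers into an unmapped page.
 */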
int mshv_synic_init(unsigned int cpu)
{
        union hv_synic_simp simp;
        union hv_synic_siefp siefp;
        union hv_synic_sirbp sirbp;
#ifdef HYPERVISOR_CALLBACK_VECTOR
        union hv_synic_sint sint;
#endif
        union hv_synic_scontrol sctrl;
        struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
        struct hv_message_page **msg_page = &spages->synic_message_page;
        struct hv_synic_event_flags_page **event_flags_page =
                &spages->synic_event_flags_page;
        struct hv_synic_event_ring_page **event_ring_page =
                &spages->synic_event_ring_page;

        /* Set up the Synic's message page */
        simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
        simp.simp_enabled = true;
        *msg_page = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
                             HV_HYP_PAGE_SIZE,
                             MEMREMAP_WB);

        if (!(*msg_page))
                return -EFAULT;

        hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);

        /* Set up the Synic's event flags page */
        siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
        siefp.siefp_enabled = true;
        *event_flags_page = memremap(siefp.base_siefp_gpa << PAGE_SHIFT,
                                     PAGE_SIZE, MEMREMAP_WB);

        if (!(*event_flags_page))
                goto cleanup;

        hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);

        /* Set up the Synic's event ring page */
        sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
        sirbp.sirbp_enabled = true;
        *event_ring_page = memremap(sirbp.base_sirbp_gpa << PAGE_SHIFT,
                                    PAGE_SIZE, MEMREMAP_WB);

        if (!(*event_ring_page))
                goto cleanup;

        hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);

#ifdef HYPERVISOR_CALLBACK_VECTOR
        /* Enable intercepts */
        sint.as_uint64 = 0;
        sint.vector = HYPERVISOR_CALLBACK_VECTOR;
        sint.masked = false;
        sint.auto_eoi = hv_recommend_using_aeoi();
        hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
                              sint.as_uint64);

        /* Doorbell SINT */
        sint.as_uint64 = 0;
        sint.vector = HYPERVISOR_CALLBACK_VECTOR;
        sint.masked = false;
        sint.as_intercept = 1;
        sint.auto_eoi = hv_recommend_using_aeoi();
        hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX,
                              sint.as_uint64);
#endif

        /* Enable global synic bit */
        sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
        sctrl.enable = 1;
        hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);

        return 0;

cleanup:
        if (*event_ring_page) {
                sirbp.sirbp_enabled = false;
                hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
                memunmap(*event_ring_page);
        }
        if (*event_flags_page) {
                siefp.siefp_enabled = false;
                hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
                memunmap(*event_flags_page);
        }
        if (*msg_page) {
                simp.simp_enabled = false;
                hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
                memunmap(*msg_page);
        }

        return -EFAULT;
}

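/*
 * Per-CPU SynIC teardown: mask the SINTs, disable and unmap the SynIC
 * pages, and clear the global enable bit, undoing mshv_synic_init().
 */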
int mshv_synic_cleanup(unsigned int cpu)
{
        union hv_synic_sint sint;
        union hv_synic_simp simp;
        union hv_synic_siefp siefp;
        union hv_synic_sirbp sirbp;
        union hv_synic_scontrol sctrl;
        struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
        struct hv_message_page **msg_page = &spages->synic_message_page;
        struct hv_synic_event_flags_page **event_flags_page =
                &spages->synic_event_flags_page;
        struct hv_synic_event_ring_page **event_ring_page =
                &spages->synic_event_ring_page;

        /* Disable the interrupt */
        sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX);
        sint.masked = true;
        hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
                              sint.as_uint64);

        /* Disable Doorbell SINT */
        sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX);
        sint.masked = true;
        hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX,
                              sint.as_uint64);

        /* Disable Synic's event ring page */
        sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
        sirbp.sirbp_enabled = false;
        hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
        memunmap(*event_ring_page);

        /* Disable Synic's event flags page */
        siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
        siefp.siefp_enabled = false;
        hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
        memunmap(*event_flags_page);

        /* Disable Synic's message page */
        simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
        simp.simp_enabled = false;
        hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
        memunmap(*msg_page);

        /* Disable global synic bit */
        sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
        sctrl.enable = 0;
        hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);

        return 0;
}

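/*
 * Create a doorbell port for @partition_id and connect it, so that a guest
 * write to guest physical address @gpa (matched against @val according to
 * @flags) raises the doorbell SINT and ultimately invokes @doorbell_cb from
 * mshv_doorbell_isr(). Returns a non-negative port id, which doubles as the
 * doorbell id, or a negative errno.
 *
 * A minimal usage sketch (callback and caller names are illustrative only):
 *
 *	static void my_doorbell_cb(int doorbell_id, void *data)
 *	{
 *		// e.g. signal the eventfd stashed in @data
 *	}
 *
 *	id = mshv_register_doorbell(pt_id, my_doorbell_cb, ctx, gpa, val, flags);
 *	if (id < 0)
 *		return id;
 *	...
 *	mshv_unregister_doorbell(pt_id, id);
 */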
int
mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb, void *data,
                       u64 gpa, u64 val, u64 flags)
{
        struct hv_connection_info connection_info = { 0 };
        union hv_connection_id connection_id = { 0 };
        struct port_table_info *port_table_info;
        struct hv_port_info port_info = { 0 };
        union hv_port_id port_id = { 0 };
        int ret;

        port_table_info = kmalloc(sizeof(*port_table_info), GFP_KERNEL);
        if (!port_table_info)
                return -ENOMEM;

        port_table_info->hv_port_type = HV_PORT_TYPE_DOORBELL;
        port_table_info->hv_port_doorbell.doorbell_cb = doorbell_cb;
        port_table_info->hv_port_doorbell.data = data;
        ret = mshv_portid_alloc(port_table_info);
        if (ret < 0) {
                kfree(port_table_info);
                return ret;
        }

        port_id.u.id = ret;
        port_info.port_type = HV_PORT_TYPE_DOORBELL;
        port_info.doorbell_port_info.target_sint = HV_SYNIC_DOORBELL_SINT_INDEX;
        port_info.doorbell_port_info.target_vp = HV_ANY_VP;
        ret = hv_call_create_port(hv_current_partition_id, port_id, partition_id,
                                  &port_info, 0, 0, NUMA_NO_NODE);
        if (ret < 0) {
                mshv_portid_free(port_id.u.id);
                return ret;
        }

        connection_id.u.id = port_id.u.id;
        connection_info.port_type = HV_PORT_TYPE_DOORBELL;
        connection_info.doorbell_connection_info.gpa = gpa;
        connection_info.doorbell_connection_info.trigger_value = val;
        connection_info.doorbell_connection_info.flags = flags;

        ret = hv_call_connect_port(hv_current_partition_id, port_id, partition_id,
                                   connection_id, &connection_info, 0, NUMA_NO_NODE);
        if (ret < 0) {
                hv_call_delete_port(hv_current_partition_id, port_id);
                mshv_portid_free(port_id.u.id);
                return ret;
        }

        /* Use the port_id as the doorbell_id */
        return port_id.u.id;
}

void
mshv_unregister_doorbell(u64 partition_id, int doorbell_portid)
{
        union hv_port_id port_id = { 0 };
        union hv_connection_id connection_id = { 0 };

        connection_id.u.id = doorbell_portid;
        hv_call_disconnect_port(partition_id, connection_id);

        port_id.u.id = doorbell_portid;
        hv_call_delete_port(hv_current_partition_id, port_id);

        mshv_portid_free(doorbell_portid);
}