// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *     Anup Patel <anup.patel@wdc.com>
 */

#include <linux/bitops.h>
#include <linux/entry-kvm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/cacheflush.h>
#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_vector.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, wrs_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits),
	STATS_DESC_COUNTER(VCPU, instr_illegal_exits),
	STATS_DESC_COUNTER(VCPU, load_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, store_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, load_access_exits),
	STATS_DESC_COUNTER(VCPU, store_access_exits),
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

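/*
 * Reset a VCPU to its initial state. If the VCPU is currently loaded on a
 * host CPU, it is temporarily put and then re-loaded so that the reset
 * values take effect in hardware.
 */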
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
	bool loaded;

	/*
	 * Preemption must be disabled here because this races with
	 * kvm_sched_out()/kvm_sched_in() (called from preempt notifiers),
	 * which also call vcpu_load()/vcpu_put().
	 */
	get_cpu();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	vcpu->arch.last_exit_cpu = -1;

	memcpy(csr, reset_csr, sizeof(*csr));

	spin_lock(&vcpu->arch.reset_cntx_lock);
	memcpy(cntx, reset_cntx, sizeof(*cntx));
	spin_unlock(&vcpu->arch.reset_cntx_lock);

	memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr));

	kvm_riscv_vcpu_fp_reset(vcpu);

	kvm_riscv_vcpu_vector_reset(vcpu);

	kvm_riscv_vcpu_timer_reset(vcpu);

	kvm_riscv_vcpu_aia_reset(vcpu);

	bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
	bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);

	kvm_riscv_vcpu_pmu_reset(vcpu);

	vcpu->arch.hfence_head = 0;
	vcpu->arch.hfence_tail = 0;
	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));

	kvm_riscv_vcpu_sbi_sta_reset(vcpu);

	/* Reset the guest CSRs for the hotplug use case */
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	put_cpu();
}

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}

int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int rc;
	struct kvm_cpu_context *cntx;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;

	spin_lock_init(&vcpu->arch.mp_state_lock);

	/* Mark this VCPU as never having run */
	vcpu->arch.ran_atleast_once = false;
	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);

	/* Setup ISA features available to VCPU */
	kvm_riscv_vcpu_setup_isa(vcpu);

	/* Setup vendor, arch, and implementation details */
	vcpu->arch.mvendorid = sbi_get_mvendorid();
	vcpu->arch.marchid = sbi_get_marchid();
	vcpu->arch.mimpid = sbi_get_mimpid();

	/* Setup VCPU hfence queue */
	spin_lock_init(&vcpu->arch.hfence_lock);

	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
	spin_lock_init(&vcpu->arch.reset_cntx_lock);

	spin_lock(&vcpu->arch.reset_cntx_lock);
	cntx = &vcpu->arch.guest_reset_context;
	cntx->sstatus = SR_SPP | SR_SPIE;
	cntx->hstatus = 0;
	cntx->hstatus |= HSTATUS_VTW;
	cntx->hstatus |= HSTATUS_SPVP;
	cntx->hstatus |= HSTATUS_SPV;
	spin_unlock(&vcpu->arch.reset_cntx_lock);

	if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
		return -ENOMEM;

	/* By default, make CY, TM, and IR counters accessible in VU mode */
	reset_csr->scounteren = 0x7;

	/* Setup VCPU timer */
	kvm_riscv_vcpu_timer_init(vcpu);

	/* Setup performance monitoring */
	kvm_riscv_vcpu_pmu_init(vcpu);

	/* Setup VCPU AIA */
	rc = kvm_riscv_vcpu_aia_init(vcpu);
	if (rc)
		return rc;

	/*
	 * Setup SBI extensions
	 * NOTE: This must be the last thing to be initialized.
	 */
	kvm_riscv_vcpu_sbi_init(vcpu);

	/* Reset VCPU */
	kvm_riscv_reset_vcpu(vcpu);

	return 0;
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	/*
	 * The vcpu with id 0 is the designated boot cpu.
	 * Keep all vcpus with a non-zero id in the powered-off state so
	 * that they can be brought up using the SBI HSM extension.
	 */
	if (vcpu->vcpu_idx != 0)
		kvm_riscv_vcpu_power_off(vcpu);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	/* Cleanup VCPU AIA context */
	kvm_riscv_vcpu_aia_deinit(vcpu);

	/* Cleanup VCPU timer */
	kvm_riscv_vcpu_timer_deinit(vcpu);

	kvm_riscv_vcpu_pmu_deinit(vcpu);

	/* Free unused pages pre-allocated for G-stage page table mappings */
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);

	/* Free vector context space for host and guest kernel */
	kvm_riscv_vcpu_free_vector_context(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_riscv_vcpu_timer_pending(vcpu);
}

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	kvm_riscv_aia_wakeon_hgei(vcpu, true);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	kvm_riscv_aia_wakeon_hgei(vcpu, false);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
		!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
}

#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.guest_context.sepc;
}
#endif

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

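/*
 * Asynchronous VCPU ioctl: KVM_INTERRUPT asserts or de-asserts the VS-level
 * external interrupt for the VCPU.
 */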
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	if (ioctl == KVM_INTERRUPT) {
		struct kvm_interrupt irq;

		if (copy_from_user(&irq, argp, sizeof(irq)))
			return -EFAULT;

		if (irq.irq == KVM_INTERRUPT_SET)
			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
		else
			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
	}

	return -ENOIOCTLCMD;
}

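/*
 * Remaining VCPU ioctls: get/set a single register and enumerate the
 * register list visible to user space.
 */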
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
		else
			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned int n;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	default:
		break;
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

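/*
 * Fold any pending interrupt updates (recorded in irqs_pending and
 * irqs_pending_mask) into the shadow HVIP value before it is programmed
 * into hardware.
 */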
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	unsigned long mask, val;

	if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) {
		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0);
		val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask;

		csr->hvip &= ~mask;
		csr->hvip |= val;
	}

	/* Flush AIA high interrupts */
	kvm_riscv_vcpu_aia_flush_interrupts(vcpu);
}

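/*
 * Sync the software view of guest interrupts with the VSIE/HVIP CSR
 * changes made by the guest while it was running.
 */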
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
{
	unsigned long hvip;
	struct kvm_vcpu_arch *v = &vcpu->arch;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* Read current HVIP and VSIE CSRs */
	csr->vsie = ncsr_read(CSR_VSIE);

	/* Sync up the HVIP.VSSIP bit changes done by the guest */
	hvip = ncsr_read(CSR_HVIP);
	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
		if (hvip & (1UL << IRQ_VS_SOFT)) {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				set_bit(IRQ_VS_SOFT, v->irqs_pending);
		} else {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				clear_bit(IRQ_VS_SOFT, v->irqs_pending);
		}
	}

	/* Sync up the HVIP.LCOFIP bit changes (clear only) done by the guest */
	if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
		if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
		    !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
			clear_bit(IRQ_PMU_OVF, v->irqs_pending);
	}

	/* Sync up AIA high interrupts */
	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);

	/* Sync up timer CSRs */
	kvm_riscv_vcpu_timer_sync(vcpu);
}

int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privileged specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	set_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	kvm_vcpu_kick(vcpu);

	return 0;
}

int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privileged specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	clear_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	return 0;
}

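/*
 * Return true if any interrupt selected by @mask is both pending and
 * enabled for this VCPU.
 */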
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
	unsigned long ie;

	ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
	      << VSIP_TO_HVIP_SHIFT) & (unsigned long)mask;
	ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK &
	      (unsigned long)mask;
	if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie)
		return true;

	/* Check AIA high interrupts */
	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}

void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_off(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
	kvm_vcpu_wake_up(vcpu);
}

void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_on(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	*mp_state = READ_ONCE(vcpu->arch.mp_state);

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	spin_lock(&vcpu->arch.mp_state_lock);

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
		break;
	case KVM_MP_STATE_STOPPED:
		__kvm_riscv_vcpu_power_off(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	spin_unlock(&vcpu->arch.mp_state_lock);

	return ret;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
	} else {
		vcpu->guest_debug = 0;
		vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
	}

	return 0;
}

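/*
 * Derive the VCPU's HENVCFG, HSTATEEN0, and HEDELEG values from its ISA
 * extensions and debug settings. Called once before the VCPU first runs.
 */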
static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
{
	const unsigned long *isa = vcpu->arch.isa;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	if (riscv_isa_extension_available(isa, SVPBMT))
		cfg->henvcfg |= ENVCFG_PBMTE;

	if (riscv_isa_extension_available(isa, SSTC))
		cfg->henvcfg |= ENVCFG_STCE;

	if (riscv_isa_extension_available(isa, ZICBOM))
		cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);

	if (riscv_isa_extension_available(isa, ZICBOZ))
		cfg->henvcfg |= ENVCFG_CBZE;

	if (riscv_isa_extension_available(isa, SVADU) &&
	    !riscv_isa_extension_available(isa, SVADE))
		cfg->henvcfg |= ENVCFG_ADUE;

	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
		cfg->hstateen0 |= SMSTATEEN0_HSENVCFG;
		if (riscv_isa_extension_available(isa, SSAIA))
			cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC |
					  SMSTATEEN0_AIA |
					  SMSTATEEN0_AIA_ISEL;
		if (riscv_isa_extension_available(isa, SMSTATEEN))
			cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
	}

	cfg->hedeleg = KVM_HEDELEG_DEFAULT;
	if (vcpu->guest_debug)
		cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
}

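/*
 * Restore guest VS-level CSRs and FP/vector state when the VCPU is
 * scheduled in, using the NACL shared memory interface when available.
 */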
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	if (kvm_riscv_nacl_sync_csr_available()) {
		nsh = nacl_shmem();
		nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus);
		nacl_csr_write(nsh, CSR_VSIE, csr->vsie);
		nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec);
		nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch);
		nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc);
		nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause);
		nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval);
		nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg);
		nacl_csr_write(nsh, CSR_HVIP, csr->hvip);
		nacl_csr_write(nsh, CSR_VSATP, csr->vsatp);
		nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg);
		if (IS_ENABLED(CONFIG_32BIT))
			nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32);
		if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
			nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0);
			if (IS_ENABLED(CONFIG_32BIT))
				nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
		}
	} else {
		csr_write(CSR_VSSTATUS, csr->vsstatus);
		csr_write(CSR_VSIE, csr->vsie);
		csr_write(CSR_VSTVEC, csr->vstvec);
		csr_write(CSR_VSSCRATCH, csr->vsscratch);
		csr_write(CSR_VSEPC, csr->vsepc);
		csr_write(CSR_VSCAUSE, csr->vscause);
		csr_write(CSR_VSTVAL, csr->vstval);
		csr_write(CSR_HEDELEG, cfg->hedeleg);
		csr_write(CSR_HVIP, csr->hvip);
		csr_write(CSR_VSATP, csr->vsatp);
		csr_write(CSR_HENVCFG, cfg->henvcfg);
		if (IS_ENABLED(CONFIG_32BIT))
			csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32);
		if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
			csr_write(CSR_HSTATEEN0, cfg->hstateen0);
			if (IS_ENABLED(CONFIG_32BIT))
				csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
		}
	}

	kvm_riscv_gstage_update_hgatp(vcpu);

	kvm_riscv_vcpu_timer_restore(vcpu);

	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
					vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
					    vcpu->arch.isa);

	kvm_riscv_vcpu_aia_load(vcpu, cpu);

	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

	vcpu->cpu = cpu;
}

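/*
 * Save guest VS-level CSRs and FP/vector state when the VCPU is
 * scheduled out.
 */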
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	vcpu->cpu = -1;

	kvm_riscv_vcpu_aia_put(vcpu);

	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
				     vcpu->arch.isa);
	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);

	kvm_riscv_vcpu_timer_save(vcpu);
	kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context,
					 vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);

	if (kvm_riscv_nacl_available()) {
		nsh = nacl_shmem();
		csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS);
		csr->vsie = nacl_csr_read(nsh, CSR_VSIE);
		csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC);
		csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH);
		csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC);
		csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE);
		csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL);
		csr->hvip = nacl_csr_read(nsh, CSR_HVIP);
		csr->vsatp = nacl_csr_read(nsh, CSR_VSATP);
	} else {
		csr->vsstatus = csr_read(CSR_VSSTATUS);
		csr->vsie = csr_read(CSR_VSIE);
		csr->vstvec = csr_read(CSR_VSTVEC);
		csr->vsscratch = csr_read(CSR_VSSCRATCH);
		csr->vsepc = csr_read(CSR_VSEPC);
		csr->vscause = csr_read(CSR_VSCAUSE);
		csr->vstval = csr_read(CSR_VSTVAL);
		csr->hvip = csr_read(CSR_HVIP);
		csr->vsatp = csr_read(CSR_VSATP);
	}
}

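/*
 * Handle pending VCPU requests (sleep, reset, G-stage updates, fences,
 * and steal-time accounting) before entering the guest.
 */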
static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
			kvm_vcpu_srcu_read_unlock(vcpu);
			rcuwait_wait_event(wait,
				(!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
				TASK_INTERRUPTIBLE);
			kvm_vcpu_srcu_read_lock(vcpu);

			if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
				/*
				 * Awakened to handle a signal; request to
				 * sleep again later.
				 */
				kvm_make_request(KVM_REQ_SLEEP, vcpu);
			}
		}

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_riscv_reset_vcpu(vcpu);

		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
			kvm_riscv_gstage_update_hgatp(vcpu);

		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
			kvm_riscv_fence_i_process(vcpu);

		/*
		 * The generic KVM_REQ_TLB_FLUSH is the same as
		 * KVM_REQ_HFENCE_GVMA_VMID_ALL
		 */
		if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
			kvm_riscv_hfence_gvma_vmid_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
			kvm_riscv_hfence_vvma_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
			kvm_riscv_hfence_process(vcpu);

		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
			kvm_riscv_vcpu_record_steal_time(vcpu);
	}
}

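/*
 * Program the HVIP CSR (and its AIA counterpart) with the pending
 * guest interrupts.
 */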
static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	ncsr_write(CSR_HVIP, csr->hvip);
	kvm_riscv_vcpu_aia_update_hvip(vcpu);
}

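/*
 * Swap the guest values of SCOUNTEREN, SENVCFG, and (when Smstateen is
 * available) SSTATEEN0 into hardware, saving the host values.
 */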
static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren);
	vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
	    (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
		vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0,
						     smcsr->sstateen0);
}

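/*
 * Swap the host values of SCOUNTEREN, SENVCFG, and (when Smstateen is
 * available) SSTATEEN0 back into hardware, saving the guest values.
 */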
static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren);
	csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
	    (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
		smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0,
					    vcpu->arch.host_sstateen0);
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu,
					      struct kvm_cpu_trap *trap)
{
	void *nsh;
	struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *hcntx = &vcpu->arch.host_context;

	/*
	 * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and
	 * HTINST) here because kvm_arch_vcpu_ioctl_run() calls
	 * local_irq_enable() after this function, and an interrupt taken
	 * immediately after that can potentially change the trap CSRs.
	 */

	kvm_riscv_vcpu_swap_in_guest_state(vcpu);
	guest_state_enter_irqoff();

	if (kvm_riscv_nacl_sync_sret_available()) {
		nsh = nacl_shmem();

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			hcntx->hstatus =
				nacl_csr_read(nsh, CSR_HSTATUS);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
						SBI_NACL_SHMEM_AUTOSWAP_HSTATUS,
						gcntx->hstatus);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS);
		} else if (kvm_riscv_nacl_sync_csr_available()) {
			hcntx->hstatus = nacl_csr_swap(nsh,
						       CSR_HSTATUS, gcntx->hstatus);
		} else {
			hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);
		}

		nacl_scratch_write_longs(nsh,
					 SBI_NACL_SHMEM_SRET_OFFSET +
					 SBI_NACL_SHMEM_SRET_X(1),
					 &gcntx->ra,
					 SBI_NACL_SHMEM_SRET_X_LAST);

		__kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL,
					   SBI_EXT_NACL_SYNC_SRET);

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						0);
			gcntx->hstatus = nacl_scratch_read_long(nsh,
								SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
								SBI_NACL_SHMEM_AUTOSWAP_HSTATUS);
		} else {
			gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);
		}

		trap->htval = nacl_csr_read(nsh, CSR_HTVAL);
		trap->htinst = nacl_csr_read(nsh, CSR_HTINST);
	} else {
		hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);

		__kvm_riscv_switch_to(&vcpu->arch);

		gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);

		trap->htval = csr_read(CSR_HTVAL);
		trap->htinst = csr_read(CSR_HTINST);
	}

	trap->sepc = gcntx->sepc;
	trap->scause = csr_read(CSR_SCAUSE);
	trap->stval = csr_read(CSR_STVAL);

	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
	kvm_riscv_vcpu_swap_in_host_state(vcpu);
}

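/*
 * Main VCPU run loop: complete any exit previously handed to user space,
 * then repeatedly enter the guest until an exit requires user space
 * attention or a signal is pending.
 */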
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	if (!vcpu->arch.ran_atleast_once)
		kvm_riscv_vcpu_setup_config(vcpu);

	/* Mark that this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	if (!vcpu->wants_to_run) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		ret = xfer_to_guest_mode_handle_work(vcpu);
		if (ret)
			continue;
		ret = 1;

		kvm_riscv_gstage_vmid_update(vcpu);

		kvm_riscv_check_vcpu_requests(vcpu);

		preempt_disable();

		/* Update AIA HW state before entering guest */
		ret = kvm_riscv_vcpu_aia_update(vcpu);
		if (ret <= 0) {
			preempt_enable();
			continue;
		}

		local_irq_disable();

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * VCPU interrupts might have been updated asynchronously,
		 * so reflect them in the HW state.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu) ||
		    xfer_to_guest_mode_work_pending()) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			preempt_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Clean up stale TLB entries
		 *
		 * Note: This should be done after the G-stage VMID has been
		 * updated using kvm_riscv_gstage_vmid_ver_changed()
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		trace_kvm_entry(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu, &trap);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/* Sync up interrupt state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		trace_kvm_exit(&trap);

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}