// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);

static const u8 default_ppi[] = {
	[TIMER_PTIMER] = 30,
	[TIMER_VTIMER] = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
	if (!ctxt->offset.vm_offset) {
		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
		return;
	}

	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
}

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
	       ((timer_get_ctl(timer_ctx) &
		 (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of the timers can fire, then return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = ctx->vcpu;

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
		       (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
{
	/*
	 * Paper over NV2 brokenness by publishing the interrupt status
	 * bit. This still results in a poor quality of emulation (guest
	 * writes will have no effect until the next exit).
	 *
	 * But hey, it's fast, right?
	 */
	if (is_hyp_ctxt(ctx->vcpu) &&
	    (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
		unsigned long val = timer_get_ctl(ctx);
		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
		timer_set_ctl(ctx, val);
	}
}

static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	int ret;

	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (!userspace_irqchip(vcpu->kvm)) {
		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
					  timer_irq(timer_ctx),
					  timer_ctx->irq.level,
					  timer_ctx);
		WARN_ON(ret);
	}
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level) {
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
		return;
	}

	kvm_timer_update_status(ctx, should_fire);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
	u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		cval = read_sysreg_el0(SYS_CNTV_CVAL);

		if (has_broken_cntvoff())
			cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so there is no need to zero the
		 * CNTVOFF_EL2 register, but this is actually useful when
		 * switching between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest timer will expire. Schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
	u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		if (has_broken_cntvoff()) {
			set_cntvoff(0);
			cval += offset;
		} else {
			set_cntvoff(offset);
		}
		write_sysreg_el0(cval, SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		set_cntpoff(offset);
		cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;
	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = ctx->vcpu;
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;

	set_timer_irq_phys_active(ctx, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress. Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have a valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
	 * represent this, and therefore we switch the emul/direct mappings
	 * below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);

		/*
		 * The virtual offset behaviour is "interesting", as it
		 * always applies when HCR_EL2.E2H==0, but only when
		 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
		 * track E2H when putting the HV timer in "direct" mode.
		 */
		if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
			struct arch_timer_offset *offs = &map->direct_vtimer->offset;

			if (vcpu_el2_e2h_is_set(vcpu))
				offs->vcpu_offset = NULL;
			else
				offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		}
	}
}

static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tvt, tpt, tvc, tpc, tvt02, tpt02;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything. As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tvt = tpt = tvc = tpc = false;
	tvt02 = tpt02 = false;

	/*
	 * NV2 badly breaks the timer semantics by redirecting accesses to
	 * the EL1 timer state to memory, so let's call ECV to the rescue if
	 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
	 *
	 * The treatment slightly varies depending on whether we run a nVHE
	 * or VHE guest: nVHE will use the _EL0 registers directly, while
	 * VHE will use the _EL02 accessors. This translates into different
	 * trap bits.
	 *
	 * None of the trapping is required when running in non-HYP context,
	 * unless required by the L1 hypervisor settings once we advertise
	 * ECV+NV in the guest, or unless we need trapping for other reasons.
	 */
	if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
		if (vcpu_el2_e2h_is_set(vcpu))
			tvt02 = tpt02 = true;
		else
			tvt = tpt = true;
	}

	/*
	 * We have two possibilities to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these conditions applies:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * For the poor sods that could not correctly subtract one value
	 * from another, trap the full virtual timer and counter.
	 */
	if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
		tvt = tvc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
	 * Implementation choices: we do not support NV when E2H=0 in the
	 * guest, and we don't support configuration where E2H is writable
	 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
	 * not both). This simplifies the handling of the EL1NV* bits.
	 */
	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));

		tpt02 |= (val & CNTHCTL_EL1NVPCT);
		tvt02 |= (val & CNTHCTL_EL1NVVCT);
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 */
	set = 0;
	clr = 0;

	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
	assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
	assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
	assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
	assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);

	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
	sysreg_clear_set(cnthctl_el2, clr, set);
}

void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		if (vcpu_has_nv(vcpu))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);
}

void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
{
	/*
	 * When NV2 is on, guest hypervisors have their EL1 timer register
	 * accesses redirected to the VNCR page. Any guest action taken on
	 * the timer is postponed until the next exit, leading to a very
	 * poor quality of emulation.
	 *
	 * This is an unmitigated disaster, only papered over by FEAT_ECV,
	 * which allows trapping of the timer registers even with NV2.
	 * Still, this is worse than FEAT_NV on its own. Meh.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu)) {
		if (cpus_have_final_cap(ARM64_HAS_ECV))
			return;

		/*
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap (and the HW is
		 * fully emulated), while the EL0 registers access memory
		 * despite the access being notionally direct. Boo.
		 *
		 * We update the hardware timer registers with the
		 * latest value written by the guest to the VNCR page
		 * and let the hardware take care of the rest.
		 */
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL);
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL);
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
	} else {
		/*
		 * For a VHE guest hypervisor, the EL2 state is directly
		 * stored in the host EL1 timers, while the emulated EL0
		 * state is stored in the VNCR page. The latter could have
		 * been updated behind our back, and we must reset the
		 * emulation of the timers.
		 */
		struct timer_map map;
		get_timer_map(vcpu, &map);

		soft_timer_cancel(&map.emul_vtimer->hrtimer);
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
		timer_emulate(map.emul_vtimer);
		timer_emulate(map.emul_ptimer);
	}
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7. We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
}

static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->vcpu = vcpu;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
	else
		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;

	hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	ctxt->hrtimer.function = kvm_hrtimer_expire;

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
		timer_set_offset(vcpu_ptimer(vcpu), 0);
	}

	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	timer->bg_timer.function = kvm_bg_timer_expire;
}

void kvm_timer_init_vm(struct kvm *kvm)
{
	for (int i = 0; i < NR_KVM_TIMERS; i++)
		kvm->arch.timer_data.ppi[i] = default_ppi[i];
}

void kvm_timer_cpu_up(void)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	if (host_ptimer_irq)
		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
}

void kvm_timer_cpu_down(void)
{
	disable_percpu_irq(host_vtimer_irq);
	if (host_ptimer_irq)
		disable_percpu_irq(host_ptimer_irq);
}

int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_context *timer;

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_TIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_vtimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_TIMER_CVAL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;
	case KVM_REG_ARM_PTIMER_CTL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_PTIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_ptimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_PTIMER_CVAL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;

	default:
		return -1;
	}

	return 0;
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set ISTATUS bit if it's expired.
	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
	 * regardless of ENABLE bit for our implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_TIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_TIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
	case KVM_REG_ARM_PTIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_PTIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_PTIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
	}
	return (u64)-1;
}

static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_restore_state(timer);
		preempt_enable();
	}
}

static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

static void timer_irq_eoi(struct irq_data *d)
{
	if (!irqd_is_forwarded_to_vcpu(d))
		irq_chip_eoi_parent(d);
}

static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

static struct irq_chip timer_chip = {
	.name = "KVM",
	.irq_ack = timer_irq_ack,
	.irq_mask = irq_chip_mask_parent,
	.irq_unmask = irq_chip_unmask_parent,
	.irq_eoi = timer_irq_eoi,
	.irq_set_type = irq_chip_set_type_parent,
	.irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state = timer_irq_set_irqchip_state,
};

static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc = timer_irq_domain_alloc,
	.free = timer_irq_domain_free,
};

static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer are in the same parent */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

static void kvm_timer_handle_errata(void)
{
	u64 mmfr0, mmfr1, mmfr4;

	/*
	 * CNTVOFF_EL2 is broken on some implementations. For those, we trap
	 * all virtual timer/counter accesses, requiring FEAT_ECV.
	 *
	 * However, a hypervisor supporting nesting is likely to mitigate the
	 * erratum at L0, and not require other levels to mitigate it (which
	 * would otherwise be a terrible performance sink due to trap
	 * amplification).
	 *
	 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
	 * and that NV is likely not to (because of limitations of the
	 * architecture), only enable the workaround when FEAT_VHE and
	 * FEAT_E2H0 are both detected. Time will tell if this actually holds.
	 */
	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
	if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
	    !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
	    SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
	    (has_vhe() || has_hvhe()) &&
	    cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
		static_branch_enable(&broken_cntvoff_key);
		kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
	}
}

int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	kvm_timer_handle_errata();
	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so
		 * all values are less than 32.
		 */
		ppis |= BIT(irq);
	}

	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer),
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

	if (map.direct_ptimer) {
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer),
					    &arch_timer_irq_ops);
	}

	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}

int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		ret = -EBUSY;
		goto out;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		ret = -ENXIO;
		goto out;
	}

	/*
	 * We cannot validate the IRQ uniqueness before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late. Blame it on the stupid API.
	 */
	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

out:
	mutex_unlock(&vcpu->kvm->arch.config_lock);
	return ret;
}

int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		timer = vcpu_hvtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		timer = vcpu_hptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer_irq(timer);
	return put_user(irq, uaddr);
}

int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		return 0;
	}

	return -ENXIO;
}

int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset)
{
	int ret = 0;

	if (offset->reserved)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (lock_all_vcpus(kvm)) {
		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);

		/*
		 * If userspace decides to set the offset using this
		 * API rather than merely restoring the counter
		 * values, the offset applies to both the virtual and
		 * physical views.
		 */
		kvm->arch.timer_data.voffset = offset->counter_offset;
		kvm->arch.timer_data.poffset = offset->counter_offset;

		unlock_all_vcpus(kvm);
	} else {
		ret = -EBUSY;
	}

	mutex_unlock(&kvm->lock);

	return ret;
}