#include "libcflat.h"
#include "apic.h"
#include "vm.h"
#include "smp.h"
#include "desc.h"
#include "isr.h"
#include "msr.h"
#include "atomic.h"
#include "fwcfg.h"

/* Maximum Task Priority value representable in CR8 (CR8 holds TPR[7:4]). */
#define MAX_TPR 0xf

/* True if the local APIC is globally enabled via IA32_APIC_BASE.EN. */
static bool is_apic_hw_enabled(void)
{
	return rdmsr(MSR_IA32_APICBASE) & APIC_EN;
}

/* True if the APIC is software-enabled via the SPIV "APIC enabled" bit. */
static bool is_apic_sw_enabled(void)
{
	return apic_read(APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
}

/* True if the APIC is in x2APIC mode (both EN and EXTD set in APIC_BASE). */
static bool is_x2apic_enabled(void)
{
	return (rdmsr(MSR_IA32_APICBASE) & (APIC_EN | APIC_EXTD)) == (APIC_EN | APIC_EXTD);
}

/* True if the APIC is in legacy xAPIC mode (EN set, EXTD clear). */
static bool is_xapic_enabled(void)
{
	return (rdmsr(MSR_IA32_APICBASE) & (APIC_EN | APIC_EXTD)) == APIC_EN;
}

/*
 * Sanity-check that a local APIC is present by reading the version register
 * and verifying it falls in the integrated-APIC range (0x10..0x15).
 */
static void test_lapic_existence(void)
{
	u8 version;

	version = (u8)apic_read(APIC_LVR);
	printf("apic version: %x\n", version);
	report(version >= 0x10 && version <= 0x15, "apic existence");
}

#define TSC_DEADLINE_TIMER_VECTOR 0xef
#define BROADCAST_VECTOR 0xcf

/* Number of TSC-deadline timer interrupts taken; bumped by the ISR below. */
static int tdt_count;

static void tsc_deadline_timer_isr(isr_regs_t *regs)
{
	++tdt_count;
	eoi();
}

/*
 * Arm the TSC-deadline timer with a deadline that has already passed (the
 * current TSC value), which should fire immediately, and verify that exactly
 * one interrupt is delivered and that the deadline MSR self-clears after the
 * timer fires.
 */
static void __test_tsc_deadline_timer(void)
{
	handle_irq(TSC_DEADLINE_TIMER_VECTOR, tsc_deadline_timer_isr);

	wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC));
	/* Give the (immediate) interrupt a window to be delivered. */
	asm volatile ("nop");
	report(tdt_count == 1, "tsc deadline timer");
	report(rdmsr(MSR_IA32_TSCDEADLINE) == 0, "tsc deadline timer clearing");
}

/*
 * Switch the LVT timer to TSC-deadline mode if the CPU supports it.
 * Returns 1 on success, 0 if the feature is not available.
 */
static int enable_tsc_deadline_timer(void)
{
	uint32_t lvtt;

	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
		lvtt = APIC_LVT_TIMER_TSCDEADLINE | TSC_DEADLINE_TIMER_VECTOR;
		apic_write(APIC_LVTT, lvtt);
		return 1;
	} else {
		return 0;
	}
}

static void test_tsc_deadline_timer(void)
{
	if(enable_tsc_deadline_timer())
		__test_tsc_deadline_timer();
	else
		report_skip("tsc deadline timer not detected");
}

/* Callback for test_for_exception(): write *data into IA32_APIC_BASE. */
static void do_write_apicbase(void *data)
{
	wrmsr(MSR_IA32_APICBASE, *(u64 *)data);
}
static bool test_write_apicbase_exception(u64 data) 90 { 91 return test_for_exception(GP_VECTOR, do_write_apicbase, &data); 92 } 93 94 static void test_enable_x2apic(void) 95 { 96 u64 apicbase = rdmsr(MSR_IA32_APICBASE); 97 98 if (enable_x2apic()) { 99 printf("x2apic enabled\n"); 100 101 apicbase &= ~(APIC_EN | APIC_EXTD); 102 report(test_write_apicbase_exception(apicbase | APIC_EXTD), 103 "x2apic enabled to invalid state"); 104 report(test_write_apicbase_exception(apicbase | APIC_EN), 105 "x2apic enabled to apic enabled"); 106 107 report(!test_write_apicbase_exception(apicbase | 0), 108 "x2apic enabled to disabled state"); 109 report(test_write_apicbase_exception(apicbase | APIC_EXTD), 110 "disabled to invalid state"); 111 report(test_write_apicbase_exception(apicbase | APIC_EN | APIC_EXTD), 112 "disabled to x2apic enabled"); 113 114 report(!test_write_apicbase_exception(apicbase | APIC_EN), 115 "apic disabled to apic enabled"); 116 report(test_write_apicbase_exception(apicbase | APIC_EXTD), 117 "apic enabled to invalid state"); 118 } else { 119 printf("x2apic not detected\n"); 120 121 report(test_write_apicbase_exception(APIC_EN | APIC_EXTD), 122 "enable unsupported x2apic"); 123 } 124 } 125 126 static void verify_disabled_apic_mmio(void) 127 { 128 volatile u32 *lvr = (volatile u32 *)(APIC_DEFAULT_PHYS_BASE + APIC_LVR); 129 volatile u32 *tpr = (volatile u32 *)(APIC_DEFAULT_PHYS_BASE + APIC_TASKPRI); 130 u32 cr8 = read_cr8(); 131 132 memset((void *)APIC_DEFAULT_PHYS_BASE, 0xff, PAGE_SIZE); 133 report(*lvr == ~0, "*0xfee00030: %x", *lvr); 134 report(read_cr8() == cr8, "CR8: %lx", read_cr8()); 135 write_cr8(cr8 ^ MAX_TPR); 136 report(read_cr8() == (cr8 ^ MAX_TPR), "CR8: %lx", read_cr8()); 137 report(*tpr == ~0, "*0xfee00080: %x", *tpr); 138 write_cr8(cr8); 139 } 140 141 static void test_apic_disable(void) 142 { 143 volatile u32 *lvr = (volatile u32 *)(APIC_DEFAULT_PHYS_BASE + APIC_LVR); 144 volatile u32 *tpr = (volatile u32 *)(APIC_DEFAULT_PHYS_BASE + APIC_TASKPRI); 
145 u32 apic_version = apic_read(APIC_LVR); 146 u32 cr8 = read_cr8(); 147 148 report_prefix_push("apic_disable"); 149 150 disable_apic(); 151 report(!is_apic_hw_enabled(), "Local apic disabled"); 152 report(!this_cpu_has(X86_FEATURE_APIC), 153 "CPUID.1H:EDX.APIC[bit 9] is clear"); 154 verify_disabled_apic_mmio(); 155 156 reset_apic(); 157 report(is_xapic_enabled(), "Local apic enabled in xAPIC mode"); 158 report(this_cpu_has(X86_FEATURE_APIC), "CPUID.1H:EDX.APIC[bit 9] is set"); 159 report(*lvr == apic_version, "*0xfee00030: %x", *lvr); 160 report(*tpr == cr8, "*0xfee00080: %x", *tpr); 161 write_cr8(cr8 ^ MAX_TPR); 162 report(*tpr == (cr8 ^ MAX_TPR) << 4, "*0xfee00080: %x", *tpr); 163 write_cr8(cr8); 164 165 if (enable_x2apic()) { 166 report(is_x2apic_enabled(), "Local apic enabled in x2APIC mode"); 167 report(this_cpu_has(X86_FEATURE_APIC), 168 "CPUID.1H:EDX.APIC[bit 9] is set"); 169 verify_disabled_apic_mmio(); 170 } 171 report_prefix_pop(); 172 } 173 174 #define ALTERNATE_APIC_BASE 0xfed40000 175 176 static void test_apicbase(void) 177 { 178 u64 orig_apicbase = rdmsr(MSR_IA32_APICBASE); 179 u32 lvr = apic_read(APIC_LVR); 180 u64 value; 181 182 wrmsr(MSR_IA32_APICBASE, orig_apicbase & ~(APIC_EN | APIC_EXTD)); 183 wrmsr(MSR_IA32_APICBASE, ALTERNATE_APIC_BASE | APIC_BSP | APIC_EN); 184 185 report_prefix_push("apicbase"); 186 187 report(*(volatile u32 *)(ALTERNATE_APIC_BASE + APIC_LVR) == lvr, 188 "relocate apic"); 189 190 value = orig_apicbase | (1UL << cpuid_maxphyaddr()); 191 report(test_for_exception(GP_VECTOR, do_write_apicbase, &value), 192 "reserved physaddr bits"); 193 194 value = orig_apicbase | 1; 195 report(test_for_exception(GP_VECTOR, do_write_apicbase, &value), 196 "reserved low bits"); 197 198 /* Restore the APIC address, the "reset" helpers leave it as is. 
*/ 199 wrmsr(MSR_IA32_APICBASE, orig_apicbase); 200 201 report_prefix_pop(); 202 } 203 204 static void do_write_apic_id(void *id) 205 { 206 apic_write(APIC_ID, *(u32 *)id); 207 } 208 209 static void __test_apic_id(void * unused) 210 { 211 u32 id, newid; 212 u8 initial_xapic_id = cpuid(1).b >> 24; 213 u32 initial_x2apic_id = cpuid(0xb).d; 214 bool x2apic_mode = is_x2apic_enabled(); 215 216 if (x2apic_mode) 217 reset_apic(); 218 219 id = apic_id(); 220 report(initial_xapic_id == id, "xapic id matches cpuid"); 221 222 newid = (id + 1) << 24; 223 report(!test_for_exception(GP_VECTOR, do_write_apic_id, &newid) && 224 (id == apic_id() || id + 1 == apic_id()), 225 "writeable xapic id"); 226 227 if (!enable_x2apic()) 228 goto out; 229 230 report(test_for_exception(GP_VECTOR, do_write_apic_id, &newid), 231 "non-writeable x2apic id"); 232 report(initial_xapic_id == (apic_id() & 0xff), "sane x2apic id"); 233 234 /* old QEMUs do not set initial x2APIC ID */ 235 report(initial_xapic_id == (initial_x2apic_id & 0xff) && 236 initial_x2apic_id == apic_id(), 237 "x2apic id matches cpuid"); 238 239 out: 240 reset_apic(); 241 242 report(initial_xapic_id == apic_id(), "correct xapic id after reset"); 243 244 /* old KVMs do not reset xAPIC ID */ 245 if (id != apic_id()) 246 apic_write(APIC_ID, id << 24); 247 248 if (x2apic_mode) 249 enable_x2apic(); 250 } 251 252 static void test_apic_id(void) 253 { 254 if (cpu_count() < 2) 255 return; 256 257 on_cpu(1, __test_apic_id, NULL); 258 } 259 260 static atomic_t ipi_count; 261 262 static void handle_ipi(isr_regs_t *regs) 263 { 264 atomic_inc(&ipi_count); 265 eoi(); 266 } 267 268 static void __test_self_ipi(void) 269 { 270 u64 start = rdtsc(); 271 int vec = 0xf1; 272 273 handle_irq(vec, handle_ipi); 274 apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | vec, 275 id_map[0]); 276 277 do { 278 pause(); 279 } while (rdtsc() - start < 1000000000 && atomic_read(&ipi_count) == 0); 280 } 281 282 static void test_self_ipi_xapic(void) 
283 { 284 report_prefix_push("self_ipi_xapic"); 285 286 /* Reset to xAPIC mode. */ 287 reset_apic(); 288 report(is_xapic_enabled(), "Local apic enabled in xAPIC mode"); 289 290 atomic_set(&ipi_count, 0); 291 __test_self_ipi(); 292 report(atomic_read(&ipi_count) == 1, "self ipi"); 293 294 report_prefix_pop(); 295 } 296 297 static void test_self_ipi_x2apic(void) 298 { 299 report_prefix_push("self_ipi_x2apic"); 300 301 if (enable_x2apic()) { 302 report(is_x2apic_enabled(), "Local apic enabled in x2APIC mode"); 303 304 atomic_set(&ipi_count, 0); 305 __test_self_ipi(); 306 report(atomic_read(&ipi_count) == 1, "self ipi"); 307 } else { 308 report_skip("x2apic not detected"); 309 } 310 311 report_prefix_pop(); 312 } 313 314 volatile int nmi_counter_private, nmi_counter, nmi_hlt_counter, sti_loop_active; 315 316 static void sti_nop(char *p) 317 { 318 asm volatile ( 319 ".globl post_sti \n\t" 320 "sti \n" 321 /* 322 * vmx won't exit on external interrupt if blocked-by-sti, 323 * so give it a reason to exit by accessing an unmapped page. 
324 */ 325 "post_sti: testb $0, %0 \n\t" 326 "nop \n\t" 327 "cli" 328 : : "m"(*p) 329 ); 330 nmi_counter = nmi_counter_private; 331 } 332 333 static void sti_loop(void *ignore) 334 { 335 unsigned k = 0; 336 337 while (sti_loop_active) 338 sti_nop((char *)(ulong)((k++ * 4096) % (128 * 1024 * 1024))); 339 } 340 341 static void nmi_handler(isr_regs_t *regs) 342 { 343 extern void post_sti(void); 344 ++nmi_counter_private; 345 nmi_hlt_counter += regs->rip == (ulong)post_sti; 346 } 347 348 static void test_sti_nmi(void) 349 { 350 unsigned old_counter; 351 352 if (cpu_count() < 2) 353 return; 354 355 handle_irq(2, nmi_handler); 356 on_cpu(1, update_cr3, (void *)read_cr3()); 357 358 sti_loop_active = 1; 359 on_cpu_async(1, sti_loop, 0); 360 while (nmi_counter < 30000) { 361 old_counter = nmi_counter; 362 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[1]); 363 while (nmi_counter == old_counter) 364 ; 365 } 366 sti_loop_active = 0; 367 report(nmi_hlt_counter == 0, "nmi-after-sti"); 368 } 369 370 static volatile bool nmi_done, nmi_flushed; 371 static volatile int nmi_received; 372 static volatile int cpu0_nmi_ctr1, cpu1_nmi_ctr1; 373 static volatile int cpu0_nmi_ctr2, cpu1_nmi_ctr2; 374 375 static void multiple_nmi_handler(isr_regs_t *regs) 376 { 377 ++nmi_received; 378 } 379 380 static void kick_me_nmi(void *blah) 381 { 382 while (!nmi_done) { 383 ++cpu1_nmi_ctr1; 384 while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1 && !nmi_done) 385 pause(); 386 387 if (nmi_done) 388 return; 389 390 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[0]); 391 /* make sure the NMI has arrived by sending an IPI after it */ 392 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT 393 | 0x44, id_map[0]); 394 ++cpu1_nmi_ctr2; 395 while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2 && !nmi_done) 396 pause(); 397 } 398 } 399 400 static void flush_nmi(isr_regs_t *regs) 401 { 402 nmi_flushed = true; 403 apic_write(APIC_EOI, 0); 404 } 405 406 static void 
test_multiple_nmi(void) 407 { 408 int i; 409 bool ok = true; 410 411 if (cpu_count() < 2) 412 return; 413 414 sti(); 415 handle_irq(2, multiple_nmi_handler); 416 handle_irq(0x44, flush_nmi); 417 on_cpu_async(1, kick_me_nmi, 0); 418 for (i = 0; i < 100000; ++i) { 419 nmi_flushed = false; 420 nmi_received = 0; 421 ++cpu0_nmi_ctr1; 422 while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1) 423 pause(); 424 425 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[0]); 426 while (!nmi_flushed) 427 pause(); 428 429 if (nmi_received != 2) { 430 ok = false; 431 break; 432 } 433 434 ++cpu0_nmi_ctr2; 435 while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2) 436 pause(); 437 } 438 nmi_done = true; 439 report(ok, "multiple nmi"); 440 } 441 442 static void pending_nmi_handler(isr_regs_t *regs) 443 { 444 int i; 445 446 if (++nmi_received == 1) { 447 for (i = 0; i < 10; ++i) 448 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI, 0); 449 } 450 } 451 452 static void test_pending_nmi(void) 453 { 454 int i; 455 456 handle_irq(2, pending_nmi_handler); 457 for (i = 0; i < 100000; ++i) { 458 nmi_received = 0; 459 460 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI, 0); 461 while (nmi_received < 2) 462 pause(); 463 464 if (nmi_received != 2) 465 break; 466 } 467 report(nmi_received == 2, "pending nmi"); 468 } 469 470 static volatile int lvtt_counter = 0; 471 472 static void lvtt_handler(isr_regs_t *regs) 473 { 474 lvtt_counter++; 475 eoi(); 476 } 477 478 static void test_apic_timer_one_shot(void) 479 { 480 uint64_t tsc1, tsc2; 481 static const uint32_t interval = 0x10000; 482 483 #define APIC_LVT_TIMER_VECTOR (0xee) 484 485 handle_irq(APIC_LVT_TIMER_VECTOR, lvtt_handler); 486 487 /* One shot mode */ 488 apic_write(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | 489 APIC_LVT_TIMER_VECTOR); 490 /* Divider == 1 */ 491 apic_write(APIC_TDCR, 0x0000000b); 492 493 tsc1 = rdtsc(); 494 /* Set "Initial Counter Register", which starts the timer */ 495 apic_write(APIC_TMICT, interval); 496 while (!lvtt_counter); 497 tsc2 = 
rdtsc(); 498 499 /* 500 * For LVT Timer clock, SDM vol 3 10.5.4 says it should be 501 * derived from processor's bus clock (IIUC which is the same 502 * as TSC), however QEMU seems to be using nanosecond. In all 503 * cases, the following should satisfy on all modern 504 * processors. 505 */ 506 report((lvtt_counter == 1) && (tsc2 - tsc1 >= interval), 507 "APIC LVT timer one shot"); 508 } 509 510 static atomic_t broadcast_counter; 511 512 static void broadcast_handler(isr_regs_t *regs) 513 { 514 atomic_inc(&broadcast_counter); 515 eoi(); 516 } 517 518 static bool broadcast_received(unsigned ncpus) 519 { 520 unsigned counter; 521 u64 start = rdtsc(); 522 523 do { 524 counter = atomic_read(&broadcast_counter); 525 if (counter >= ncpus) 526 break; 527 pause(); 528 } while (rdtsc() - start < 1000000000); 529 530 atomic_set(&broadcast_counter, 0); 531 532 return counter == ncpus; 533 } 534 535 static void test_physical_broadcast(void) 536 { 537 unsigned ncpus = cpu_count(); 538 unsigned long cr3 = read_cr3(); 539 u32 broadcast_address = enable_x2apic() ? 0xffffffff : 0xff; 540 541 handle_irq(BROADCAST_VECTOR, broadcast_handler); 542 for (int c = 1; c < ncpus; c++) 543 on_cpu(c, update_cr3, (void *)cr3); 544 545 printf("starting broadcast (%s)\n", enable_x2apic() ? 
"x2apic" : "xapic"); 546 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT | 547 BROADCAST_VECTOR, broadcast_address); 548 report(broadcast_received(ncpus), "APIC physical broadcast address"); 549 550 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT | 551 BROADCAST_VECTOR | APIC_DEST_ALLINC, 0); 552 report(broadcast_received(ncpus), "APIC physical broadcast shorthand"); 553 } 554 555 static void wait_until_tmcct_common(uint32_t initial_count, bool stop_when_half, bool should_wrap_around) 556 { 557 uint32_t tmcct = apic_read(APIC_TMCCT); 558 559 if (tmcct) { 560 while (tmcct > (initial_count / 2)) 561 tmcct = apic_read(APIC_TMCCT); 562 563 if ( stop_when_half ) 564 return; 565 566 /* Wait until the counter reach 0 or wrap-around */ 567 while ( tmcct <= (initial_count / 2) && tmcct > 0 ) 568 tmcct = apic_read(APIC_TMCCT); 569 570 /* Wait specifically for wrap around to skip 0 TMCCR if we were asked to */ 571 while (should_wrap_around && !tmcct) 572 tmcct = apic_read(APIC_TMCCT); 573 } 574 } 575 576 static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half) 577 { 578 return wait_until_tmcct_common(initial_count, stop_when_half, false); 579 } 580 581 static void wait_until_tmcct_wrap_around(uint32_t initial_count, bool stop_when_half) 582 { 583 return wait_until_tmcct_common(initial_count, stop_when_half, true); 584 } 585 586 static inline void apic_change_mode(unsigned long new_mode) 587 { 588 uint32_t lvtt; 589 590 lvtt = apic_read(APIC_LVTT); 591 apic_write(APIC_LVTT, (lvtt & ~APIC_LVT_TIMER_MASK) | new_mode); 592 } 593 594 static void test_apic_change_mode(void) 595 { 596 uint32_t tmict = 0x999999; 597 598 printf("starting apic change mode\n"); 599 600 apic_write(APIC_TMICT, tmict); 601 602 apic_change_mode(APIC_LVT_TIMER_PERIODIC); 603 604 report(apic_read(APIC_TMICT) == tmict, "TMICT value reset"); 605 606 /* Testing one-shot */ 607 apic_change_mode(APIC_LVT_TIMER_ONESHOT); 608 apic_write(APIC_TMICT, 
tmict); 609 report(apic_read(APIC_TMCCT), "TMCCT should have a non-zero value"); 610 611 wait_until_tmcct_is_zero(tmict, false); 612 report(!apic_read(APIC_TMCCT), "TMCCT should have reached 0"); 613 614 /* 615 * Write TMICT before changing mode from one-shot to periodic TMCCT should 616 * be reset to TMICT periodicly 617 */ 618 apic_write(APIC_TMICT, tmict); 619 wait_until_tmcct_is_zero(tmict, true); 620 apic_change_mode(APIC_LVT_TIMER_PERIODIC); 621 report(apic_read(APIC_TMCCT), "TMCCT should have a non-zero value"); 622 623 /* 624 * After the change of mode, the counter should not be reset and continue 625 * counting down from where it was 626 */ 627 report(apic_read(APIC_TMCCT) < (tmict / 2), 628 "TMCCT should not be reset to TMICT value"); 629 /* 630 * Specifically wait for timer wrap around and skip 0. 631 * Under KVM lapic there is a possibility that a small amount of consecutive 632 * TMCCR reads return 0 while hrtimer is reset in an async callback 633 */ 634 wait_until_tmcct_wrap_around(tmict, false); 635 report(apic_read(APIC_TMCCT) > (tmict / 2), 636 "TMCCT should be reset to the initial-count"); 637 638 wait_until_tmcct_is_zero(tmict, true); 639 /* 640 * Keep the same TMICT and change timer mode to one-shot 641 * TMCCT should be > 0 and count-down to 0 642 */ 643 apic_change_mode(APIC_LVT_TIMER_ONESHOT); 644 report(apic_read(APIC_TMCCT) < (tmict / 2), 645 "TMCCT should not be reset to init"); 646 wait_until_tmcct_is_zero(tmict, false); 647 report(!apic_read(APIC_TMCCT), "TMCCT should have reach zero"); 648 649 /* now tmcct == 0 and tmict != 0 */ 650 apic_change_mode(APIC_LVT_TIMER_PERIODIC); 651 report(!apic_read(APIC_TMCCT), "TMCCT should stay at zero"); 652 } 653 654 #define KVM_HC_SEND_IPI 10 655 656 static void test_pv_ipi(void) 657 { 658 int ret; 659 unsigned long a0 = 0xFFFFFFFF, a1 = 0, a2 = 0xFFFFFFFF, a3 = 0x0; 660 661 if (!test_device_enabled()) 662 return; 663 664 asm volatile("vmcall" : "=a"(ret) :"a"(KVM_HC_SEND_IPI), "b"(a0), "c"(a1), 
"d"(a2), "S"(a3)); 665 report(!ret, "PV IPIs testing"); 666 } 667 668 #define APIC_LDR_CLUSTER_FLAG BIT(31) 669 670 static void set_ldr(void *__ldr) 671 { 672 u32 ldr = (unsigned long)__ldr; 673 674 if (ldr & APIC_LDR_CLUSTER_FLAG) 675 apic_write(APIC_DFR, APIC_DFR_CLUSTER); 676 else 677 apic_write(APIC_DFR, APIC_DFR_FLAT); 678 679 apic_write(APIC_LDR, ldr << 24); 680 } 681 682 static int test_fixed_ipi(u32 dest_mode, u8 dest, u8 vector, 683 int nr_ipis_expected, const char *mode_name) 684 { 685 u64 start = rdtsc(); 686 int got; 687 688 atomic_set(&ipi_count, 0); 689 690 /* 691 * Wait for vCPU1 to get back into HLT, i.e. into the host so that 692 * KVM must handle incomplete AVIC IPIs. 693 */ 694 do { 695 pause(); 696 } while (rdtsc() - start < 1000000); 697 698 start = rdtsc(); 699 700 apic_icr_write(dest_mode | APIC_DM_FIXED | vector, dest); 701 702 do { 703 pause(); 704 } while (rdtsc() - start < 1000000000 && 705 atomic_read(&ipi_count) != nr_ipis_expected); 706 707 /* Only report failures to cut down on the spam. */ 708 got = atomic_read(&ipi_count); 709 if (got != nr_ipis_expected) 710 report_fail("Want %d IPI(s) using %s mode, dest = %x, got %d IPI(s)", 711 nr_ipis_expected, mode_name, dest, got); 712 atomic_set(&ipi_count, 0); 713 714 return got == nr_ipis_expected ? 0 : 1; 715 } 716 717 static int test_logical_ipi_single_target(u8 logical_id, bool cluster, u8 dest, 718 u8 vector) 719 { 720 /* Disallow broadcast, there are at least 2 vCPUs. */ 721 if (dest == 0xff) 722 return 0; 723 724 set_ldr((void *)0); 725 on_cpu(1, set_ldr, 726 (void *)((u32)logical_id | (cluster ? APIC_LDR_CLUSTER_FLAG : 0))); 727 return test_fixed_ipi(APIC_DEST_LOGICAL, dest, vector, 1, 728 cluster ? "logical cluster" : "logical flat"); 729 } 730 731 static int test_logical_ipi_multi_target(u8 vcpu0_logical_id, u8 vcpu1_logical_id, 732 bool cluster, u8 dest, u8 vector) 733 { 734 /* Allow broadcast unless there are more than 2 vCPUs. 
*/ 735 if (dest == 0xff && cpu_count() > 2) 736 return 0; 737 738 set_ldr((void *)((u32)vcpu0_logical_id | (cluster ? APIC_LDR_CLUSTER_FLAG : 0))); 739 on_cpu(1, set_ldr, 740 (void *)((u32)vcpu1_logical_id | (cluster ? APIC_LDR_CLUSTER_FLAG : 0))); 741 return test_fixed_ipi(APIC_DEST_LOGICAL, dest, vector, 2, 742 cluster ? "logical cluster" : "logical flat"); 743 } 744 745 static void test_logical_ipi_xapic(void) 746 { 747 int c, i, j, k, f; 748 u8 vector = 0xf1; 749 750 if (cpu_count() < 2) 751 return; 752 753 /* 754 * All vCPUs must be in xAPIC mode, i.e. simply resetting this vCPUs 755 * APIC is not sufficient. 756 */ 757 if (is_x2apic_enabled()) 758 return; 759 760 handle_irq(vector, handle_ipi); 761 762 /* Flat mode. 8 bits for logical IDs (one per bit). */ 763 f = 0; 764 for (i = 0; i < 8; i++) { 765 /* 766 * Test all possible destination values. Non-existent targets 767 * should be ignored. vCPU is always targeted, i.e. should get 768 * an IPI. 769 */ 770 for (k = 0; k < 0xff; k++) { 771 /* 772 * Skip values that overlap the actual target the 773 * resulting combination will be covered by other 774 * numbers in the sequence. 775 */ 776 if (BIT(i) & k) 777 continue; 778 779 f += test_logical_ipi_single_target(BIT(i), false, 780 BIT(i) | k, vector); 781 } 782 } 783 report(!f, "IPI to single target using logical flat mode"); 784 785 /* Cluster mode. 4 bits for the cluster, 4 bits for logical IDs. */ 786 f = 0; 787 for (c = 0; c < 0xf; c++) { 788 for (i = 0; i < 4; i++) { 789 /* Same as above, just fewer bits... */ 790 for (k = 0; k < 0x10; k++) { 791 if (BIT(i) & k) 792 continue; 793 794 test_logical_ipi_single_target(c << 4 | BIT(i), true, 795 c << 4 | BIT(i) | k, vector); 796 } 797 } 798 } 799 report(!f, "IPI to single target using logical cluster mode"); 800 801 /* And now do it all over again targeting both vCPU0 and vCPU1. 
*/ 802 f = 0; 803 for (i = 0; i < 8 && !f; i++) { 804 for (j = 0; j < 8 && !f; j++) { 805 if (i == j) 806 continue; 807 808 for (k = 0; k < 0x100 && !f; k++) { 809 if ((BIT(i) | BIT(j)) & k) 810 continue; 811 812 f += test_logical_ipi_multi_target(BIT(i), BIT(j), false, 813 BIT(i) | BIT(j) | k, vector); 814 if (f) 815 break; 816 f += test_logical_ipi_multi_target(BIT(i) | BIT(j), 817 BIT(i) | BIT(j), false, 818 BIT(i) | BIT(j) | k, vector); 819 } 820 } 821 } 822 report(!f, "IPI to multiple targets using logical flat mode"); 823 824 f = 0; 825 for (c = 0; c < 0xf && !f; c++) { 826 for (i = 0; i < 4 && !f; i++) { 827 for (j = 0; j < 4 && !f; j++) { 828 if (i == j) 829 continue; 830 831 for (k = 0; k < 0x10 && !f; k++) { 832 if ((BIT(i) | BIT(j)) & k) 833 continue; 834 835 f += test_logical_ipi_multi_target(c << 4 | BIT(i), 836 c << 4 | BIT(j), true, 837 c << 4 | BIT(i) | BIT(j) | k, vector); 838 if (f) 839 break; 840 f += test_logical_ipi_multi_target(c << 4 | BIT(i) | BIT(j), 841 c << 4 | BIT(i) | BIT(j), true, 842 c << 4 | BIT(i) | BIT(j) | k, vector); 843 } 844 } 845 } 846 } 847 report(!f, "IPI to multiple targets using logical cluster mode"); 848 } 849 850 static void set_xapic_physical_id(void *apic_id) 851 { 852 apic_write(APIC_ID, (unsigned long)apic_id << 24); 853 } 854 855 static void handle_aliased_ipi(isr_regs_t *regs) 856 { 857 u32 apic_id = apic_read(APIC_ID) >> 24; 858 859 if (apic_id == 0xff) 860 apic_id = smp_id(); 861 else 862 apic_id++; 863 apic_write(APIC_ID, (unsigned long)apic_id << 24); 864 865 /* 866 * Handle the IPI after updating the APIC ID, as the IPI count acts as 867 * synchronization barrier before vCPU0 sends the next IPI. 868 */ 869 handle_ipi(regs); 870 } 871 872 static void test_aliased_xapic_physical_ipi(void) 873 { 874 u8 vector = 0xf1; 875 int i, f; 876 877 if (cpu_count() < 2) 878 return; 879 880 /* 881 * All vCPUs must be in xAPIC mode, i.e. simply resetting this vCPUs 882 * APIC is not sufficient. 
883 */ 884 if (is_x2apic_enabled()) 885 return; 886 887 /* 888 * By default, KVM doesn't follow the x86 APIC architecture for aliased 889 * APIC IDs if userspace has enabled KVM_X2APIC_API_USE_32BIT_IDS. 890 * If x2APIC is supported, assume the userspace VMM has enabled 32-bit 891 * IDs and thus activated KVM's quirk. Delete this code to run the 892 * aliasing test on x2APIC CPUs, e.g. to run it on bare metal. 893 */ 894 if (this_cpu_has(X86_FEATURE_X2APIC)) 895 return; 896 897 handle_irq(vector, handle_aliased_ipi); 898 899 /* 900 * Set both vCPU0 and vCPU1's APIC IDs to 0, then start the chain 901 * reaction of IPIs from APIC ID 0..255. Each vCPU will increment its 902 * APIC ID in the handler, and then "reset" to its original ID (using 903 * smp_id()) after the last IPI. Using on_cpu() to set vCPU1's ID 904 * after this point won't work due to on_cpu() using physical mode. 905 */ 906 on_cpu(1, set_xapic_physical_id, (void *)0ul); 907 set_xapic_physical_id((void *)0ul); 908 909 f = 0; 910 for (i = 0; i < 0x100; i++) 911 f += test_fixed_ipi(APIC_DEST_PHYSICAL, i, vector, 2, "physical"); 912 913 report(!f, "IPI to aliased xAPIC physical IDs"); 914 } 915 916 typedef void (*apic_test_fn)(void); 917 918 int main(void) 919 { 920 bool is_x2apic = is_x2apic_enabled(); 921 u32 spiv = apic_read(APIC_SPIV); 922 int i; 923 924 const apic_test_fn tests[] = { 925 test_lapic_existence, 926 927 test_apic_disable, 928 test_enable_x2apic, 929 930 test_self_ipi_xapic, 931 test_self_ipi_x2apic, 932 test_physical_broadcast, 933 test_logical_ipi_xapic, 934 935 test_pv_ipi, 936 937 test_sti_nmi, 938 test_multiple_nmi, 939 test_pending_nmi, 940 941 test_apic_timer_one_shot, 942 test_apic_change_mode, 943 test_tsc_deadline_timer, 944 945 /* 946 * KVM may disable APICv if the APIC ID and/or APIC_BASE is 947 * modified, keep these tests at the end so that the test as a 948 * whole provides coverage for APICv (when it's enabled). 
949 */ 950 test_apic_id, 951 test_apicbase, 952 test_aliased_xapic_physical_ipi, 953 }; 954 955 assert_msg(is_apic_hw_enabled() && is_apic_sw_enabled(), 956 "APIC should be fully enabled by startup code."); 957 958 setup_vm(); 959 960 mask_pic_interrupts(); 961 irq_enable(); 962 963 for (i = 0; i < ARRAY_SIZE(tests); i++) { 964 tests[i](); 965 966 if (is_x2apic) 967 enable_x2apic(); 968 else 969 reset_apic(); 970 971 apic_write(APIC_SPIV, spiv); 972 } 973 974 return report_summary(); 975 } 976