1 #include "libcflat.h" 2 #include "apic.h" 3 #include "vm.h" 4 #include "smp.h" 5 #include "desc.h" 6 #include "isr.h" 7 #include "msr.h" 8 #include "atomic.h" 9 #include "fwcfg.h" 10 11 #define MAX_TPR 0xf 12 13 static void test_lapic_existence(void) 14 { 15 u8 version; 16 17 version = (u8)apic_read(APIC_LVR); 18 printf("apic version: %x\n", version); 19 report(version >= 0x10 && version <= 0x15, "apic existence"); 20 } 21 22 #define TSC_DEADLINE_TIMER_VECTOR 0xef 23 #define BROADCAST_VECTOR 0xcf 24 25 static int tdt_count; 26 27 static void tsc_deadline_timer_isr(isr_regs_t *regs) 28 { 29 ++tdt_count; 30 eoi(); 31 } 32 33 static void __test_tsc_deadline_timer(void) 34 { 35 handle_irq(TSC_DEADLINE_TIMER_VECTOR, tsc_deadline_timer_isr); 36 37 wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC)); 38 asm volatile ("nop"); 39 report(tdt_count == 1, "tsc deadline timer"); 40 report(rdmsr(MSR_IA32_TSCDEADLINE) == 0, "tsc deadline timer clearing"); 41 } 42 43 static int enable_tsc_deadline_timer(void) 44 { 45 uint32_t lvtt; 46 47 if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) { 48 lvtt = APIC_LVT_TIMER_TSCDEADLINE | TSC_DEADLINE_TIMER_VECTOR; 49 apic_write(APIC_LVTT, lvtt); 50 return 1; 51 } else { 52 return 0; 53 } 54 } 55 56 static void test_tsc_deadline_timer(void) 57 { 58 if(enable_tsc_deadline_timer()) 59 __test_tsc_deadline_timer(); 60 else 61 report_skip("tsc deadline timer not detected"); 62 } 63 64 static void do_write_apicbase(void *data) 65 { 66 wrmsr(MSR_IA32_APICBASE, *(u64 *)data); 67 } 68 69 static bool test_write_apicbase_exception(u64 data) 70 { 71 return test_for_exception(GP_VECTOR, do_write_apicbase, &data); 72 } 73 74 static void test_enable_x2apic(void) 75 { 76 u64 apicbase = rdmsr(MSR_IA32_APICBASE); 77 78 if (enable_x2apic()) { 79 printf("x2apic enabled\n"); 80 81 apicbase &= ~(APIC_EN | APIC_EXTD); 82 report(test_write_apicbase_exception(apicbase | APIC_EXTD), 83 "x2apic enabled to invalid state"); 84 report(test_write_apicbase_exception(apicbase | APIC_EN), 85 "x2apic enabled to apic enabled"); 86 87 report(!test_write_apicbase_exception(apicbase | 0), 88 "x2apic enabled to disabled state"); 89 report(test_write_apicbase_exception(apicbase | APIC_EXTD), 90 "disabled to invalid state"); 91 report(test_write_apicbase_exception(apicbase | APIC_EN | APIC_EXTD), 92 "disabled to x2apic enabled"); 93 94 report(!test_write_apicbase_exception(apicbase | APIC_EN), 95 "apic disabled to apic enabled"); 96 report(test_write_apicbase_exception(apicbase | APIC_EXTD), 97 "apic enabled to invalid state"); 98 } else { 99 printf("x2apic not detected\n"); 100 101 report(test_write_apicbase_exception(APIC_EN | APIC_EXTD), 102 "enable unsupported x2apic"); 103 } 104 } 105 106 static void verify_disabled_apic_mmio(void) 107 { 108 volatile u32 *lvr = (volatile u32 *)(APIC_DEFAULT_PHYS_BASE + APIC_LVR); 109 volatile u32 *tpr = (volatile u32 *)(APIC_DEFAULT_PHYS_BASE + APIC_TASKPRI); 110 u32 cr8 = read_cr8(); 111 112 memset((void *)APIC_DEFAULT_PHYS_BASE, 0xff, PAGE_SIZE); 113 report(*lvr == ~0, "*0xfee00030: %x", *lvr); 114 report(read_cr8() == cr8, "CR8: %lx", read_cr8()); 115 write_cr8(cr8 ^ MAX_TPR); 116 report(read_cr8() == (cr8 ^ MAX_TPR), "CR8: %lx", read_cr8()); 117 report(*tpr == ~0, "*0xfee00080: %x", *tpr); 118 write_cr8(cr8); 119 } 120 121 static void test_apic_disable(void) 122 { 123 volatile u32 *lvr = (volatile u32 *)(APIC_DEFAULT_PHYS_BASE + APIC_LVR); 124 volatile u32 *tpr = (volatile u32 *)(APIC_DEFAULT_PHYS_BASE + APIC_TASKPRI); 125 u32 apic_version = apic_read(APIC_LVR); 126 u32 cr8 = read_cr8(); 127 128 report_prefix_push("apic_disable"); 129 130 disable_apic(); 131 report(!is_apic_hw_enabled(), "Local apic disabled"); 132 report(!this_cpu_has(X86_FEATURE_APIC), 133 "CPUID.1H:EDX.APIC[bit 9] is clear"); 134 verify_disabled_apic_mmio(); 135 136 reset_apic(); 137 report(is_xapic_enabled(), "Local apic enabled in xAPIC mode"); 138 report(this_cpu_has(X86_FEATURE_APIC), "CPUID.1H:EDX.APIC[bit 9] is set"); 139 report(*lvr == apic_version, "*0xfee00030: %x", *lvr); 140 report(*tpr == cr8, "*0xfee00080: %x", *tpr); 141 write_cr8(cr8 ^ MAX_TPR); 142 report(*tpr == (cr8 ^ MAX_TPR) << 4, "*0xfee00080: %x", *tpr); 143 write_cr8(cr8); 144 145 if (enable_x2apic()) { 146 report(is_x2apic_enabled(), "Local apic enabled in x2APIC mode"); 147 report(this_cpu_has(X86_FEATURE_APIC), 148 "CPUID.1H:EDX.APIC[bit 9] is set"); 149 verify_disabled_apic_mmio(); 150 } 151 report_prefix_pop(); 152 } 153 154 #define ALTERNATE_APIC_BASE 0xfed40000 155 156 static void test_apicbase(void) 157 { 158 u64 orig_apicbase = rdmsr(MSR_IA32_APICBASE); 159 u32 lvr = apic_read(APIC_LVR); 160 u64 value; 161 162 wrmsr(MSR_IA32_APICBASE, orig_apicbase & ~(APIC_EN | APIC_EXTD)); 163 wrmsr(MSR_IA32_APICBASE, ALTERNATE_APIC_BASE | APIC_BSP | APIC_EN); 164 165 report_prefix_push("apicbase"); 166 167 report(*(volatile u32 *)(ALTERNATE_APIC_BASE + APIC_LVR) == lvr, 168 "relocate apic"); 169 170 value = orig_apicbase | (1UL << cpuid_maxphyaddr()); 171 report(test_for_exception(GP_VECTOR, do_write_apicbase, &value), 172 "reserved physaddr bits"); 173 174 value = orig_apicbase | 1; 175 report(test_for_exception(GP_VECTOR, do_write_apicbase, &value), 176 "reserved low bits"); 177 178 /* Restore the APIC address, the "reset" helpers leave it as is. */ 179 wrmsr(MSR_IA32_APICBASE, orig_apicbase); 180 181 report_prefix_pop(); 182 } 183 184 static void do_write_apic_id(void *id) 185 { 186 apic_write(APIC_ID, *(u32 *)id); 187 } 188 189 static void __test_apic_id(void * unused) 190 { 191 u32 id, newid; 192 u8 initial_xapic_id = cpuid(1).b >> 24; 193 u32 initial_x2apic_id = cpuid(0xb).d; 194 bool x2apic_mode = is_x2apic_enabled(); 195 196 if (x2apic_mode) 197 reset_apic(); 198 199 id = apic_id(); 200 report(initial_xapic_id == id, "xapic id matches cpuid"); 201 202 newid = (id + 1) << 24; 203 report(!test_for_exception(GP_VECTOR, do_write_apic_id, &newid) && 204 (id == apic_id() || id + 1 == apic_id()), 205 "writeable xapic id"); 206 207 if (!enable_x2apic()) 208 goto out; 209 210 report(test_for_exception(GP_VECTOR, do_write_apic_id, &newid), 211 "non-writeable x2apic id"); 212 report(initial_xapic_id == (apic_id() & 0xff), "sane x2apic id"); 213 214 /* old QEMUs do not set initial x2APIC ID */ 215 report(initial_xapic_id == (initial_x2apic_id & 0xff) && 216 initial_x2apic_id == apic_id(), 217 "x2apic id matches cpuid"); 218 219 out: 220 reset_apic(); 221 222 report(initial_xapic_id == apic_id(), "correct xapic id after reset"); 223 224 /* old KVMs do not reset xAPIC ID */ 225 if (id != apic_id()) 226 apic_write(APIC_ID, id << 24); 227 228 if (x2apic_mode) 229 enable_x2apic(); 230 } 231 232 static void test_apic_id(void) 233 { 234 if (cpu_count() < 2) 235 return; 236 237 on_cpu(1, __test_apic_id, NULL); 238 } 239 240 static atomic_t ipi_count; 241 242 static void handle_ipi(isr_regs_t *regs) 243 { 244 atomic_inc(&ipi_count); 245 eoi(); 246 } 247 248 static void __test_self_ipi(void) 249 { 250 u64 start = rdtsc(); 251 int vec = 0xf1; 252 253 handle_irq(vec, handle_ipi); 254 apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | vec, 255 id_map[0]); 256 257 do { 258 pause(); 259 } while (rdtsc() - start < 1000000000 && atomic_read(&ipi_count) == 0); 260 } 261 262 static void test_self_ipi_xapic(void) 263 { 264 report_prefix_push("self_ipi_xapic"); 265 266 /* Reset to xAPIC mode. */ 267 reset_apic(); 268 report(is_xapic_enabled(), "Local apic enabled in xAPIC mode"); 269 270 atomic_set(&ipi_count, 0); 271 __test_self_ipi(); 272 report(atomic_read(&ipi_count) == 1, "self ipi"); 273 274 report_prefix_pop(); 275 } 276 277 static void test_self_ipi_x2apic(void) 278 { 279 report_prefix_push("self_ipi_x2apic"); 280 281 if (enable_x2apic()) { 282 report(is_x2apic_enabled(), "Local apic enabled in x2APIC mode"); 283 284 atomic_set(&ipi_count, 0); 285 __test_self_ipi(); 286 report(atomic_read(&ipi_count) == 1, "self ipi"); 287 } else { 288 report_skip("x2apic not detected"); 289 } 290 291 report_prefix_pop(); 292 } 293 294 volatile int nmi_counter_private, nmi_counter, nmi_hlt_counter, sti_loop_active; 295 296 static void test_sti_nop(char *p) 297 { 298 asm volatile ( 299 ".globl post_sti \n\t" 300 "sti \n" 301 /* 302 * vmx won't exit on external interrupt if blocked-by-sti, 303 * so give it a reason to exit by accessing an unmapped page. 304 */ 305 "post_sti: testb $0, %0 \n\t" 306 "nop \n\t" 307 "cli" 308 : : "m"(*p) 309 ); 310 nmi_counter = nmi_counter_private; 311 } 312 313 static void sti_loop(void *ignore) 314 { 315 unsigned k = 0; 316 317 while (sti_loop_active) 318 test_sti_nop((char *)(ulong)((k++ * 4096) % (128 * 1024 * 1024))); 319 } 320 321 static void nmi_handler(isr_regs_t *regs) 322 { 323 extern void post_sti(void); 324 ++nmi_counter_private; 325 nmi_hlt_counter += regs->rip == (ulong)post_sti; 326 } 327 328 static void test_sti_nmi(void) 329 { 330 unsigned old_counter; 331 332 if (cpu_count() < 2) 333 return; 334 335 handle_irq(2, nmi_handler); 336 on_cpu(1, update_cr3, (void *)read_cr3()); 337 338 sti_loop_active = 1; 339 on_cpu_async(1, sti_loop, 0); 340 while (nmi_counter < 30000) { 341 old_counter = nmi_counter; 342 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[1]); 343 while (nmi_counter == old_counter) 344 ; 345 } 346 sti_loop_active = 0; 347 report(nmi_hlt_counter == 0, "nmi-after-sti"); 348 } 349 350 static volatile bool nmi_done, nmi_flushed; 351 static volatile int nmi_received; 352 static volatile int cpu0_nmi_ctr1, cpu1_nmi_ctr1; 353 static volatile int cpu0_nmi_ctr2, cpu1_nmi_ctr2; 354 355 static void multiple_nmi_handler(isr_regs_t *regs) 356 { 357 ++nmi_received; 358 } 359 360 static void kick_me_nmi(void *blah) 361 { 362 while (!nmi_done) { 363 ++cpu1_nmi_ctr1; 364 while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1 && !nmi_done) 365 pause(); 366 367 if (nmi_done) 368 return; 369 370 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[0]); 371 /* make sure the NMI has arrived by sending an IPI after it */ 372 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT 373 | 0x44, id_map[0]); 374 ++cpu1_nmi_ctr2; 375 while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2 && !nmi_done) 376 pause(); 377 } 378 } 379 380 static void flush_nmi(isr_regs_t *regs) 381 { 382 nmi_flushed = true; 383 apic_write(APIC_EOI, 0); 384 } 385 386 static void test_multiple_nmi(void) 387 { 388 int i; 389 bool ok = true; 390 391 if (cpu_count() < 2) 392 return; 393 394 sti(); 395 handle_irq(2, multiple_nmi_handler); 396 handle_irq(0x44, flush_nmi); 397 on_cpu_async(1, kick_me_nmi, 0); 398 for (i = 0; i < 100000; ++i) { 399 nmi_flushed = false; 400 nmi_received = 0; 401 ++cpu0_nmi_ctr1; 402 while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1) 403 pause(); 404 405 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[0]); 406 while (!nmi_flushed) 407 pause(); 408 409 if (nmi_received != 2) { 410 ok = false; 411 break; 412 } 413 414 ++cpu0_nmi_ctr2; 415 while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2) 416 pause(); 417 } 418 nmi_done = true; 419 report(ok, "multiple nmi"); 420 } 421 422 static void pending_nmi_handler(isr_regs_t *regs) 423 { 424 int i; 425 426 if (++nmi_received == 1) { 427 for (i = 0; i < 10; ++i) 428 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI, 0); 429 } 430 } 431 432 static void test_pending_nmi(void) 433 { 434 int i; 435 436 handle_irq(2, pending_nmi_handler); 437 for (i = 0; i < 100000; ++i) { 438 nmi_received = 0; 439 440 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI, 0); 441 while (nmi_received < 2) 442 pause(); 443 444 if (nmi_received != 2) 445 break; 446 } 447 report(nmi_received == 2, "pending nmi"); 448 } 449 450 static volatile int lvtt_counter = 0; 451 452 static void lvtt_handler(isr_regs_t *regs) 453 { 454 lvtt_counter++; 455 eoi(); 456 } 457 458 static void test_apic_timer_one_shot(void) 459 { 460 uint64_t tsc1, tsc2; 461 static const uint32_t interval = 0x10000; 462 463 #define APIC_LVT_TIMER_VECTOR (0xee) 464 465 handle_irq(APIC_LVT_TIMER_VECTOR, lvtt_handler); 466 467 /* One shot mode */ 468 apic_write(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | 469 APIC_LVT_TIMER_VECTOR); 470 /* Divider == 1 */ 471 apic_write(APIC_TDCR, 0x0000000b); 472 473 tsc1 = rdtsc(); 474 /* Set "Initial Counter Register", which starts the timer */ 475 apic_write(APIC_TMICT, interval); 476 while (!lvtt_counter); 477 tsc2 = rdtsc(); 478 479 /* 480 * For LVT Timer clock, SDM vol 3 10.5.4 says it should be 481 * derived from processor's bus clock (IIUC which is the same 482 * as TSC), however QEMU seems to be using nanosecond. In all 483 * cases, the following should satisfy on all modern 484 * processors. 485 */ 486 report((lvtt_counter == 1) && (tsc2 - tsc1 >= interval), 487 "APIC LVT timer one shot"); 488 } 489 490 static atomic_t broadcast_counter; 491 492 static void broadcast_handler(isr_regs_t *regs) 493 { 494 atomic_inc(&broadcast_counter); 495 eoi(); 496 } 497 498 static bool broadcast_received(unsigned ncpus) 499 { 500 unsigned counter; 501 u64 start = rdtsc(); 502 503 do { 504 counter = atomic_read(&broadcast_counter); 505 if (counter >= ncpus) 506 break; 507 pause(); 508 } while (rdtsc() - start < 1000000000); 509 510 atomic_set(&broadcast_counter, 0); 511 512 return counter == ncpus; 513 } 514 515 static void test_physical_broadcast(void) 516 { 517 unsigned ncpus = cpu_count(); 518 unsigned long cr3 = read_cr3(); 519 u32 broadcast_address = enable_x2apic() ? 0xffffffff : 0xff; 520 521 handle_irq(BROADCAST_VECTOR, broadcast_handler); 522 for (int c = 1; c < ncpus; c++) 523 on_cpu(c, update_cr3, (void *)cr3); 524 525 printf("starting broadcast (%s)\n", enable_x2apic() ? "x2apic" : "xapic"); 526 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT | 527 BROADCAST_VECTOR, broadcast_address); 528 report(broadcast_received(ncpus), "APIC physical broadcast address"); 529 530 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT | 531 BROADCAST_VECTOR | APIC_DEST_ALLINC, 0); 532 report(broadcast_received(ncpus), "APIC physical broadcast shorthand"); 533 } 534 535 static void wait_until_tmcct_common(uint32_t initial_count, bool stop_when_half, bool should_wrap_around) 536 { 537 uint32_t tmcct = apic_read(APIC_TMCCT); 538 539 if (tmcct) { 540 while (tmcct > (initial_count / 2)) 541 tmcct = apic_read(APIC_TMCCT); 542 543 if ( stop_when_half ) 544 return; 545 546 /* Wait until the counter reach 0 or wrap-around */ 547 while ( tmcct <= (initial_count / 2) && tmcct > 0 ) 548 tmcct = apic_read(APIC_TMCCT); 549 550 /* Wait specifically for wrap around to skip 0 TMCCR if we were asked to */ 551 while (should_wrap_around && !tmcct) 552 tmcct = apic_read(APIC_TMCCT); 553 } 554 } 555 556 static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half) 557 { 558 return wait_until_tmcct_common(initial_count, stop_when_half, false); 559 } 560 561 static void wait_until_tmcct_wrap_around(uint32_t initial_count, bool stop_when_half) 562 { 563 return wait_until_tmcct_common(initial_count, stop_when_half, true); 564 } 565 566 static inline void apic_change_mode(unsigned long new_mode) 567 { 568 uint32_t lvtt; 569 570 lvtt = apic_read(APIC_LVTT); 571 apic_write(APIC_LVTT, (lvtt & ~APIC_LVT_TIMER_MASK) | new_mode); 572 } 573 574 static void test_apic_change_mode(void) 575 { 576 uint32_t tmict = 0x999999; 577 578 printf("starting apic change mode\n"); 579 580 apic_write(APIC_TMICT, tmict); 581 582 apic_change_mode(APIC_LVT_TIMER_PERIODIC); 583 584 report(apic_read(APIC_TMICT) == tmict, "TMICT value reset"); 585 586 /* Testing one-shot */ 587 apic_change_mode(APIC_LVT_TIMER_ONESHOT); 588 apic_write(APIC_TMICT, tmict); 589 report(apic_read(APIC_TMCCT), "TMCCT should have a non-zero value"); 590 591 wait_until_tmcct_is_zero(tmict, false); 592 report(!apic_read(APIC_TMCCT), "TMCCT should have reached 0"); 593 594 /* 595 * Write TMICT before changing mode from one-shot to periodic TMCCT should 596 * be reset to TMICT periodicly 597 */ 598 apic_write(APIC_TMICT, tmict); 599 wait_until_tmcct_is_zero(tmict, true); 600 apic_change_mode(APIC_LVT_TIMER_PERIODIC); 601 report(apic_read(APIC_TMCCT), "TMCCT should have a non-zero value"); 602 603 /* 604 * After the change of mode, the counter should not be reset and continue 605 * counting down from where it was 606 */ 607 report(apic_read(APIC_TMCCT) < (tmict / 2), 608 "TMCCT should not be reset to TMICT value"); 609 /* 610 * Specifically wait for timer wrap around and skip 0. 611 * Under KVM lapic there is a possibility that a small amount of consecutive 612 * TMCCR reads return 0 while hrtimer is reset in an async callback 613 */ 614 wait_until_tmcct_wrap_around(tmict, false); 615 report(apic_read(APIC_TMCCT) > (tmict / 2), 616 "TMCCT should be reset to the initial-count"); 617 618 wait_until_tmcct_is_zero(tmict, true); 619 /* 620 * Keep the same TMICT and change timer mode to one-shot 621 * TMCCT should be > 0 and count-down to 0 622 */ 623 apic_change_mode(APIC_LVT_TIMER_ONESHOT); 624 report(apic_read(APIC_TMCCT) < (tmict / 2), 625 "TMCCT should not be reset to init"); 626 wait_until_tmcct_is_zero(tmict, false); 627 report(!apic_read(APIC_TMCCT), "TMCCT should have reach zero"); 628 629 /* now tmcct == 0 and tmict != 0 */ 630 apic_change_mode(APIC_LVT_TIMER_PERIODIC); 631 report(!apic_read(APIC_TMCCT), "TMCCT should stay at zero"); 632 } 633 634 #define KVM_HC_SEND_IPI 10 635 636 static void test_pv_ipi(void) 637 { 638 int ret; 639 unsigned long a0 = 0xFFFFFFFF, a1 = 0, a2 = 0xFFFFFFFF, a3 = 0x0; 640 641 if (!test_device_enabled()) 642 return; 643 644 asm volatile("vmcall" : "=a"(ret) :"a"(KVM_HC_SEND_IPI), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); 645 report(!ret, "PV IPIs testing"); 646 } 647 648 #define APIC_LDR_CLUSTER_FLAG BIT(31) 649 650 static void set_ldr(void *__ldr) 651 { 652 u32 ldr = (unsigned long)__ldr; 653 654 if (ldr & APIC_LDR_CLUSTER_FLAG) 655 apic_write(APIC_DFR, APIC_DFR_CLUSTER); 656 else 657 apic_write(APIC_DFR, APIC_DFR_FLAT); 658 659 apic_write(APIC_LDR, ldr << 24); 660 } 661 662 static int test_fixed_ipi(u32 dest_mode, u8 dest, u8 vector, 663 int nr_ipis_expected, const char *mode_name) 664 { 665 u64 start = rdtsc(); 666 int got; 667 668 atomic_set(&ipi_count, 0); 669 670 /* 671 * Wait for vCPU1 to get back into HLT, i.e. into the host so that 672 * KVM must handle incomplete AVIC IPIs. 673 */ 674 do { 675 pause(); 676 } while (rdtsc() - start < 1000000); 677 678 start = rdtsc(); 679 680 apic_icr_write(dest_mode | APIC_DM_FIXED | vector, dest); 681 682 do { 683 pause(); 684 } while (rdtsc() - start < 1000000000 && 685 atomic_read(&ipi_count) != nr_ipis_expected); 686 687 /* Only report failures to cut down on the spam. */ 688 got = atomic_read(&ipi_count); 689 if (got != nr_ipis_expected) 690 report_fail("Want %d IPI(s) using %s mode, dest = %x, got %d IPI(s)", 691 nr_ipis_expected, mode_name, dest, got); 692 atomic_set(&ipi_count, 0); 693 694 return got == nr_ipis_expected ? 0 : 1; 695 } 696 697 static int test_logical_ipi_single_target(u8 logical_id, bool cluster, u8 dest, 698 u8 vector) 699 { 700 /* Disallow broadcast, there are at least 2 vCPUs. */ 701 if (dest == 0xff) 702 return 0; 703 704 set_ldr((void *)0); 705 on_cpu(1, set_ldr, 706 (void *)((u32)logical_id | (cluster ? APIC_LDR_CLUSTER_FLAG : 0))); 707 return test_fixed_ipi(APIC_DEST_LOGICAL, dest, vector, 1, 708 cluster ? "logical cluster" : "logical flat"); 709 } 710 711 static int test_logical_ipi_multi_target(u8 vcpu0_logical_id, u8 vcpu1_logical_id, 712 bool cluster, u8 dest, u8 vector) 713 { 714 /* Allow broadcast unless there are more than 2 vCPUs. */ 715 if (dest == 0xff && cpu_count() > 2) 716 return 0; 717 718 set_ldr((void *)((u32)vcpu0_logical_id | (cluster ? APIC_LDR_CLUSTER_FLAG : 0))); 719 on_cpu(1, set_ldr, 720 (void *)((u32)vcpu1_logical_id | (cluster ? APIC_LDR_CLUSTER_FLAG : 0))); 721 return test_fixed_ipi(APIC_DEST_LOGICAL, dest, vector, 2, 722 cluster ? "logical cluster" : "logical flat"); 723 } 724 725 static void test_logical_ipi_xapic(void) 726 { 727 int c, i, j, k, f; 728 u8 vector = 0xf1; 729 730 if (cpu_count() < 2) 731 return; 732 733 /* 734 * All vCPUs must be in xAPIC mode, i.e. simply resetting this vCPUs 735 * APIC is not sufficient. 736 */ 737 if (is_x2apic_enabled()) 738 return; 739 740 handle_irq(vector, handle_ipi); 741 742 /* Flat mode. 8 bits for logical IDs (one per bit). */ 743 f = 0; 744 for (i = 0; i < 8; i++) { 745 /* 746 * Test all possible destination values. Non-existent targets 747 * should be ignored. vCPU is always targeted, i.e. should get 748 * an IPI. 749 */ 750 for (k = 0; k < 0xff; k++) { 751 /* 752 * Skip values that overlap the actual target the 753 * resulting combination will be covered by other 754 * numbers in the sequence. 755 */ 756 if (BIT(i) & k) 757 continue; 758 759 f += test_logical_ipi_single_target(BIT(i), false, 760 BIT(i) | k, vector); 761 } 762 } 763 report(!f, "IPI to single target using logical flat mode"); 764 765 /* Cluster mode. 4 bits for the cluster, 4 bits for logical IDs. */ 766 f = 0; 767 for (c = 0; c < 0xf; c++) { 768 for (i = 0; i < 4; i++) { 769 /* Same as above, just fewer bits... */ 770 for (k = 0; k < 0x10; k++) { 771 if (BIT(i) & k) 772 continue; 773 774 test_logical_ipi_single_target(c << 4 | BIT(i), true, 775 c << 4 | BIT(i) | k, vector); 776 } 777 } 778 } 779 report(!f, "IPI to single target using logical cluster mode"); 780 781 /* And now do it all over again targeting both vCPU0 and vCPU1. */ 782 f = 0; 783 for (i = 0; i < 8 && !f; i++) { 784 for (j = 0; j < 8 && !f; j++) { 785 if (i == j) 786 continue; 787 788 for (k = 0; k < 0x100 && !f; k++) { 789 if ((BIT(i) | BIT(j)) & k) 790 continue; 791 792 f += test_logical_ipi_multi_target(BIT(i), BIT(j), false, 793 BIT(i) | BIT(j) | k, vector); 794 if (f) 795 break; 796 f += test_logical_ipi_multi_target(BIT(i) | BIT(j), 797 BIT(i) | BIT(j), false, 798 BIT(i) | BIT(j) | k, vector); 799 } 800 } 801 } 802 report(!f, "IPI to multiple targets using logical flat mode"); 803 804 f = 0; 805 for (c = 0; c < 0xf && !f; c++) { 806 for (i = 0; i < 4 && !f; i++) { 807 for (j = 0; j < 4 && !f; j++) { 808 if (i == j) 809 continue; 810 811 for (k = 0; k < 0x10 && !f; k++) { 812 if ((BIT(i) | BIT(j)) & k) 813 continue; 814 815 f += test_logical_ipi_multi_target(c << 4 | BIT(i), 816 c << 4 | BIT(j), true, 817 c << 4 | BIT(i) | BIT(j) | k, vector); 818 if (f) 819 break; 820 f += test_logical_ipi_multi_target(c << 4 | BIT(i) | BIT(j), 821 c << 4 | BIT(i) | BIT(j), true, 822 c << 4 | BIT(i) | BIT(j) | k, vector); 823 } 824 } 825 } 826 } 827 report(!f, "IPI to multiple targets using logical cluster mode"); 828 } 829 830 static void set_xapic_physical_id(void *apic_id) 831 { 832 apic_write(APIC_ID, (unsigned long)apic_id << 24); 833 } 834 835 static void handle_aliased_ipi(isr_regs_t *regs) 836 { 837 u32 apic_id = apic_read(APIC_ID) >> 24; 838 839 if (apic_id == 0xff) 840 apic_id = smp_id(); 841 else 842 apic_id++; 843 apic_write(APIC_ID, (unsigned long)apic_id << 24); 844 845 /* 846 * Handle the IPI after updating the APIC ID, as the IPI count acts as 847 * synchronization barrier before vCPU0 sends the next IPI. 848 */ 849 handle_ipi(regs); 850 } 851 852 static void test_aliased_xapic_physical_ipi(void) 853 { 854 u8 vector = 0xf1; 855 int i, f; 856 857 if (cpu_count() < 2) 858 return; 859 860 /* 861 * All vCPUs must be in xAPIC mode, i.e. simply resetting this vCPUs 862 * APIC is not sufficient. 863 */ 864 if (is_x2apic_enabled()) 865 return; 866 867 /* 868 * By default, KVM doesn't follow the x86 APIC architecture for aliased 869 * APIC IDs if userspace has enabled KVM_X2APIC_API_USE_32BIT_IDS. 870 * If x2APIC is supported, assume the userspace VMM has enabled 32-bit 871 * IDs and thus activated KVM's quirk. Delete this code to run the 872 * aliasing test on x2APIC CPUs, e.g. to run it on bare metal. 873 */ 874 if (this_cpu_has(X86_FEATURE_X2APIC)) 875 return; 876 877 handle_irq(vector, handle_aliased_ipi); 878 879 /* 880 * Set both vCPU0 and vCPU1's APIC IDs to 0, then start the chain 881 * reaction of IPIs from APIC ID 0..255. Each vCPU will increment its 882 * APIC ID in the handler, and then "reset" to its original ID (using 883 * smp_id()) after the last IPI. Using on_cpu() to set vCPU1's ID 884 * after this point won't work due to on_cpu() using physical mode. 885 */ 886 on_cpu(1, set_xapic_physical_id, (void *)0ul); 887 set_xapic_physical_id((void *)0ul); 888 889 f = 0; 890 for (i = 0; i < 0x100; i++) 891 f += test_fixed_ipi(APIC_DEST_PHYSICAL, i, vector, 2, "physical"); 892 893 report(!f, "IPI to aliased xAPIC physical IDs"); 894 } 895 896 typedef void (*apic_test_fn)(void); 897 898 int main(void) 899 { 900 bool is_x2apic = is_x2apic_enabled(); 901 u32 spiv = apic_read(APIC_SPIV); 902 int i; 903 904 const apic_test_fn tests[] = { 905 test_lapic_existence, 906 907 test_apic_disable, 908 test_enable_x2apic, 909 910 test_self_ipi_xapic, 911 test_self_ipi_x2apic, 912 test_physical_broadcast, 913 test_logical_ipi_xapic, 914 915 test_pv_ipi, 916 917 test_sti_nmi, 918 test_multiple_nmi, 919 test_pending_nmi, 920 921 test_apic_timer_one_shot, 922 test_apic_change_mode, 923 test_tsc_deadline_timer, 924 925 /* 926 * KVM may disable APICv if the APIC ID and/or APIC_BASE is 927 * modified, keep these tests at the end so that the test as a 928 * whole provides coverage for APICv (when it's enabled). 929 */ 930 test_apic_id, 931 test_apicbase, 932 test_aliased_xapic_physical_ipi, 933 }; 934 935 assert_msg(is_apic_hw_enabled() && is_apic_sw_enabled(), 936 "APIC should be fully enabled by startup code."); 937 938 setup_vm(); 939 940 mask_pic_interrupts(); 941 sti(); 942 943 for (i = 0; i < ARRAY_SIZE(tests); i++) { 944 tests[i](); 945 946 if (is_x2apic) 947 enable_x2apic(); 948 else 949 reset_apic(); 950 951 apic_write(APIC_SPIV, spiv); 952 } 953 954 return report_summary(); 955 } 956