17d36db35SAvi Kivity
27d36db35SAvi Kivity #include <libcflat.h>
3d36b378fSVarad Gautam
4d36b378fSVarad Gautam #include <asm/barrier.h>
5d36b378fSVarad Gautam
6b73c7c6eSAndrew Jones #include "processor.h"
7b73c7c6eSAndrew Jones #include "atomic.h"
87d36db35SAvi Kivity #include "smp.h"
97d36db35SAvi Kivity #include "apic.h"
107d36db35SAvi Kivity #include "fwcfg.h"
11a4f9d79dSJan Kiszka #include "desc.h"
12d36b378fSVarad Gautam #include "alloc_page.h"
13d36b378fSVarad Gautam #include "asm/page.h"
147d36db35SAvi Kivity
157d36db35SAvi Kivity #define IPI_VECTOR 0x20
167d36db35SAvi Kivity
17fa816346SAvi Kivity typedef void (*ipi_function_type)(void *data);
18fa816346SAvi Kivity
197d36db35SAvi Kivity static struct spinlock ipi_lock;
20fa816346SAvi Kivity static volatile ipi_function_type ipi_function;
21a4f9d79dSJan Kiszka static void *volatile ipi_data;
227d36db35SAvi Kivity static volatile int ipi_done;
23e7bc5602SAvi Kivity static volatile bool ipi_wait;
24e7bc5602SAvi Kivity static int _cpu_count;
25b73c7c6eSAndrew Jones static atomic_t active_cpus;
26d36b378fSVarad Gautam extern u8 rm_trampoline, rm_trampoline_end;
271542cd7bSVarad Gautam #if defined(__i386__) || defined(CONFIG_EFI)
28d36b378fSVarad Gautam extern u8 ap_rm_gdt_descr;
29d36b378fSVarad Gautam #endif
30d36b378fSVarad Gautam
311542cd7bSVarad Gautam #ifdef CONFIG_EFI
321542cd7bSVarad Gautam extern u8 ap_rm_gdt, ap_rm_gdt_end;
331542cd7bSVarad Gautam extern u8 ap_start32;
341542cd7bSVarad Gautam extern u32 smp_stacktop;
351542cd7bSVarad Gautam extern u8 stacktop;
361542cd7bSVarad Gautam #endif
371542cd7bSVarad Gautam
38d36b378fSVarad Gautam /* The BSP is online from time zero. */
39d36b378fSVarad Gautam atomic_t cpu_online_count = { .counter = 1 };
400b7501c3SVarad Gautam unsigned char online_cpus[(MAX_TEST_CPUS + 7) / 8];
417d36db35SAvi Kivity
ipi(void)427db17e21SThomas Huth static __attribute__((used)) void ipi(void)
437d36db35SAvi Kivity {
44e7bc5602SAvi Kivity void (*function)(void *data) = ipi_function;
45e7bc5602SAvi Kivity void *data = ipi_data;
46e7bc5602SAvi Kivity bool wait = ipi_wait;
47e7bc5602SAvi Kivity
48e7bc5602SAvi Kivity if (!wait) {
497d36db35SAvi Kivity ipi_done = 1;
50e7bc5602SAvi Kivity apic_write(APIC_EOI, 0);
51e7bc5602SAvi Kivity }
52e7bc5602SAvi Kivity function(data);
53b73c7c6eSAndrew Jones atomic_dec(&active_cpus);
54e7bc5602SAvi Kivity if (wait) {
55e7bc5602SAvi Kivity ipi_done = 1;
56e7bc5602SAvi Kivity apic_write(APIC_EOI, 0);
57e7bc5602SAvi Kivity }
587d36db35SAvi Kivity }
597d36db35SAvi Kivity
607d36db35SAvi Kivity asm (
617d36db35SAvi Kivity "ipi_entry: \n"
627d36db35SAvi Kivity " call ipi \n"
637d36db35SAvi Kivity #ifndef __x86_64__
647d36db35SAvi Kivity " iret"
657d36db35SAvi Kivity #else
667d36db35SAvi Kivity " iretq"
677d36db35SAvi Kivity #endif
687d36db35SAvi Kivity );
697d36db35SAvi Kivity
cpu_count(void)707d36db35SAvi Kivity int cpu_count(void)
717d36db35SAvi Kivity {
72a9f949c7SAvi Kivity return _cpu_count;
737d36db35SAvi Kivity }
747d36db35SAvi Kivity
/*
 * Return the calling CPU's SMP id, as stored in its per-CPU data by
 * setup_smp_id().
 */
int smp_id(void)
{
	return this_cpu_read_smp_id();
}
797d36db35SAvi Kivity
/*
 * Record the calling CPU's APIC id as its SMP id.
 *
 * @data is unused; the parameter exists so the function matches the callback
 * signature expected by on_cpu().
 */
static void setup_smp_id(void *data)
{
	this_cpu_write_smp_id(apic_id());
}
847d36db35SAvi Kivity
ap_online(void)85cd6bfb1dSSean Christopherson void ap_online(void)
86cd6bfb1dSSean Christopherson {
87787f0aebSMaxim Levitsky sti();
88cd6bfb1dSSean Christopherson
89c2f434b1SPaolo Bonzini printf("setup: CPU %" PRId32 " online\n", apic_id());
90cd6bfb1dSSean Christopherson atomic_inc(&cpu_online_count);
91cd6bfb1dSSean Christopherson
92cd6bfb1dSSean Christopherson /* Only the BSP runs the test's main(), APs are given work via IPIs. */
93cd6bfb1dSSean Christopherson for (;;)
94cd6bfb1dSSean Christopherson asm volatile("hlt");
95cd6bfb1dSSean Christopherson }
96cd6bfb1dSSean Christopherson
__on_cpu(int cpu,void (* function)(void * data),void * data,int wait)970a2f06f7SSean Christopherson static void __on_cpu(int cpu, void (*function)(void *data), void *data, int wait)
987d36db35SAvi Kivity {
990a2f06f7SSean Christopherson const u32 ipi_icr = APIC_INT_ASSERT | APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR;
10018a34cceSNadav Amit unsigned int target = id_map[cpu];
10118a34cceSNadav Amit
1027d36db35SAvi Kivity spin_lock(&ipi_lock);
1030a2f06f7SSean Christopherson if (target == smp_id()) {
1047d36db35SAvi Kivity function(data);
1050a2f06f7SSean Christopherson } else {
106b73c7c6eSAndrew Jones atomic_inc(&active_cpus);
1077d36db35SAvi Kivity ipi_done = 0;
1087d36db35SAvi Kivity ipi_function = function;
1097d36db35SAvi Kivity ipi_data = data;
110e7bc5602SAvi Kivity ipi_wait = wait;
1110a2f06f7SSean Christopherson apic_icr_write(ipi_icr, target);
1127d36db35SAvi Kivity while (!ipi_done)
1137d36db35SAvi Kivity ;
1147d36db35SAvi Kivity }
1157d36db35SAvi Kivity spin_unlock(&ipi_lock);
1167d36db35SAvi Kivity }
1177d36db35SAvi Kivity
/*
 * Run @function(@data) on CPU @cpu and return only once it has completed
 * there.
 */
void on_cpu(int cpu, void (*function)(void *data), void *data)
{
	__on_cpu(cpu, function, data, 1);
}
1227d36db35SAvi Kivity
/*
 * Start @function(@data) on CPU @cpu without waiting for it to finish; the
 * call returns once the target has accepted the request.  If @cpu is the
 * calling CPU the function still runs to completion before returning.
 */
void on_cpu_async(int cpu, void (*function)(void *data), void *data)
{
	__on_cpu(cpu, function, data, 0);
}
1277d36db35SAvi Kivity
/*
 * Run @function(@data) on every CPU, the caller included, and wait for all of
 * them to finish.
 *
 * Requests are issued asynchronously from the highest CPU index down to 0,
 * then the caller spins until cpus_active() has dropped back to 1.
 */
void on_cpus(void (*function)(void *data), void *data)
{
	int cpu;

	for (cpu = cpu_count() - 1; cpu >= 0; --cpu)
		on_cpu_async(cpu, function, data);

	while (cpus_active() > 1)
		pause();
}
138b73c7c6eSAndrew Jones
cpus_active(void)139b73c7c6eSAndrew Jones int cpus_active(void)
140b73c7c6eSAndrew Jones {
141b73c7c6eSAndrew Jones return atomic_read(&active_cpus);
142b73c7c6eSAndrew Jones }
1437d36db35SAvi Kivity
smp_init(void)1447d36db35SAvi Kivity void smp_init(void)
1457d36db35SAvi Kivity {
1467d36db35SAvi Kivity int i;
1477d36db35SAvi Kivity void ipi_entry(void);
1487d36db35SAvi Kivity
1492bb987f3SAvi Kivity setup_idt();
15018a34cceSNadav Amit init_apic_map();
1512bb987f3SAvi Kivity set_idt_entry(IPI_VECTOR, ipi_entry, 0);
1527d36db35SAvi Kivity
1537d36db35SAvi Kivity setup_smp_id(0);
1547d36db35SAvi Kivity for (i = 1; i < cpu_count(); ++i)
1557d36db35SAvi Kivity on_cpu(i, setup_smp_id, 0);
1567d36db35SAvi Kivity
157b73c7c6eSAndrew Jones atomic_inc(&active_cpus);
1587d36db35SAvi Kivity }
15974e79380SPaolo Bonzini
/*
 * on_cpu() callback that resets the local APIC of the CPU it runs on.
 * @data is unused.
 */
static void do_reset_apic(void *data)
{
	reset_apic();
}
16474e79380SPaolo Bonzini
smp_reset_apic(void)16574e79380SPaolo Bonzini void smp_reset_apic(void)
16674e79380SPaolo Bonzini {
16774e79380SPaolo Bonzini int i;
16874e79380SPaolo Bonzini
16974e79380SPaolo Bonzini reset_apic();
17074e79380SPaolo Bonzini for (i = 1; i < cpu_count(); ++i)
17174e79380SPaolo Bonzini on_cpu(i, do_reset_apic, 0);
17274e79380SPaolo Bonzini
17374e79380SPaolo Bonzini atomic_inc(&active_cpus);
17474e79380SPaolo Bonzini }
175d36b378fSVarad Gautam
/*
 * Prepare the GDT descriptor that APs load from the real-mode trampoline.
 *
 * Does work only on i386 and EFI builds; on any other build it is a no-op.
 * Addresses are formed from offsets relative to rm_trampoline and then used
 * directly as pointers, i.e. they refer to the relocated copy of the
 * trampoline - this presumably relies on RM_TRAMPOLINE_ADDR being 0; confirm.
 */
static void setup_rm_gdt(void)
{
#ifdef __i386__
	struct descriptor_table_ptr *rm_gdt =
		(struct descriptor_table_ptr *) (&ap_rm_gdt_descr - &rm_trampoline);
	/*
	 * On i386, place the gdt descriptor to be loaded from SIPI vector right after
	 * the vector code.
	 */
	sgdt(rm_gdt);
#elif defined(CONFIG_EFI)
	idt_entry_t *gate_descr;

	/*
	 * The realmode trampoline on EFI has the following layout:
	 *
	 * |rm_trampoline:
	 * |sipi_entry:
	 * |  <AP bootstrapping code called from SIPI>
	 * |ap_rm_gdt:
	 * |  <GDT used for 16-bit -> 32-bit transition>
	 * |ap_rm_gdt_descr:
	 * |  <GDT descriptor for ap_rm_gdt>
	 * |sipi_end:
	 * |  <End of trampoline>
	 * |rm_trampoline_end:
	 *
	 * After relocating to the lowmem address pointed to by realmode_trampoline,
	 * the realmode GDT descriptor needs to contain the relocated address of
	 * ap_rm_gdt.
	 */
	volatile struct descriptor_table_ptr *rm_gdt_descr =
		(struct descriptor_table_ptr *) (&ap_rm_gdt_descr - &rm_trampoline);
	rm_gdt_descr->base = (ulong) ((u32) (&ap_rm_gdt - &rm_trampoline));
	rm_gdt_descr->limit = (u16) (&ap_rm_gdt_end - &ap_rm_gdt - 1);

	/*
	 * Since 1. compile time calculation of offsets is not allowed when
	 * building with -shared, and 2. rip-relative addressing is not supported in
	 * 16-bit mode, the relocated address of ap_rm_gdt_descr needs to be stored at
	 * a location known to / accessible from the trampoline.
	 *
	 * Use the last two bytes of the trampoline page (REALMODE_GDT_LOWMEM) to store
	 * a pointer to relocated ap_rm_gdt_descr addr. This way, the trampoline code can
	 * find the relocated descriptor using the lowmem address at pa=REALMODE_GDT_LOWMEM,
	 * and this relocated descriptor points to the relocated GDT.
	 */
	*((u16 *)(REALMODE_GDT_LOWMEM)) = (u16) (u64) rm_gdt_descr;

	/*
	 * Set up a call gate to the 32-bit entrypoint (ap_start32) within GDT, since
	 * EFI may not load the 32-bit AP entrypoint (ap_start32) low enough
	 * to be reachable from the SIPI vector.
	 *
	 * Since kvm-unit-tests builds with -shared, this location needs to be fetched
	 * at runtime, and rip-relative addressing is not supported in 16-bit mode. This
	 * prevents using a long jump to ap_start32 (`ljmpl $cs, $ap_start32`).
	 *
	 * As an alternative, a far return via `push $cs; push $label; lret` would require
	 * an intermediate trampoline since $label must still be within 0 - 0xFFFF for
	 * 16-bit far return to work.
	 *
	 * Using a call gate allows for an easier 16-bit -> 32-bit transition via `lcall`.
	 *
	 * GDT layout:
	 *
	 * Entry | Segment
	 * 0	 | NULL descr
	 * 1	 | Code segment descr
	 * 2	 | Data segment descr
	 * 3	 | Call gate descr
	 *
	 * This layout is only used for reaching 32-bit mode. APs load a 64-bit GDT
	 * later during boot, which does not need to follow this layout.
	 */
	gate_descr = ((void *)(&ap_rm_gdt - &rm_trampoline) + 3 * sizeof(gdt_entry_t));
	set_desc_entry(gate_descr, sizeof(gdt_entry_t), (void *) &ap_start32,
		       0x8 /* sel */, 0xc /* type */, 0 /* dpl */);
#endif
}
256d36b378fSVarad Gautam
bringup_aps(void)2570afd4dd0SSean Christopherson void bringup_aps(void)
258d36b378fSVarad Gautam {
259d36b378fSVarad Gautam void *rm_trampoline_dst = RM_TRAMPOLINE_ADDR;
260d36b378fSVarad Gautam size_t rm_trampoline_size = (&rm_trampoline_end - &rm_trampoline) + 1;
261d36b378fSVarad Gautam assert(rm_trampoline_size < PAGE_SIZE);
262d36b378fSVarad Gautam
263d36b378fSVarad Gautam asm volatile("cld");
264d36b378fSVarad Gautam
265d36b378fSVarad Gautam /*
266d36b378fSVarad Gautam * Fill the trampoline page with with INT3 (0xcc) so that any AP
267d36b378fSVarad Gautam * that goes astray within the first page gets a fault.
268d36b378fSVarad Gautam */
269d36b378fSVarad Gautam memset(rm_trampoline_dst, 0xcc /* INT3 */, PAGE_SIZE);
270d36b378fSVarad Gautam
271d36b378fSVarad Gautam memcpy(rm_trampoline_dst, &rm_trampoline, rm_trampoline_size);
272d36b378fSVarad Gautam
273d36b378fSVarad Gautam setup_rm_gdt();
274d36b378fSVarad Gautam
2751542cd7bSVarad Gautam #ifdef CONFIG_EFI
276*2821b32dSSean Christopherson smp_stacktop = ((u64) (&stacktop)) - PER_CPU_SIZE;
2771542cd7bSVarad Gautam #endif
2781542cd7bSVarad Gautam
279d36b378fSVarad Gautam /* INIT */
280d36b378fSVarad Gautam apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 0);
281d36b378fSVarad Gautam
282d36b378fSVarad Gautam /* SIPI */
283d36b378fSVarad Gautam apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP, 0);
284d36b378fSVarad Gautam
285d36b378fSVarad Gautam _cpu_count = fwcfg_get_nb_cpus();
286d36b378fSVarad Gautam
287d36b378fSVarad Gautam printf("smp: waiting for %d APs\n", _cpu_count - 1);
288d36b378fSVarad Gautam while (_cpu_count != atomic_read(&cpu_online_count))
289d36b378fSVarad Gautam cpu_relax();
290d36b378fSVarad Gautam }
291