xref: /kvm-unit-tests/lib/x86/smp.c (revision 1542cd7b194d665f4d7812f549c0168ee6ed2c80)
17d36db35SAvi Kivity 
27d36db35SAvi Kivity #include <libcflat.h>
3d36b378fSVarad Gautam 
4d36b378fSVarad Gautam #include <asm/barrier.h>
5d36b378fSVarad Gautam 
6b73c7c6eSAndrew Jones #include "processor.h"
7b73c7c6eSAndrew Jones #include "atomic.h"
87d36db35SAvi Kivity #include "smp.h"
97d36db35SAvi Kivity #include "apic.h"
107d36db35SAvi Kivity #include "fwcfg.h"
11a4f9d79dSJan Kiszka #include "desc.h"
12d36b378fSVarad Gautam #include "alloc_page.h"
13d36b378fSVarad Gautam #include "asm/page.h"
147d36db35SAvi Kivity 
157d36db35SAvi Kivity #define IPI_VECTOR 0x20
167d36db35SAvi Kivity 
17fa816346SAvi Kivity typedef void (*ipi_function_type)(void *data);
18fa816346SAvi Kivity 
197d36db35SAvi Kivity static struct spinlock ipi_lock;
20fa816346SAvi Kivity static volatile ipi_function_type ipi_function;
21a4f9d79dSJan Kiszka static void *volatile ipi_data;
227d36db35SAvi Kivity static volatile int ipi_done;
23e7bc5602SAvi Kivity static volatile bool ipi_wait;
24e7bc5602SAvi Kivity static int _cpu_count;
25b73c7c6eSAndrew Jones static atomic_t active_cpus;
26d36b378fSVarad Gautam extern u8 rm_trampoline, rm_trampoline_end;
27*1542cd7bSVarad Gautam #if defined(__i386__) || defined(CONFIG_EFI)
28d36b378fSVarad Gautam extern u8 ap_rm_gdt_descr;
29d36b378fSVarad Gautam #endif
30d36b378fSVarad Gautam 
31*1542cd7bSVarad Gautam #ifdef CONFIG_EFI
32*1542cd7bSVarad Gautam extern u8 ap_rm_gdt, ap_rm_gdt_end;
33*1542cd7bSVarad Gautam extern u8 ap_start32;
34*1542cd7bSVarad Gautam extern u32 smp_stacktop;
35*1542cd7bSVarad Gautam extern u8 stacktop;
36*1542cd7bSVarad Gautam #endif
37*1542cd7bSVarad Gautam 
38d36b378fSVarad Gautam /* The BSP is online from time zero. */
39d36b378fSVarad Gautam atomic_t cpu_online_count = { .counter = 1 };
407d36db35SAvi Kivity 
417db17e21SThomas Huth static __attribute__((used)) void ipi(void)
427d36db35SAvi Kivity {
43e7bc5602SAvi Kivity 	void (*function)(void *data) = ipi_function;
44e7bc5602SAvi Kivity 	void *data = ipi_data;
45e7bc5602SAvi Kivity 	bool wait = ipi_wait;
46e7bc5602SAvi Kivity 
47e7bc5602SAvi Kivity 	if (!wait) {
487d36db35SAvi Kivity 		ipi_done = 1;
49e7bc5602SAvi Kivity 		apic_write(APIC_EOI, 0);
50e7bc5602SAvi Kivity 	}
51e7bc5602SAvi Kivity 	function(data);
52b73c7c6eSAndrew Jones 	atomic_dec(&active_cpus);
53e7bc5602SAvi Kivity 	if (wait) {
54e7bc5602SAvi Kivity 		ipi_done = 1;
55e7bc5602SAvi Kivity 		apic_write(APIC_EOI, 0);
56e7bc5602SAvi Kivity 	}
577d36db35SAvi Kivity }
587d36db35SAvi Kivity 
597d36db35SAvi Kivity asm (
607d36db35SAvi Kivity 	 "ipi_entry: \n"
617d36db35SAvi Kivity 	 "   call ipi \n"
627d36db35SAvi Kivity #ifndef __x86_64__
637d36db35SAvi Kivity 	 "   iret"
647d36db35SAvi Kivity #else
657d36db35SAvi Kivity 	 "   iretq"
667d36db35SAvi Kivity #endif
677d36db35SAvi Kivity 	 );
687d36db35SAvi Kivity 
697d36db35SAvi Kivity int cpu_count(void)
707d36db35SAvi Kivity {
71a9f949c7SAvi Kivity 	return _cpu_count;
727d36db35SAvi Kivity }
737d36db35SAvi Kivity 
/* Return the per-CPU SMP id of the calling CPU (see setup_smp_id()). */
int smp_id(void)
{
	int id = this_cpu_read_smp_id();

	return id;
}
787d36db35SAvi Kivity 
/*
 * Record the calling CPU's APIC id as its per-CPU SMP id.
 *
 * @data is unused; the parameter only exists so the function matches
 * ipi_function_type and can be dispatched with on_cpu().
 */
static void setup_smp_id(void *data)
{
	this_cpu_write_smp_id(apic_id());
}
837d36db35SAvi Kivity 
840a2f06f7SSean Christopherson static void __on_cpu(int cpu, void (*function)(void *data), void *data, int wait)
857d36db35SAvi Kivity {
860a2f06f7SSean Christopherson 	const u32 ipi_icr = APIC_INT_ASSERT | APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR;
8718a34cceSNadav Amit 	unsigned int target = id_map[cpu];
8818a34cceSNadav Amit 
897d36db35SAvi Kivity 	spin_lock(&ipi_lock);
900a2f06f7SSean Christopherson 	if (target == smp_id()) {
917d36db35SAvi Kivity 		function(data);
920a2f06f7SSean Christopherson 	} else {
93b73c7c6eSAndrew Jones 		atomic_inc(&active_cpus);
947d36db35SAvi Kivity 		ipi_done = 0;
957d36db35SAvi Kivity 		ipi_function = function;
967d36db35SAvi Kivity 		ipi_data = data;
97e7bc5602SAvi Kivity 		ipi_wait = wait;
980a2f06f7SSean Christopherson 		apic_icr_write(ipi_icr, target);
997d36db35SAvi Kivity 		while (!ipi_done)
1007d36db35SAvi Kivity 			;
1017d36db35SAvi Kivity 	}
1027d36db35SAvi Kivity 	spin_unlock(&ipi_lock);
1037d36db35SAvi Kivity }
1047d36db35SAvi Kivity 
/*
 * Run @function(@data) on CPU @cpu and return only after it has completed
 * there (synchronous variant of __on_cpu()).
 */
void on_cpu(int cpu, void (*function)(void *data), void *data)
{
	const int wait_for_completion = 1;

	__on_cpu(cpu, function, data, wait_for_completion);
}
1097d36db35SAvi Kivity 
/*
 * Start @function(@data) on CPU @cpu without waiting for it to finish: for
 * a remote target this returns once the target has entered the IPI
 * handler.  When @cpu is the calling CPU the function still runs in place,
 * i.e. synchronously.
 */
void on_cpu_async(int cpu, void (*function)(void *data), void *data)
{
	const int wait_for_completion = 0;

	__on_cpu(cpu, function, data, wait_for_completion);
}
1147d36db35SAvi Kivity 
/*
 * Run @function(@data) on every CPU, the caller included.
 *
 * CPUs are kicked asynchronously from the highest index down to 0, then
 * the caller polls until cpus_active() has dropped back to at most 1.
 */
void on_cpus(void (*function)(void *data), void *data)
{
	int cpu = cpu_count();

	while (cpu-- > 0) {
		on_cpu_async(cpu, function, data);
	}

	while (cpus_active() > 1) {
		pause();
	}
}
125b73c7c6eSAndrew Jones 
126b73c7c6eSAndrew Jones int cpus_active(void)
127b73c7c6eSAndrew Jones {
128b73c7c6eSAndrew Jones 	return atomic_read(&active_cpus);
129b73c7c6eSAndrew Jones }
1307d36db35SAvi Kivity 
1317d36db35SAvi Kivity void smp_init(void)
1327d36db35SAvi Kivity {
1337d36db35SAvi Kivity 	int i;
1347d36db35SAvi Kivity 	void ipi_entry(void);
1357d36db35SAvi Kivity 
1362bb987f3SAvi Kivity 	setup_idt();
13718a34cceSNadav Amit 	init_apic_map();
1382bb987f3SAvi Kivity 	set_idt_entry(IPI_VECTOR, ipi_entry, 0);
1397d36db35SAvi Kivity 
1407d36db35SAvi Kivity 	setup_smp_id(0);
1417d36db35SAvi Kivity 	for (i = 1; i < cpu_count(); ++i)
1427d36db35SAvi Kivity 		on_cpu(i, setup_smp_id, 0);
1437d36db35SAvi Kivity 
144b73c7c6eSAndrew Jones 	atomic_inc(&active_cpus);
1457d36db35SAvi Kivity }
14674e79380SPaolo Bonzini 
/*
 * on_cpu()-compatible wrapper around reset_apic().
 *
 * @data is unused; it is only present to match ipi_function_type.
 */
static void do_reset_apic(void *data)
{
	reset_apic();
}
15174e79380SPaolo Bonzini 
15274e79380SPaolo Bonzini void smp_reset_apic(void)
15374e79380SPaolo Bonzini {
15474e79380SPaolo Bonzini 	int i;
15574e79380SPaolo Bonzini 
15674e79380SPaolo Bonzini 	reset_apic();
15774e79380SPaolo Bonzini 	for (i = 1; i < cpu_count(); ++i)
15874e79380SPaolo Bonzini 		on_cpu(i, do_reset_apic, 0);
15974e79380SPaolo Bonzini 
16074e79380SPaolo Bonzini 	atomic_inc(&active_cpus);
16174e79380SPaolo Bonzini }
162d36b378fSVarad Gautam 
/*
 * Fix up the GDT-related data of the real-mode trampoline after ap_init()
 * has copied it to low memory.
 *
 * NOTE(review): every "relocated" address below is computed as a plain
 * offset from rm_trampoline and then used as a pointer, which appears to
 * assume the trampoline copy lives at address 0 -- confirm against
 * RM_TRAMPOLINE_ADDR.
 */
static void setup_rm_gdt(void)
{
#ifdef __i386__
	/*
	 * On i386, place the gdt descriptor to be loaded from SIPI vector
	 * right after the vector code: sgdt() writes into the
	 * ap_rm_gdt_descr slot of the relocated trampoline.
	 */
	struct descriptor_table_ptr *gdt_descr_slot =
		(struct descriptor_table_ptr *) (&ap_rm_gdt_descr - &rm_trampoline);

	sgdt(gdt_descr_slot);
#elif defined(CONFIG_EFI)
	/* Offset of ap_rm_gdt inside the trampoline == its relocated address. */
	const ulong gdt_offset = &ap_rm_gdt - &rm_trampoline;

	/*
	 * The realmode trampoline on EFI has the following layout:
	 *
	 * |rm_trampoline:
	 * |sipi_entry:
	 * |  <AP bootstrapping code called from SIPI>
	 * |ap_rm_gdt:
	 * |  <GDT used for 16-bit -> 32-bit transition>
	 * |ap_rm_gdt_descr:
	 * |  <GDT descriptor for ap_rm_gdt>
	 * |sipi_end:
	 * |  <End of trampoline>
	 * |rm_trampoline_end:
	 *
	 * After relocating to the lowmem address pointed to by
	 * realmode_trampoline, the realmode GDT descriptor needs to contain
	 * the relocated address of ap_rm_gdt.
	 */
	volatile struct descriptor_table_ptr *rm_gdt_descr =
			(struct descriptor_table_ptr *) (&ap_rm_gdt_descr - &rm_trampoline);

	/*
	 * GDT layout:
	 *
	 * Entry | Segment
	 * 0	 | NULL descr
	 * 1	 | Code segment descr
	 * 2	 | Data segment descr
	 * 3	 | Call gate descr
	 *
	 * This layout is only used for reaching 32-bit mode. APs load a
	 * 64-bit GDT later during boot, which does not need to follow this
	 * layout.
	 */
	idt_entry_t *call_gate = (void *) gdt_offset + 3 * sizeof(gdt_entry_t);

	rm_gdt_descr->base = (ulong) ((u32) gdt_offset);
	rm_gdt_descr->limit = (u16) (&ap_rm_gdt_end - &ap_rm_gdt - 1);

	/*
	 * Since 1. compile time calculation of offsets is not allowed when
	 * building with -shared, and 2. rip-relative addressing is not
	 * supported in 16-bit mode, the relocated address of ap_rm_gdt_descr
	 * needs to be stored at a location known to / accessible from the
	 * trampoline.
	 *
	 * Use the last two bytes of the trampoline page (REALMODE_GDT_LOWMEM)
	 * to store a pointer to the relocated ap_rm_gdt_descr. This way, the
	 * trampoline code can find the relocated descriptor using the lowmem
	 * address at pa=REALMODE_GDT_LOWMEM, and this relocated descriptor
	 * points to the relocated GDT.
	 */
	*((u16 *)(REALMODE_GDT_LOWMEM)) = (u16) (u64) rm_gdt_descr;

	/*
	 * Set up a call gate to the 32-bit entrypoint (ap_start32) within the
	 * GDT, since EFI may not load ap_start32 low enough to be reachable
	 * from the SIPI vector.
	 *
	 * Since kvm-unit-tests builds with -shared, this location needs to be
	 * fetched at runtime, and rip-relative addressing is not supported in
	 * 16-bit mode. This prevents using a long jump to ap_start32
	 * (`ljmpl $cs, $ap_start32`).
	 *
	 * As an alternative, a far return via `push $cs; push $label; lret`
	 * would require an intermediate trampoline since $label must still be
	 * within 0 - 0xFFFF for 16-bit far return to work.
	 *
	 * Using a call gate allows for an easier 16-bit -> 32-bit transition
	 * via `lcall`.
	 */
	set_desc_entry(call_gate, sizeof(gdt_entry_t), (void *) &ap_start32,
		       0x8 /* sel */, 0xc /* type */, 0 /* dpl */);
#endif
}
243d36b378fSVarad Gautam 
244d36b378fSVarad Gautam void ap_init(void)
245d36b378fSVarad Gautam {
246d36b378fSVarad Gautam 	void *rm_trampoline_dst = RM_TRAMPOLINE_ADDR;
247d36b378fSVarad Gautam 	size_t rm_trampoline_size = (&rm_trampoline_end - &rm_trampoline) + 1;
248d36b378fSVarad Gautam 	assert(rm_trampoline_size < PAGE_SIZE);
249d36b378fSVarad Gautam 
250d36b378fSVarad Gautam 	asm volatile("cld");
251d36b378fSVarad Gautam 
252d36b378fSVarad Gautam 	/*
253d36b378fSVarad Gautam 	 * Fill the trampoline page with with INT3 (0xcc) so that any AP
254d36b378fSVarad Gautam 	 * that goes astray within the first page gets a fault.
255d36b378fSVarad Gautam 	 */
256d36b378fSVarad Gautam 	memset(rm_trampoline_dst, 0xcc /* INT3 */, PAGE_SIZE);
257d36b378fSVarad Gautam 
258d36b378fSVarad Gautam 	memcpy(rm_trampoline_dst, &rm_trampoline, rm_trampoline_size);
259d36b378fSVarad Gautam 
260d36b378fSVarad Gautam 	setup_rm_gdt();
261d36b378fSVarad Gautam 
262*1542cd7bSVarad Gautam #ifdef CONFIG_EFI
263*1542cd7bSVarad Gautam 	smp_stacktop = ((u64) (&stacktop)) - PAGE_SIZE;
264*1542cd7bSVarad Gautam #endif
265*1542cd7bSVarad Gautam 
266d36b378fSVarad Gautam 	/* INIT */
267d36b378fSVarad Gautam 	apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 0);
268d36b378fSVarad Gautam 
269d36b378fSVarad Gautam 	/* SIPI */
270d36b378fSVarad Gautam 	apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP, 0);
271d36b378fSVarad Gautam 
272d36b378fSVarad Gautam 	_cpu_count = fwcfg_get_nb_cpus();
273d36b378fSVarad Gautam 
274d36b378fSVarad Gautam 	printf("smp: waiting for %d APs\n", _cpu_count - 1);
275d36b378fSVarad Gautam 	while (_cpu_count != atomic_read(&cpu_online_count))
276d36b378fSVarad Gautam 		cpu_relax();
277d36b378fSVarad Gautam }
278