xref: /kvm-unit-tests/lib/x86/smp.c (revision dca3f4c041143c8e8dc70c6890a19a5730310230)

#include <libcflat.h>

#include <asm/barrier.h>

#include "processor.h"
#include "atomic.h"
#include "smp.h"
#include "apic.h"
#include "fwcfg.h"
#include "desc.h"
#include "alloc_page.h"
#include "asm/page.h"

#define IPI_VECTOR 0x20

typedef void (*ipi_function_type)(void *data);

static struct spinlock ipi_lock;
static volatile ipi_function_type ipi_function;
static void *volatile ipi_data;
static volatile int ipi_done;
static volatile bool ipi_wait;
static int _cpu_count;
static atomic_t active_cpus;
extern u8 rm_trampoline, rm_trampoline_end;
#if defined(__i386__) || defined(CONFIG_EFI)
extern u8 ap_rm_gdt_descr;
#endif

#ifdef CONFIG_EFI
extern u8 ap_rm_gdt, ap_rm_gdt_end;
extern u8 ap_start32;
extern u32 smp_stacktop;
extern u8 stacktop;
#endif

/* The BSP is online from time zero. */
atomic_t cpu_online_count = { .counter = 1 };
unsigned char online_cpus[(MAX_TEST_CPUS + 7) / 8];

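/*
 * IPI servicing routine, reached on the target CPU via ipi_entry below.
 * The sender publishes ipi_function/ipi_data/ipi_wait under ipi_lock and
 * then spins on ipi_done.  For async IPIs (ipi_wait == false) the target
 * acks the sender and EOIs the APIC before running the callback; for
 * synchronous IPIs it does so only after the callback has finished, so the
 * sender's spin in __on_cpu() doubles as a completion wait.
 */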
static __attribute__((used)) void ipi(void)
{
	void (*function)(void *data) = ipi_function;
	void *data = ipi_data;
	bool wait = ipi_wait;

	if (!wait) {
		ipi_done = 1;
		apic_write(APIC_EOI, 0);
	}
	function(data);
	atomic_dec(&active_cpus);
	if (wait) {
		ipi_done = 1;
		apic_write(APIC_EOI, 0);
	}
}

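/*
 * Raw interrupt entry stub installed for IPI_VECTOR: call the C handler and
 * return with iret/iretq.  No registers are saved or restored here, so this
 * effectively relies on APs taking the IPI from their idle HLT loop rather
 * than from arbitrary code.
 */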
asm (
	 "ipi_entry: \n"
	 "   call ipi \n"
#ifndef __x86_64__
	 "   iret"
#else
	 "   iretq"
#endif
	 );

int cpu_count(void)
{
	return _cpu_count;
}

int smp_id(void)
{
	return this_cpu_read_smp_id();
}

static void setup_smp_id(void *data)
{
	this_cpu_write_smp_id(apic_id());
}

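/*
 * Entry point for APs once the boot code has switched them out of real
 * mode: announce the CPU as online, then idle in HLT waiting for work
 * delivered through IPI_VECTOR.
 */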
void ap_online(void)
{
	sti();

	printf("setup: CPU %" PRId32 " online\n", apic_id());
	atomic_inc(&cpu_online_count);

	/* Only the BSP runs the test's main(), APs are given work via IPIs. */
	for (;;)
		asm volatile("hlt");
}

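/*
 * Run function(data) on the CPU with the given test-internal index.  The
 * ipi_lock serializes senders so only one IPI is in flight at a time; the
 * caller either runs the function locally (when targeting itself) or sends
 * a fixed IPI and spins until the target sets ipi_done.  With wait == 1 the
 * spin also covers execution of the function itself.
 */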
static void __on_cpu(int cpu, void (*function)(void *data), void *data, int wait)
{
	const u32 ipi_icr = APIC_INT_ASSERT | APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR;
	unsigned int target = id_map[cpu];

	spin_lock(&ipi_lock);
	if (target == smp_id()) {
		function(data);
	} else {
		atomic_inc(&active_cpus);
		ipi_done = 0;
		ipi_function = function;
		ipi_data = data;
		ipi_wait = wait;
		apic_icr_write(ipi_icr, target);
		while (!ipi_done)
			;
	}
	spin_unlock(&ipi_lock);
}

void on_cpu(int cpu, void (*function)(void *data), void *data)
{
	__on_cpu(cpu, function, data, 1);
}

void on_cpu_async(int cpu, void (*function)(void *data), void *data)
{
	__on_cpu(cpu, function, data, 0);
}

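/*
 * Run function(data) once on every CPU.  Iterating from the highest index
 * down to 0 means the caller's own slot is handled last and is simply run
 * locally by __on_cpu().  active_cpus includes the baseline reference taken
 * in smp_init(), so waiting for cpus_active() > 1 waits until every AP has
 * drained its callback.
 *
 * Typical usage in a test (illustration only, not taken from this file):
 *
 *	static void set_cr4_bit(void *data)
 *	{
 *		write_cr4(read_cr4() | (ulong)data);
 *	}
 *	...
 *	on_cpus(set_cr4_bit, (void *)X86_CR4_DE);
 */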
void on_cpus(void (*function)(void *data), void *data)
{
	int cpu;

	for (cpu = cpu_count() - 1; cpu >= 0; --cpu)
		on_cpu_async(cpu, function, data);

	while (cpus_active() > 1)
		pause();
}

int cpus_active(void)
{
	return atomic_read(&active_cpus);
}

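/*
 * Called on the BSP once the APs are online: install the IPI handler,
 * build the APIC id map, record each CPU's smp id in its per-CPU area, and
 * take a baseline reference on active_cpus so that cpus_active() never
 * drops below 1 while the BSP is running.
 */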
void smp_init(void)
{
	int i;
	void ipi_entry(void);

	setup_idt();
	init_apic_map();
	set_idt_entry(IPI_VECTOR, ipi_entry, 0);

	setup_smp_id(0);
	for (i = 1; i < cpu_count(); ++i)
		on_cpu(i, setup_smp_id, 0);

	atomic_inc(&active_cpus);
}

static void do_reset_apic(void *data)
{
	reset_apic();
}

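/*
 * Reset the local APIC on the calling CPU and on every AP.  The trailing
 * atomic_inc() mirrors the baseline active_cpus reference taken in
 * smp_init().
 */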
void smp_reset_apic(void)
{
	int i;

	reset_apic();
	for (i = 1; i < cpu_count(); ++i)
		on_cpu(i, do_reset_apic, 0);

	atomic_inc(&active_cpus);
}

static void setup_rm_gdt(void)
{
#ifdef __i386__
	struct descriptor_table_ptr *rm_gdt =
		(struct descriptor_table_ptr *) (&ap_rm_gdt_descr - &rm_trampoline);
	/*
	 * On i386, place the GDT descriptor to be loaded from the SIPI vector
	 * right after the vector code.
	 */
	sgdt(rm_gdt);
#elif defined(CONFIG_EFI)
	idt_entry_t *gate_descr;

	/*
	 * The realmode trampoline on EFI has the following layout:
	 *
	 * |rm_trampoline:
	 * |sipi_entry:
	 * |  <AP bootstrapping code called from SIPI>
	 * |ap_rm_gdt:
	 * |  <GDT used for the 16-bit -> 32-bit transition>
	 * |ap_rm_gdt_descr:
	 * |  <GDT descriptor for ap_rm_gdt>
	 * |sipi_end:
	 * |  <End of trampoline>
	 * |rm_trampoline_end:
	 *
	 * After relocating to the lowmem address pointed to by realmode_trampoline,
	 * the realmode GDT descriptor needs to contain the relocated address of
	 * ap_rm_gdt.
	 */
	volatile struct descriptor_table_ptr *rm_gdt_descr =
			(struct descriptor_table_ptr *) (&ap_rm_gdt_descr - &rm_trampoline);
	rm_gdt_descr->base = (ulong) ((u32) (&ap_rm_gdt - &rm_trampoline));
	rm_gdt_descr->limit = (u16) (&ap_rm_gdt_end - &ap_rm_gdt - 1);

	/*
	 * Since 1. compile-time calculation of offsets is not allowed when
	 * building with -shared, and 2. rip-relative addressing is not supported in
	 * 16-bit mode, the relocated address of ap_rm_gdt_descr needs to be stored at
	 * a location known to / accessible from the trampoline.
	 *
	 * Use the last two bytes of the trampoline page (REALMODE_GDT_LOWMEM) to store
	 * a pointer to the relocated ap_rm_gdt_descr addr. This way, the trampoline code
	 * can find the relocated descriptor using the lowmem address at
	 * pa=REALMODE_GDT_LOWMEM, and this relocated descriptor points to the relocated
	 * GDT.
	 */
	*((u16 *)(REALMODE_GDT_LOWMEM)) = (u16) (u64) rm_gdt_descr;

	/*
	 * Set up a call gate to the 32-bit entrypoint (ap_start32) within the GDT,
	 * since EFI may not load the 32-bit AP entrypoint (ap_start32) low enough
	 * to be reachable from the SIPI vector.
	 *
	 * Since kvm-unit-tests builds with -shared, this location needs to be fetched
	 * at runtime, and rip-relative addressing is not supported in 16-bit mode. This
	 * prevents using a long jump to ap_start32 (`ljmpl $cs, $ap_start32`).
	 *
	 * As an alternative, a far return via `push $cs; push $label; lret` would require
	 * an intermediate trampoline since $label must still be within 0 - 0xFFFF for
	 * a 16-bit far return to work.
	 *
	 * Using a call gate allows for an easier 16-bit -> 32-bit transition via `lcall`.
	 *
	 * GDT layout:
	 *
	 * Entry | Segment
	 * 0	 | NULL descr
	 * 1	 | Code segment descr
	 * 2	 | Data segment descr
	 * 3	 | Call gate descr
	 *
	 * This layout is only used for reaching 32-bit mode. APs load a 64-bit GDT
	 * later during boot, which does not need to follow this layout.
	 */
	gate_descr = ((void *)(&ap_rm_gdt - &rm_trampoline) + 3 * sizeof(gdt_entry_t));
	set_desc_entry(gate_descr, sizeof(gdt_entry_t), (void *) &ap_start32,
		       0x8 /* sel */, 0xc /* type */, 0 /* dpl */);
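
	/*
	 * With the gate in place, the 16-bit trampoline can reach ap_start32
	 * by a far call through GDT entry 3 (selector 0x18); for a call gate
	 * the offset operand of the lcall is ignored, so the trampoline does
	 * not need to know where ap_start32 was actually loaded.
	 */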
#endif
}

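/*
 * Boot all APs: copy the real-mode trampoline to its low-memory home, patch
 * in the GDT bits computed by setup_rm_gdt(), then kick every other CPU
 * with an INIT/SIPI pair and wait until the number of online CPUs matches
 * the count reported by fw_cfg.
 */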
void bringup_aps(void)
{
	void *rm_trampoline_dst = RM_TRAMPOLINE_ADDR;
	size_t rm_trampoline_size = (&rm_trampoline_end - &rm_trampoline) + 1;
	assert(rm_trampoline_size < PAGE_SIZE);

	asm volatile("cld");

	/*
	 * Fill the trampoline page with INT3 (0xcc) so that any AP that goes
	 * astray within the first page immediately faults.
	 */
	memset(rm_trampoline_dst, 0xcc /* INT3 */, PAGE_SIZE);

	memcpy(rm_trampoline_dst, &rm_trampoline, rm_trampoline_size);

	setup_rm_gdt();

#ifdef CONFIG_EFI
	smp_stacktop = ((u64) (&stacktop)) - PER_CPU_SIZE;
#endif

	/* INIT */
	apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 0);

	/* SIPI */
	apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP, 0);

	_cpu_count = fwcfg_get_nb_cpus();

	printf("smp: waiting for %d APs\n", _cpu_count - 1);
	while (_cpu_count != atomic_read(&cpu_online_count))
		cpu_relax();
}