1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * xapic_ipi_test
4  *
5  * Copyright (C) 2020, Google LLC.
6  *
7  * This work is licensed under the terms of the GNU GPL, version 2.
8  *
9  * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
10  * another vCPU that is halted when KVM's backing page for the APIC access
11  * address has been moved by mm.
12  *
13  * The test starts two vCPUs: one that sends IPIs and one that continually
14  * executes HLT. The sender checks that the halter has woken from the HLT and
15  * has reentered HLT before sending the next IPI. While the vCPUs are running,
16  * the host continually calls migrate_pages to move all of the process' pages
17  * amongst the available numa nodes on the machine.
18  *
 * Migration is a command line option. When used on non-numa machines, the
 * test will exit with an error. The test is still useful on non-numa machines
 * for testing IPIs.
21  */
22 #include <getopt.h>
23 #include <pthread.h>
24 #include <inttypes.h>
25 #include <string.h>
26 #include <time.h>
27 
28 #include "kvm_util.h"
29 #include "numaif.h"
30 #include "processor.h"
31 #include "test_util.h"
32 #include "vmx.h"
33 
34 /* Default running time for the test */
35 #define DEFAULT_RUN_SECS 3
36 
37 /* Default delay between migrate_pages calls (microseconds) */
38 #define DEFAULT_DELAY_USECS 500000
39 
40 /*
41  * Vector for IPI from sender vCPU to halting vCPU.
42  * Value is arbitrary and was chosen for the alternating bit pattern. Any
43  * value should work.
44  */
45 #define IPI_VECTOR	 0xa5
46 
47 /*
48  * Incremented in the IPI handler. Provides evidence to the sender that the IPI
49  * arrived at the destination
50  */
51 static volatile uint64_t ipis_rcvd;
52 
53 /* Data struct shared between host main thread and vCPUs */
54 struct test_data_page {
55 	uint32_t halter_apic_id;
56 	volatile uint64_t hlt_count;
57 	volatile uint64_t wake_count;
58 	uint64_t ipis_sent;
59 	uint64_t migrations_attempted;
60 	uint64_t migrations_completed;
61 	uint32_t icr;
62 	uint32_t icr2;
63 	uint32_t halter_tpr;
64 	uint32_t halter_ppr;
65 
66 	/*
67 	 *  Record local version register as a cross-check that APIC access
68 	 *  worked. Value should match what KVM reports (APIC_VERSION in
69 	 *  arch/x86/kvm/lapic.c). If test is failing, check that values match
70 	 *  to determine whether APIC access exits are working.
71 	 */
72 	uint32_t halter_lvr;
73 };
74 
75 struct thread_params {
76 	struct test_data_page *data;
77 	struct kvm_vcpu *vcpu;
78 	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
79 };
80 
verify_apic_base_addr(void)81 void verify_apic_base_addr(void)
82 {
83 	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
84 	uint64_t base = GET_APIC_BASE(msr);
85 
86 	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
87 }
88 
/*
 * Guest code for the halting vCPU: publish identity via the shared data page,
 * then loop forever halting and counting halts & wakes so that the sender
 * vCPU and the host can verify IPIs are actually being delivered.
 */
static void halter_guest_code(struct test_data_page *data)
{
	/* Sanity-check the APIC base address before enabling xAPIC mode. */
	verify_apic_base_addr();
	xapic_enable();

	/*
	 * Publish this vCPU's APIC ID so the sender can target its IPIs, and
	 * the local version register as a cross-check that APIC reads work
	 * (see the comment on test_data_page.halter_lvr).
	 */
	data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
	data->halter_lvr = xapic_read_reg(APIC_LVR);

	/*
	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
	 * each time around to minimize window between signaling the pending
	 * halt to the sender vCPU and executing the halt. No need to disable on
	 * first run as this vCPU executes first and the host waits for it to
	 * signal going into first halt before starting the sender vCPU. Record
	 * TPR and PPR for diagnostic purposes in case the test fails.
	 */
	for (;;) {
		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
		/* Signal the imminent halt, then halt (safe_halt() re-enables
		 * IRQs so the sender's IPI can wake this vCPU).
		 */
		data->hlt_count++;
		safe_halt();
		/* Mask IRQs again (counterpart of safe_halt()) per the comment
		 * above, then record the wake.
		 */
		cli();
		data->wake_count++;
	}
}
114 
115 /*
116  * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
117  * enable diagnosing errant writes to the APIC access address backing page in
118  * case of test failure.
119  */
guest_ipi_handler(struct ex_regs * regs)120 static void guest_ipi_handler(struct ex_regs *regs)
121 {
122 	ipis_rcvd++;
123 	xapic_write_reg(APIC_EOI, 77);
124 }
125 
/*
 * Guest code for the IPI-sending vCPU: repeatedly IPI the halter vCPU and
 * verify, via the shared data page and the ipis_rcvd global, that each IPI
 * was received and that the halter woke up and re-entered halt before the
 * next IPI is sent.
 */
static void sender_guest_code(struct test_data_page *data)
{
	uint64_t last_wake_count;
	uint64_t last_hlt_count;
	uint64_t last_ipis_rcvd_count;
	uint32_t icr_val;
	uint32_t icr2_val;
	uint64_t tsc_start;

	verify_apic_base_addr();
	xapic_enable();

	/*
	 * Init interrupt command register for sending IPIs
	 *
	 * Delivery mode=fixed, per SDM:
	 *   "Delivers the interrupt specified in the vector field to the target
	 *    processor."
	 *
	 * Destination mode=physical i.e. specify target by its local APIC
	 * ID. This vCPU assumes that the halter vCPU has already started and
	 * set data->halter_apic_id.
	 */
	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
	/* Record the ICR values in the shared page for host-side diagnostics. */
	data->icr = icr_val;
	data->icr2 = icr2_val;

	/* Snapshot counters so progress can be detected after each IPI. */
	last_wake_count = data->wake_count;
	last_hlt_count = data->hlt_count;
	last_ipis_rcvd_count = ipis_rcvd;
	for (;;) {
		/*
		 * Send IPI to halter vCPU.
		 * First IPI can be sent unconditionally because halter vCPU
		 * starts earlier.
		 */
		xapic_write_reg(APIC_ICR2, icr2_val);
		xapic_write_reg(APIC_ICR, icr_val);
		data->ipis_sent++;

		/*
		 * Wait up to ~1 sec for halter to indicate that it has:
		 * 1. Received the IPI
		 * 2. Woken up from the halt
		 * 3. Gone back into halt
		 * Current CPUs typically run at 2.x Ghz which is ~2
		 * billion ticks per second.
		 */
		tsc_start = rdtsc();
		while (rdtsc() - tsc_start < 2000000000) {
			if ((ipis_rcvd != last_ipis_rcvd_count) &&
			    (data->wake_count != last_wake_count) &&
			    (data->hlt_count != last_hlt_count))
				break;
		}

		/* Fail hard if any of the three counters failed to advance. */
		GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
			     (data->wake_count != last_wake_count) &&
			     (data->hlt_count != last_hlt_count));

		last_wake_count = data->wake_count;
		last_hlt_count = data->hlt_count;
		last_ipis_rcvd_count = ipis_rcvd;
	}
}
192 
/*
 * Host thread entry point for each vCPU. Runs the vCPU until the main thread
 * cancels it (both guest bodies loop forever) or the guest asserts, in which
 * case detailed diagnostics from the shared data page are reported.
 *
 * arg is a struct thread_params *; always returns NULL.
 */
static void *vcpu_thread(void *arg)
{
	struct thread_params *params = (struct thread_params *)arg;
	struct kvm_vcpu *vcpu = params->vcpu;
	struct ucall uc;
	int old;
	int r;

	/*
	 * Allow asynchronous cancellation: the vCPU loops never exit on their
	 * own, so the main thread stops them via pthread_cancel() (see
	 * cancel_join_vcpu_thread()).
	 */
	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
	TEST_ASSERT(r == 0,
		    "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
		    vcpu->id, r);

	fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
	vcpu_run(vcpu);

	/* Only a guest ucall (e.g. GUEST_ASSERT) should get us here. */
	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

	if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
		TEST_ASSERT(false,
			    "vCPU %u exited with error: %s.\n"
			    "Sending vCPU sent %lu IPIs to halting vCPU\n"
			    "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
			    "Halter TPR=%#x PPR=%#x LVR=%#x\n"
			    "Migrations attempted: %lu\n"
			    "Migrations completed: %lu",
			    vcpu->id, (const char *)uc.args[0],
			    params->data->ipis_sent, params->data->hlt_count,
			    params->data->wake_count,
			    *params->pipis_rcvd, params->data->halter_tpr,
			    params->data->halter_ppr, params->data->halter_lvr,
			    params->data->migrations_attempted,
			    params->data->migrations_completed);
	}

	return NULL;
}
230 
cancel_join_vcpu_thread(pthread_t thread,struct kvm_vcpu * vcpu)231 static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
232 {
233 	void *retval;
234 	int r;
235 
236 	r = pthread_cancel(thread);
237 	TEST_ASSERT(r == 0,
238 		    "pthread_cancel on vcpu_id=%d failed with errno=%d",
239 		    vcpu->id, r);
240 
241 	r = pthread_join(thread, &retval);
242 	TEST_ASSERT(r == 0,
243 		    "pthread_join on vcpu_id=%d failed with errno=%d",
244 		    vcpu->id, r);
245 	TEST_ASSERT(retval == PTHREAD_CANCELED,
246 		    "expected retval=%p, got %p", PTHREAD_CANCELED,
247 		    retval);
248 }
249 
/*
 * Host-side migration loop: for run_secs seconds, repeatedly migrate all of
 * this process' pages (including KVM's APIC access backing page) between the
 * available numa nodes, sleeping delay_usecs between calls. Prints progress
 * once per second and asserts the guest counters keep advancing, i.e. that a
 * migrated APIC page did not break IPI delivery.
 */
void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
		   uint64_t *pipis_rcvd)
{
	long pages_not_moved;
	unsigned long nodemask = 0;
	unsigned long nodemasks[sizeof(nodemask) * 8];
	int nodes = 0;
	time_t start_time, last_update, now;
	time_t interval_secs = 1;
	int i, r;
	int from, to;
	unsigned long bit;
	uint64_t hlt_count;
	uint64_t wake_count;
	uint64_t ipis_sent;

	fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
		delay_usecs);

	/* Get set of first 64 numa nodes available */
	r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
			  0, MPOL_F_MEMS_ALLOWED);
	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);

	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
		"(each 1-bit indicates node is present): %#lx\n",
		sizeof(nodemask) * 8, nodemask);

	/* Init array of masks containing a single-bit in each, one for each
	 * available node. migrate_pages called below requires specifying nodes
	 * as bit masks.
	 */
	for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
		if (nodemask & bit) {
			nodemasks[nodes] = nodemask & bit;
			nodes++;
		}
	}

	/* Migration is meaningless on a single node; see the file header. */
	TEST_ASSERT(nodes > 1,
		    "Did not find at least 2 numa nodes. Can't do migration");

	fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);

	/* Rotate through the nodes: 0->1, 1->2, ..., last->0, ... */
	from = 0;
	to = 1;
	start_time = time(NULL);
	last_update = start_time;

	/* Baseline counters for the once-per-second progress assertion. */
	ipis_sent = data->ipis_sent;
	hlt_count = data->hlt_count;
	wake_count = data->wake_count;

	while ((int)(time(NULL) - start_time) < run_secs) {
		data->migrations_attempted++;

		/*
		 * migrate_pages with PID=0 will migrate all pages of this
		 * process between the nodes specified as bitmasks. The page
		 * backing the APIC access address belongs to this process
		 * because it is allocated by KVM in the context of the
		 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
		 * test may break or give a false positive signal.
		 */
		pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
						&nodemasks[from],
						&nodemasks[to]);
		if (pages_not_moved < 0)
			fprintf(stderr,
				"migrate_pages failed, errno=%d\n", errno);
		else if (pages_not_moved > 0)
			fprintf(stderr,
				"migrate_pages could not move %ld pages\n",
				pages_not_moved);
		else
			data->migrations_completed++;

		from = to;
		to++;
		if (to == nodes)
			to = 0;

		/*
		 * Once per second, print progress and verify that all three
		 * guest counters advanced since the last check; a stuck
		 * counter means the halter vCPU is likely hung.
		 */
		now = time(NULL);
		if (((now - start_time) % interval_secs == 0) &&
		    (now != last_update)) {
			last_update = now;
			fprintf(stderr,
				"%lu seconds: Migrations attempted=%lu completed=%lu, "
				"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
				now - start_time, data->migrations_attempted,
				data->migrations_completed,
				data->ipis_sent, *pipis_rcvd,
				data->hlt_count, data->wake_count);

			TEST_ASSERT(ipis_sent != data->ipis_sent &&
				    hlt_count != data->hlt_count &&
				    wake_count != data->wake_count,
				    "IPI, HLT and wake count have not increased "
				    "in the last %lu seconds. "
				    "HLTer is likely hung.", interval_secs);

			ipis_sent = data->ipis_sent;
			hlt_count = data->hlt_count;
			wake_count = data->wake_count;
		}
		usleep(delay_usecs);
	}
}
358 
get_cmdline_args(int argc,char * argv[],int * run_secs,bool * migrate,int * delay_usecs)359 void get_cmdline_args(int argc, char *argv[], int *run_secs,
360 		      bool *migrate, int *delay_usecs)
361 {
362 	for (;;) {
363 		int opt = getopt(argc, argv, "s:d:m");
364 
365 		if (opt == -1)
366 			break;
367 		switch (opt) {
368 		case 's':
369 			*run_secs = parse_size(optarg);
370 			break;
371 		case 'm':
372 			*migrate = true;
373 			break;
374 		case 'd':
375 			*delay_usecs = parse_size(optarg);
376 			break;
377 		default:
378 			TEST_ASSERT(false,
379 				    "Usage: -s <runtime seconds>. Default is %d seconds.\n"
380 				    "-m adds calls to migrate_pages while vCPUs are running."
381 				    " Default is no migrations.\n"
382 				    "-d <delay microseconds> - delay between migrate_pages() calls."
383 				    " Default is %d microseconds.",
384 				    DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
385 		}
386 	}
387 }
388 
int main(int argc, char *argv[])
{
	int r;
	int wait_secs;
	const int max_halter_wait = 10;
	int run_secs = 0;
	int delay_usecs = 0;
	struct test_data_page *data;
	vm_vaddr_t test_data_page_vaddr;
	bool migrate = false;
	pthread_t threads[2];
	struct thread_params params[2];
	struct kvm_vm *vm;
	uint64_t *pipis_rcvd;

	get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
	if (run_secs <= 0)
		run_secs = DEFAULT_RUN_SECS;
	if (delay_usecs <= 0)
		delay_usecs = DEFAULT_DELAY_USECS;

	/* vCPU 0 is the halter; the sender (vCPU 1) is added below. */
	vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);

	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);

	/* Identity map the APIC GPA so both guests can access the xAPIC. */
	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);

	params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);

	/* Shared page through which guests report progress to the host. */
	test_data_page_vaddr = vm_vaddr_alloc_page(vm);
	data = addr_gva2hva(vm, test_data_page_vaddr);
	memset(data, 0, sizeof(*data));
	params[0].data = data;
	params[1].data = data;

	vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
	vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);

	/* Host-visible alias of the guest's ipis_rcvd global. */
	pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
	params[0].pipis_rcvd = pipis_rcvd;
	params[1].pipis_rcvd = pipis_rcvd;

	/* Start halter vCPU thread and wait for it to execute first HLT. */
	r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
	TEST_ASSERT(r == 0,
		    "pthread_create halter failed errno=%d", errno);
	fprintf(stderr, "Halter vCPU thread started\n");

	/* Poll (up to max_halter_wait secs) for the halter's first HLT. */
	wait_secs = 0;
	while ((wait_secs < max_halter_wait) && !data->hlt_count) {
		sleep(1);
		wait_secs++;
	}

	TEST_ASSERT(data->hlt_count,
		    "Halter vCPU did not execute first HLT within %d seconds",
		    max_halter_wait);

	fprintf(stderr,
		"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
		data->halter_apic_id, wait_secs);

	/* Halter is parked in HLT; safe to start the IPI sender now. */
	r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
	TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);

	fprintf(stderr,
		"IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
		run_secs);

	if (!migrate)
		sleep(run_secs);
	else
		do_migrations(data, run_secs, delay_usecs, pipis_rcvd);

	/*
	 * Cancel threads and wait for them to stop.
	 */
	cancel_join_vcpu_thread(threads[0], params[0].vcpu);
	cancel_join_vcpu_thread(threads[1], params[1].vcpu);

	/*
	 * If the host supports Idle HLT, i.e. KVM *might* be using Idle HLT,
	 * then the number of HLT exits may be less than the number of HLTs
	 * that were executed, as Idle HLT elides the exit if the vCPU has an
	 * unmasked, pending IRQ (or NMI).
	 */
	if (this_cpu_has(X86_FEATURE_IDLE_HLT))
		TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
			    "HLT insns = %lu, HLT exits = %lu",
			    data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
	else
		TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));

	/* Final summary; also doubles as diagnostics for flaky runs. */
	fprintf(stderr,
		"Test successful after running for %d seconds.\n"
		"Sending vCPU sent %lu IPIs to halting vCPU\n"
		"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
		"Halter APIC ID=%#x\n"
		"Sender ICR value=%#x ICR2 value=%#x\n"
		"Halter TPR=%#x PPR=%#x LVR=%#x\n"
		"Migrations attempted: %lu\n"
		"Migrations completed: %lu\n",
		run_secs, data->ipis_sent,
		data->hlt_count, data->wake_count, *pipis_rcvd,
		data->halter_apic_id,
		data->icr, data->icr2,
		data->halter_tpr, data->halter_ppr, data->halter_lvr,
		data->migrations_attempted, data->migrations_completed);

	kvm_vm_free(vm);

	return 0;
}
502