xref: /kvm-unit-tests/powerpc/atomics.c (revision 1422f720633622f15800590785bc4f1ce92d4452)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Test some powerpc instructions
4  *
5  * Copyright 2024 Nicholas Piggin, IBM Corp.
6  */
7 #include <stdint.h>
8 #include <libcflat.h>
9 #include <migrate.h>
10 #include <asm/processor.h>
11 #include <asm/time.h>
12 #include <asm/atomic.h>
13 #include <asm/setup.h>
14 #include <asm/barrier.h>
15 #include <asm/smp.h>
16 
/* Command-line flags: -m enables the migration subtest, -r indicates a
 * TCG record/replay run (changes the expected reservation behaviour). */
static bool do_migrate;
static bool do_record;

/* Reservation granule size in bytes; the test buffer is aligned to it so
 * the whole test variable sits inside a single reservation granule. */
#define RSV_SIZE 128

static uint8_t granule[RSV_SIZE] __attribute((__aligned__(RSV_SIZE)));
23 
/*
 * Minimal test-local spinlock built directly on lwarx/stwcx. so that the
 * SMP-lock subtest stresses the reservation primitives themselves.
 * Spins until the lock word is atomically changed from 0 to 1; the
 * trailing lwsync orders the acquisition before the critical section.
 */
static void spin_lock(unsigned int *lock)
{
	unsigned int old;

	asm volatile ("1:"
		      "lwarx	%0,0,%2;"	/* load lock word, take reservation */
		      "cmpwi	%0,0;"		/* already held? */
		      "bne	1b;"		/* yes: spin */
		      "stwcx.	%1,0,%2;"	/* try to store 1 */
		      "bne-	1b;"		/* lost reservation: retry */
		      "lwsync;"			/* acquire barrier */
		      : "=&r"(old) : "r"(1), "r"(lock) : "cr0", "memory");
}
37 
/*
 * Release the test spinlock: lwsync orders the critical section before
 * the plain store of 0 to the lock word (release barrier).
 */
static void spin_unlock(unsigned int *lock)
{
	asm volatile("lwsync;"
		     "stw	%1,%0;"
		     : "+m"(*lock) : "r"(0) : "memory");
}
44 
/* Set by interrupt_handler, polled and cleared by the tests below. */
static volatile bool got_interrupt;
/* Register snapshot taken at the most recent test interrupt
 * (tests inspect recorded_regs.trap to identify the vector). */
static volatile struct pt_regs recorded_regs;
47 
/*
 * Shared exception handler for the unaligned larx/stcx. tests: asserts
 * that at most one interrupt arrives per armed test, snapshots the
 * register state for later checks, and advances the interrupted PC so
 * execution resumes past the faulting instruction.
 */
static void interrupt_handler(struct pt_regs *regs, void *opaque)
{
	assert(!got_interrupt);
	got_interrupt = true;
	/* Cast away volatile for memcpy; tests read it only after return. */
	memcpy((void *)&recorded_regs, regs, sizeof(struct pt_regs));
	regs_advance_insn(regs);
}
55 
/*
 * Exercise lwarx/stwcx.: a successful atomic update, a stwcx. without a
 * reservation, reservation loss from an intervening plain store, and
 * the alignment-interrupt behaviour of unaligned lwarx/stwcx.
 */
static void test_lwarx_stwcx(int argc, char *argv[])
{
	unsigned int *var = (unsigned int *)granule;
	unsigned int old;
	unsigned int result;

	/* Plain atomic update: retry until the stwcx. succeeds. */
	*var = 0;
	asm volatile ("1:"
		      "lwarx	%0,0,%2;"
		      "stwcx.	%1,0,%2;"
		      "bne-	1b;"
		      : "=&r"(old) : "r"(1), "r"(var) : "cr0", "memory");
	report(old == 0 && *var == 1, "simple update");

	/*
	 * stwcx. without a reservation must fail and leave the target
	 * unmodified. The first stwcx. clears any reservation that might
	 * still be outstanding; the second one is the one being checked.
	 */
	*var = 0;
	asm volatile ("li	%0,0;"
		      "stwcx.	%1,0,%2;"
		      "stwcx.	%1,0,%2;"
		      "bne-	1f;"
		      "li	%0,1;"
		      "1:"
		      : "=&r"(result)
		      : "r"(1), "r"(var) : "cr0", "memory");
	report(result == 0 && *var == 0, "failed stwcx. (no reservation)");

	/*
	 * Plain store into the reserved granule between lwarx and stwcx.;
	 * whether this clears the reservation is implementation specific,
	 * so both outcomes are reported as passes below.
	 */
	*var = 0;
	asm volatile ("li	%0,0;"
		      "lwarx	%1,0,%4;"
		      "stw	%3,0(%4);"
		      "stwcx.	%2,0,%4;"
		      "bne-	1f;"
		      "li	%0,1;"
		      "1:"
		      : "=&r"(result), "=&r"(old)
		      : "r"(1), "r"(2), "r"(var) : "cr0", "memory");
	/* This is implementation specific, so don't fail */
	if (result == 0 && *var == 2)
		report(true, "failed stwcx. (intervening store)");
	else
		report(true, "succeeded stwcx. (intervening store)");

	/* 0x600 = alignment interrupt, 0x700 = program interrupt. */
	handle_exception(0x600, interrupt_handler, NULL);
	handle_exception(0x700, interrupt_handler, NULL);

	/* Implementations may not necessarily invoke the alignment interrupt */
	old = 10;
	*var = 0;
	asm volatile (
		      "lwarx	%0,0,%1;"
		      : "+&r"(old) : "r"((char *)var + 1));
	report(old == 10 && got_interrupt && recorded_regs.trap == 0x600,
	       "unaligned lwarx causes fault");
	got_interrupt = false;

	/*
	 * Unaligned stwcx. is more difficult to test, at least under QEMU,
	 * the store does not proceed if there is no matching reservation, so
	 * the alignment handler does not get invoked. This is okay according
	 * to the Power ISA (unalignment does not necessarily invoke the
	 * alignment interrupt). But POWER CPUs do cause alignment interrupt.
	 */
	*var = 0;
	asm volatile (
		      "lwarx	%0,0,%2;"
		      "stwcx.	%1,0,%3;"
		      : "=&r"(old) : "r"(1), "r"(var), "r"((char *)var+1)
		      : "cr0", "memory");
	/*
	 * An unaligned larx/stcx. is not required by the ISA to cause an
	 * exception, and in TCG the stcx does not though it does on POWER CPUs.
	 */
	report_kfail(host_is_tcg, old == 0 && *var == 0 &&
				  got_interrupt && recorded_regs.trap == 0x600,
		     "unaligned stwcx. causes fault");
	got_interrupt = false;

	/* NOTE(review): only the 0x600 handler is deregistered here; the
	 * 0x700 handler installed above stays in place — confirm whether
	 * that is intentional. */
	handle_exception(0x600, NULL, NULL);

}
135 
/*
 * Exercise the quadword primitives lqarx/stqcx.: a successful atomic
 * update, a stqcx. without a reservation, and reservation loss from an
 * intervening doubleword store.
 */
static void test_lqarx_stqcx(int argc, char *argv[])
{
	/* 16-byte quadword viewable as two longs; the struct member order
	 * flips with endianness so var1/var2 always name the same halves
	 * of the quadword regardless of byte order. */
	union {
		__int128_t var;
		struct {
#if  __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
			unsigned long var1;
			unsigned long var2;
#else
			unsigned long var2;
			unsigned long var1;
#endif
		};
	} var __attribute__((aligned(16)));
	/* lqarx/stqcx. operate on an even/odd GPR pair, so pin the value
	 * operands to fixed register pairs (r8/r9 and r10/r11). */
	register unsigned long new1 asm("r8");
	register unsigned long new2 asm("r9");
	register unsigned long old1 asm("r10");
	register unsigned long old2 asm("r11");
	unsigned int result;

	var.var1 = 1;
	var.var2 = 2;

	/* new2/old2 are only referenced implicitly as the odd half of the
	 * register pairs; silence set-but-unused warnings. */
	(void)new2;
	(void)old2;

	/* Plain atomic quadword update: retry until stqcx. succeeds. */
	old1 = 0;
	old2 = 0;
	new1 = 3;
	new2 = 4;
	asm volatile ("1:"
		      "lqarx	%0,0,%4;"
		      "stqcx.	%2,0,%4;"
		      "bne-	1b;"
		      : "=&r"(old1), "=&r"(old2)
		      : "r"(new1), "r"(new2), "r"(&var)
		      : "cr0", "memory");

	/* The halves come back swapped relative to var1/var2 because of
	 * how the quadword maps onto the register pair. */
	report(old1 == 2 && old2 == 1 && var.var1 == 4 && var.var2 == 3,
	       "simple update");

	/*
	 * stqcx. without a reservation must fail and leave the target
	 * unmodified; the first stqcx. clears any stale reservation.
	 */
	var.var1 = 1;
	var.var2 = 2;
	new1 = 3;
	new2 = 4;
	asm volatile ("li	%0,0;"
		      "stqcx.	%1,0,%3;"
		      "stqcx.	%1,0,%3;"
		      "bne-	1f;"
		      "li	%0,1;"
		      "1:"
		      : "=&r"(result)
		      : "r"(new1), "r"(new2), "r"(&var)
		      : "cr0", "memory");
	report(result == 0 && var.var1 == 1 && var.var2 == 2,
	       "failed stqcx. (no reservation)");

	/* Intervening std into the reserved granule between lqarx and
	 * stqcx.; outcome is implementation specific. */
	var.var1 = 1;
	var.var2 = 2;
	new1 = 3;
	new2 = 4;
	asm volatile ("li	%0,0;"
		      "lqarx	%1,0,%6;"
		      "std	%5,0(%6);"
		      "stqcx.	%3,0,%6;"
		      "bne-	1f;"
		      "li	%0,1;"
		      "1:"
		      : "=&r"(result), "=&r"(old1), "=&r"(old2)
		      : "r"(new1), "r"(new2), "r"(0), "r"(&var)
		      : "cr0", "memory");
	/* This is implementation specific, so don't fail */
	if (result == 0 && (var.var1 == 0 || var.var2 == 0))
		report(true, "failed stqcx. (intervening store)");
	else
		report(true, "succeeded stqcx. (intervening store)");
}
213 
test_migrate_reserve(int argc,char * argv[])214 static void test_migrate_reserve(int argc, char *argv[])
215 {
216 	unsigned int *var = (unsigned int *)granule;
217 	unsigned int old;
218 	int i;
219 	int succeed = 0;
220 
221 	if (!do_migrate)
222 		return;
223 
224 	for (i = 0; i < 10; i++) {
225 		*var = 0x12345;
226 		asm volatile ("lwarx	%0,0,%1" : "=&r"(old) : "r"(var) : "memory");
227 		migrate_quiet();
228 		asm volatile ("stwcx.	%0,0,%1" : : "r"(0xf00d), "r"(var) : "cr0", "memory");
229 		if (*var == 0xf00d)
230 			succeed++;
231 	}
232 
233 	if (do_record) {
234 		/*
235 		 * Running under TCG record-replay, reservations must not
236 		 * be lost by migration
237 		 */
238 		report(succeed > 0, "migrated reservation is not lost");
239 	} else {
240 		report(succeed == 0, "migrated reservation is lost");
241 	}
242 
243 	report_prefix_pop();
244 }
245 
/* Iteration count shared by the performance and SMP stress loops. */
#define ITERS 10000000
/* Target of the __atomic_fetch_add performance measurements. */
static int test_counter = 0;
test_inc_perf(int argc,char * argv[])248 static void test_inc_perf(int argc, char *argv[])
249 {
250 	int i;
251 	uint64_t tb1, tb2;
252 
253 	tb1 = get_tb();
254 	for (i = 0; i < ITERS; i++)
255 		__atomic_fetch_add(&test_counter, 1, __ATOMIC_RELAXED);
256 	tb2 = get_tb();
257 	report(true, "atomic add takes %ldns",
258 		    (tb2 - tb1) * 1000000000 / ITERS / tb_hz);
259 
260 	tb1 = get_tb();
261 	for (i = 0; i < ITERS; i++)
262 		__atomic_fetch_add(&test_counter, 1, __ATOMIC_SEQ_CST);
263 	tb2 = get_tb();
264 	report(true, "sequentially conssistent atomic add takes %ldns",
265 	       (tb2 - tb1) * 1000000000 / ITERS / tb_hz);
266 }
267 
/* Shared counter incremented by every CPU in the SMP-atomic test. */
static long smp_inc_counter = 0;
/* Rendezvous/completion count: incremented on entry to smp_inc_fn,
 * decremented on exit; 0 means all workers are done. */
static int smp_inc_started;
270 
smp_inc_fn(int cpu_id)271 static void smp_inc_fn(int cpu_id)
272 {
273 	long i;
274 
275 	atomic_fetch_inc(&smp_inc_started);
276 	while (smp_inc_started < nr_cpus_present)
277 		cpu_relax();
278 
279 	for (i = 0; i < ITERS; i++)
280 		atomic_fetch_inc(&smp_inc_counter);
281 	atomic_fetch_dec(&smp_inc_started);
282 }
283 
/*
 * Run smp_inc_fn on every present CPU (secondaries plus this one) and
 * verify that no atomic increments were lost.
 */
static void test_smp_inc(int argc, char **argv)
{
	if (nr_cpus_present < 2)
		return;	/* needs a secondary CPU to be meaningful */

	if (!start_all_cpus(smp_inc_fn))
		report_abort("Failed to start secondary cpus");

	/* Wait for all secondaries to check in, then join in ourselves. */
	while (smp_inc_started < nr_cpus_present - 1)
		cpu_relax();
	smp_inc_fn(smp_processor_id());
	/* Every worker decrements on completion; 0 means all are done. */
	while (smp_inc_started > 0)
		cpu_relax();

	stop_all_cpus();

	report(smp_inc_counter == nr_cpus_present * ITERS,
	       "counter lost no increments");
}
303 
/* Counter and lock each on their own 128-byte-aligned location —
 * presumably to keep them in separate cache lines / reservation
 * granules (RSV_SIZE is also 128) so lock traffic does not interfere
 * with counter updates; confirm against target cache geometry. */
static long smp_lock_counter __attribute__((aligned(128))) = 0;
static unsigned int smp_lock __attribute__((aligned(128)));
/* Rendezvous/completion count for the SMP-lock test. */
static int smp_lock_started;
307 
smp_lock_fn(int cpu_id)308 static void smp_lock_fn(int cpu_id)
309 {
310 	long i;
311 
312 	atomic_fetch_inc(&smp_lock_started);
313 	while (smp_lock_started < nr_cpus_present)
314 		cpu_relax();
315 
316 	for (i = 0; i < ITERS; i++) {
317 		spin_lock(&smp_lock);
318 		smp_lock_counter++;
319 		spin_unlock(&smp_lock);
320 	}
321 	atomic_fetch_dec(&smp_lock_started);
322 }
323 
/*
 * Run smp_lock_fn on every present CPU (secondaries plus this one) and
 * verify that the spinlock serialized all increments (none lost).
 */
static void test_smp_lock(int argc, char **argv)
{
	if (nr_cpus_present < 2)
		return;	/* needs a secondary CPU to be meaningful */

	if (!start_all_cpus(smp_lock_fn))
		report_abort("Failed to start secondary cpus");

	/* Wait for all secondaries to check in, then join in ourselves. */
	while (smp_lock_started < nr_cpus_present - 1)
		cpu_relax();
	smp_lock_fn(smp_processor_id());
	/* Every worker decrements on completion; 0 means all are done. */
	while (smp_lock_started > 0)
		cpu_relax();

	stop_all_cpus();

	report(smp_lock_counter == nr_cpus_present * ITERS,
	       "counter lost no increments");
}
343 
/* Subtest dispatch table: names are matched against argv[1] by main();
 * NULL-terminated. */
struct {
	const char *name;
	void (*func)(int argc, char **argv);
} hctests[] = {
	{ "lwarx/stwcx", test_lwarx_stwcx },
	{ "lqarx/stqcx", test_lqarx_stqcx },
	{ "migration", test_migrate_reserve },
	{ "performance", test_inc_perf },
	{ "SMP-atomic", test_smp_inc },
	{ "SMP-lock", test_smp_lock },
	{ NULL, NULL }
};
356 
main(int argc,char ** argv)357 int main(int argc, char **argv)
358 {
359 	int i;
360 	int all;
361 
362 	all = argc == 1 || !strcmp(argv[1], "all");
363 
364 	for (i = 1; i < argc; i++) {
365 		if (strcmp(argv[i], "-r") == 0) {
366 			do_record = true;
367 		}
368 		if (strcmp(argv[i], "-m") == 0) {
369 			do_migrate = true;
370 		}
371 	}
372 
373 	report_prefix_push("atomics");
374 
375 	for (i = 0; hctests[i].name != NULL; i++) {
376 		if (all || strcmp(argv[1], hctests[i].name) == 0) {
377 			report_prefix_push(hctests[i].name);
378 			hctests[i].func(argc, argv);
379 			report_prefix_pop();
380 		}
381 	}
382 
383 	report_prefix_pop();
384 
385 	return report_summary();
386 }
387