// SPDX-License-Identifier: GPL-2.0-only
/*
 * Test some powerpc instructions
 *
 * Copyright 2024 Nicholas Piggin, IBM Corp.
 */
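
/*
 * Argument handling (see main() below): with no arguments, or with
 * "all", every subtest runs; otherwise argv[1] selects a single subtest
 * by name. "-m" enables the migration subtest and "-r" indicates the
 * test is running under TCG record-replay.
 */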
#include <stdint.h>
#include <libcflat.h>
#include <migrate.h>
#include <asm/processor.h>
#include <asm/time.h>
#include <asm/atomic.h>
#include <asm/setup.h>
#include <asm/barrier.h>
#include <asm/smp.h>

static bool do_migrate;
static bool do_record;

#define RSV_SIZE 128

static uint8_t granule[RSV_SIZE] __attribute__((__aligned__(RSV_SIZE)));

static void spin_lock(unsigned int *lock)
{
	unsigned int old;

	asm volatile ("1:"
		      "lwarx %0,0,%2;"
		      "cmpwi %0,0;"
		      "bne 1b;"
		      "stwcx. %1,0,%2;"
		      "bne- 1b;"
		      "lwsync;"
		      : "=&r"(old) : "r"(1), "r"(lock) : "cr0", "memory");
}

static void spin_unlock(unsigned int *lock)
{
	asm volatile("lwsync;"
		     "stw %1,%0;"
		     : "+m"(*lock) : "r"(0) : "memory");
}
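
/*
 * Illustrative sketch only, not used by the tests below: the same
 * lwarx/stwcx. pairing as spin_lock() above can also express a
 * non-blocking trylock. The spin_trylock name is an addition for this
 * example; it returns true if the lock was acquired.
 */
static inline bool spin_trylock(unsigned int *lock)
{
	unsigned int old;

	asm volatile ("lwarx %0,0,%2;"		/* load word and reserve */
		      "cmpwi %0,0;"
		      "bne 2f;"			/* already held: fail */
		      "stwcx. %1,0,%2;"		/* try to store 1 */
		      "bne- 2f;"		/* lost the reservation: fail */
		      "lwsync;"			/* acquire barrier */
		      "b 3f;"
		      "2: li %0,1;"		/* report failure as non-zero */
		      "3:"
		      : "=&r"(old) : "r"(1), "r"(lock) : "cr0", "memory");

	return old == 0;
}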

static volatile bool got_interrupt;
static volatile struct pt_regs recorded_regs;

static void interrupt_handler(struct pt_regs *regs, void *opaque)
{
	assert(!got_interrupt);
	got_interrupt = true;
	memcpy((void *)&recorded_regs, regs, sizeof(struct pt_regs));
	regs_advance_insn(regs);
}

static void test_lwarx_stwcx(int argc, char *argv[])
{
	unsigned int *var = (unsigned int *)granule;
	unsigned int old;
	unsigned int result;

	*var = 0;
	asm volatile ("1:"
		      "lwarx %0,0,%2;"
		      "stwcx. %1,0,%2;"
		      "bne- 1b;"
		      : "=&r"(old) : "r"(1), "r"(var) : "cr0", "memory");
	report(old == 0 && *var == 1, "simple update");

	*var = 0;
	asm volatile ("li %0,0;"
		      "stwcx. %1,0,%2;"
		      "stwcx. %1,0,%2;"
		      "bne- 1f;"
		      "li %0,1;"
		      "1:"
		      : "=&r"(result)
		      : "r"(1), "r"(var) : "cr0", "memory");
	report(result == 0 && *var == 0, "failed stwcx. (no reservation)");

	*var = 0;
	asm volatile ("li %0,0;"
		      "lwarx %1,0,%4;"
		      "stw %3,0(%4);"
		      "stwcx. %2,0,%4;"
		      "bne- 1f;"
		      "li %0,1;"
		      "1:"
		      : "=&r"(result), "=&r"(old)
		      : "r"(1), "r"(2), "r"(var) : "cr0", "memory");
	/* This is implementation specific, so don't fail */
	if (result == 0 && *var == 2)
		report(true, "failed stwcx. (intervening store)");
	else
		report(true, "succeeded stwcx. (intervening store)");

	handle_exception(0x600, interrupt_handler, NULL);
	handle_exception(0x700, interrupt_handler, NULL);

	/* Implementations may not necessarily invoke the alignment interrupt */
	old = 10;
	*var = 0;
	asm volatile (
		"lwarx %0,0,%1;"
		: "+&r"(old) : "r"((char *)var + 1));
	report(old == 10 && got_interrupt && recorded_regs.trap == 0x600,
	       "unaligned lwarx causes fault");
	got_interrupt = false;

	/*
	 * Unaligned stwcx. is more difficult to test: at least under QEMU,
	 * the store does not proceed if there is no matching reservation, so
	 * the alignment handler does not get invoked. This is okay according
	 * to the Power ISA (unalignment does not necessarily invoke the
	 * alignment interrupt), but POWER CPUs do raise the alignment
	 * interrupt.
	 */
	*var = 0;
	asm volatile (
		"lwarx %0,0,%2;"
		"stwcx. %1,0,%3;"
		: "=&r"(old) : "r"(1), "r"(var), "r"((char *)var + 1)
		: "cr0", "memory");
	/*
	 * An unaligned larx/stcx. is not required by the ISA to cause an
	 * exception; under TCG the stcx. does not, though it does on POWER
	 * CPUs.
	 */
	report_kfail(host_is_tcg, old == 0 && *var == 0 &&
		     got_interrupt && recorded_regs.trap == 0x600,
		     "unaligned stwcx. causes fault");
	got_interrupt = false;

	handle_exception(0x600, NULL, NULL);
	handle_exception(0x700, NULL, NULL);
}

static void test_lqarx_stqcx(int argc, char *argv[])
{
	union {
		__int128_t var;
		struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
			unsigned long var1;
			unsigned long var2;
#else
			unsigned long var2;
			unsigned long var1;
#endif
		};
	} var __attribute__((aligned(16)));
	register unsigned long new1 asm("r8");
	register unsigned long new2 asm("r9");
	register unsigned long old1 asm("r10");
	register unsigned long old2 asm("r11");
	unsigned int result;

	var.var1 = 1;
	var.var2 = 2;

	(void)new2;
	(void)old2;

	old1 = 0;
	old2 = 0;
	new1 = 3;
	new2 = 4;
	asm volatile ("1:"
		      "lqarx %0,0,%4;"
		      "stqcx. %2,0,%4;"
		      "bne- 1b;"
		      : "=&r"(old1), "=&r"(old2)
		      : "r"(new1), "r"(new2), "r"(&var)
		      : "cr0", "memory");

	report(old1 == 2 && old2 == 1 && var.var1 == 4 && var.var2 == 3,
	       "simple update");

	var.var1 = 1;
	var.var2 = 2;
	new1 = 3;
	new2 = 4;
	asm volatile ("li %0,0;"
		      "stqcx. %1,0,%3;"
		      "stqcx. %1,0,%3;"
		      "bne- 1f;"
		      "li %0,1;"
		      "1:"
		      : "=&r"(result)
		      : "r"(new1), "r"(new2), "r"(&var)
		      : "cr0", "memory");
	report(result == 0 && var.var1 == 1 && var.var2 == 2,
	       "failed stqcx. (no reservation)");

	var.var1 = 1;
	var.var2 = 2;
	new1 = 3;
	new2 = 4;
	asm volatile ("li %0,0;"
		      "lqarx %1,0,%6;"
		      "std %5,0(%6);"
		      "stqcx. %3,0,%6;"
		      "bne- 1f;"
		      "li %0,1;"
		      "1:"
		      : "=&r"(result), "=&r"(old1), "=&r"(old2)
		      : "r"(new1), "r"(new2), "r"(0), "r"(&var)
		      : "cr0", "memory");
	/* This is implementation specific, so don't fail */
	if (result == 0 && (var.var1 == 0 || var.var2 == 0))
		report(true, "failed stqcx. (intervening store)");
	else
		report(true, "succeeded stqcx. (intervening store)");
}

static void test_migrate_reserve(int argc, char *argv[])
{
	unsigned int *var = (unsigned int *)granule;
	unsigned int old;
	int i;
	int succeed = 0;

	if (!do_migrate)
		return;

	for (i = 0; i < 10; i++) {
		*var = 0x12345;
		asm volatile ("lwarx %0,0,%1" : "=&r"(old) : "r"(var) : "memory");
		migrate_quiet();
		asm volatile ("stwcx. %0,0,%1" : : "r"(0xf00d), "r"(var) : "cr0", "memory");
		if (*var == 0xf00d)
			succeed++;
	}

	if (do_record) {
		/*
		 * Running under TCG record-replay, reservations must not
		 * be lost by migration.
		 */
		report(succeed > 0, "migrated reservation is not lost");
	} else {
		report(succeed == 0, "migrated reservation is lost");
	}
}

#define ITERS 10000000
static int test_counter = 0;

static void test_inc_perf(int argc, char *argv[])
{
	int i;
	uint64_t tb1, tb2;

	tb1 = get_tb();
	for (i = 0; i < ITERS; i++)
		__atomic_fetch_add(&test_counter, 1, __ATOMIC_RELAXED);
	tb2 = get_tb();
	report(true, "relaxed atomic add takes %ldns",
	       (tb2 - tb1) * 1000000000 / ITERS / tb_hz);

	tb1 = get_tb();
	for (i = 0; i < ITERS; i++)
		__atomic_fetch_add(&test_counter, 1, __ATOMIC_SEQ_CST);
	tb2 = get_tb();
	report(true, "sequentially consistent atomic add takes %ldns",
	       (tb2 - tb1) * 1000000000 / ITERS / tb_hz);
}
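
/*
 * For reference (compiler dependent, nothing the test asserts): GCC
 * typically expands the relaxed __atomic_fetch_add above into a
 * larx/stcx. retry loop along the lines of
 *
 *	1: lwarx  r9,0,r3
 *	   addi   r10,r9,1
 *	   stwcx. r10,0,r3
 *	   bne-   1b
 *
 * and brackets the sequentially consistent variant with a leading sync
 * and a trailing isync, which is where the extra cost measured above
 * comes from.
 */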

static long smp_inc_counter = 0;
static int smp_inc_started;

static void smp_inc_fn(int cpu_id)
{
	long i;

	atomic_fetch_inc(&smp_inc_started);
	while (smp_inc_started < nr_cpus_present)
		cpu_relax();

	for (i = 0; i < ITERS; i++)
		atomic_fetch_inc(&smp_inc_counter);
	atomic_fetch_dec(&smp_inc_started);
}

static void test_smp_inc(int argc, char **argv)
{
	if (nr_cpus_present < 2)
		return;

	if (!start_all_cpus(smp_inc_fn))
		report_abort("Failed to start secondary cpus");

	while (smp_inc_started < nr_cpus_present - 1)
		cpu_relax();
	smp_inc_fn(smp_processor_id());
	while (smp_inc_started > 0)
		cpu_relax();

	stop_all_cpus();

	report(smp_inc_counter == nr_cpus_present * ITERS,
	       "counter lost no increments");
}

static long smp_lock_counter __attribute__((aligned(128))) = 0;
static unsigned int smp_lock __attribute__((aligned(128)));
static int smp_lock_started;

static void smp_lock_fn(int cpu_id)
{
	long i;

	atomic_fetch_inc(&smp_lock_started);
	while (smp_lock_started < nr_cpus_present)
		cpu_relax();

	for (i = 0; i < ITERS; i++) {
		spin_lock(&smp_lock);
		smp_lock_counter++;
		spin_unlock(&smp_lock);
	}
	atomic_fetch_dec(&smp_lock_started);
}

static void test_smp_lock(int argc, char **argv)
{
	if (nr_cpus_present < 2)
		return;

	if (!start_all_cpus(smp_lock_fn))
		report_abort("Failed to start secondary cpus");

	while (smp_lock_started < nr_cpus_present - 1)
		cpu_relax();
	smp_lock_fn(smp_processor_id());
	while (smp_lock_started > 0)
		cpu_relax();

	stop_all_cpus();

	report(smp_lock_counter == nr_cpus_present * ITERS,
	       "counter lost no increments");
}

struct {
	const char *name;
	void (*func)(int argc, char **argv);
} hctests[] = {
	{ "lwarx/stwcx", test_lwarx_stwcx },
	{ "lqarx/stqcx", test_lqarx_stqcx },
	{ "migration", test_migrate_reserve },
	{ "performance", test_inc_perf },
	{ "SMP-atomic", test_smp_inc },
	{ "SMP-lock", test_smp_lock },
	{ NULL, NULL }
};

int main(int argc, char **argv)
{
	int i;
	int all;

	all = argc == 1 || !strcmp(argv[1], "all");

	for (i = 1; i < argc; i++) {
		if (strcmp(argv[i], "-r") == 0) {
			do_record = true;
		}
		if (strcmp(argv[i], "-m") == 0) {
			do_migrate = true;
		}
	}

	report_prefix_push("atomics");

	for (i = 0; hctests[i].name != NULL; i++) {
		if (all || strcmp(argv[1], hctests[i].name) == 0) {
			report_prefix_push(hctests[i].name);
			hctests[i].func(argc, argv);
			report_prefix_pop();
		}
	}

	report_prefix_pop();

	return report_summary();
}