// SPDX-License-Identifier: GPL-2.0-only
/*
 * Test some powerpc instructions
 *
 * Copyright 2024 Nicholas Piggin, IBM Corp.
 */
#include <stdint.h>
#include <libcflat.h>
#include <migrate.h>
#include <asm/processor.h>
#include <asm/time.h>
#include <asm/atomic.h>
#include <asm/setup.h>
#include <asm/barrier.h>
#include <asm/smp.h>

static bool do_migrate;
static bool do_record;

#define RSV_SIZE 128

/* Buffer covering one (assumed 128-byte) reservation granule */
static uint8_t granule[RSV_SIZE] __attribute__((__aligned__(RSV_SIZE)));

/* Test spinlock built directly on lwarx/stwcx., with lwsync barriers */
static void spin_lock(unsigned int *lock)
{
	unsigned int old;

	asm volatile ("1:"
		"lwarx	%0,0,%2;"
		"cmpwi	%0,0;"
		"bne	1b;"
		"stwcx.	%1,0,%2;"
		"bne-	1b;"
		"lwsync;"
		: "=&r"(old) : "r"(1), "r"(lock) : "cr0", "memory");
}

static void spin_unlock(unsigned int *lock)
{
	asm volatile("lwsync;"
		"stw	%1,%0;"
		: "+m"(*lock) : "r"(0) : "memory");
}

static volatile bool got_interrupt;
static volatile struct pt_regs recorded_regs;

static void interrupt_handler(struct pt_regs *regs, void *opaque)
{
	assert(!got_interrupt);
	got_interrupt = true;
	memcpy((void *)&recorded_regs, regs, sizeof(struct pt_regs));
	regs_advance_insn(regs);
}

static void test_lwarx_stwcx(int argc, char *argv[])
{
	unsigned int *var = (unsigned int *)granule;
	unsigned int old;
	unsigned int result;

	*var = 0;
	asm volatile ("1:"
		"lwarx	%0,0,%2;"
		"stwcx.	%1,0,%2;"
		"bne-	1b;"
		: "=&r"(old) : "r"(1), "r"(var) : "cr0", "memory");
	report(old == 0 && *var == 1, "simple update");

	/* Back-to-back stwcx. with no reservation: neither must perform the store */
	*var = 0;
	asm volatile ("li	%0,0;"
		"stwcx.	%1,0,%2;"
		"stwcx.	%1,0,%2;"
		"bne-	1f;"
		"li	%0,1;"
		"1:"
		: "=&r"(result)
		: "r"(1), "r"(var) : "cr0", "memory");
	report(result == 0 && *var == 0, "failed stwcx. (no reservation)");

	*var = 0;
	asm volatile ("li	%0,0;"
		"lwarx	%1,0,%4;"
		"stw	%3,0(%4);"
		"stwcx.	%2,0,%4;"
		"bne-	1f;"
		"li	%0,1;"
		"1:"
		: "=&r"(result), "=&r"(old)
		: "r"(1), "r"(2), "r"(var) : "cr0", "memory");
	/* This is implementation specific, so don't fail */
	if (result == 0 && *var == 2)
		report(true, "failed stwcx. (intervening store)");
	else
		report(true, "succeeded stwcx. (intervening store)");

	handle_exception(0x600, interrupt_handler, NULL);
	handle_exception(0x700, interrupt_handler, NULL);

	/* Implementations may not necessarily invoke the alignment interrupt */
	old = 10;
	*var = 0;
	asm volatile (
		"lwarx	%0,0,%1;"
		: "+&r"(old) : "r"((char *)var + 1));
	report(old == 10 && got_interrupt && recorded_regs.trap == 0x600,
	       "unaligned lwarx causes fault");
	got_interrupt = false;

	/*
	 * Unaligned stwcx. is more difficult to test: at least under QEMU,
	 * the store does not proceed if there is no matching reservation, so
	 * the alignment handler does not get invoked. This is okay according
	 * to the Power ISA (unalignment does not necessarily invoke the
	 * alignment interrupt), but POWER CPUs do cause an alignment
	 * interrupt.
	 */
	*var = 0;
	asm volatile (
		"lwarx	%0,0,%2;"
		"stwcx.	%1,0,%3;"
		: "=&r"(old) : "r"(1), "r"(var), "r"((char *)var + 1)
		: "cr0", "memory");
	/*
	 * An unaligned larx/stcx. is not required by the ISA to cause an
	 * exception, and in TCG the stcx. does not, though it does on POWER
	 * CPUs.
	 */
	report_kfail(host_is_tcg, old == 0 && *var == 0 &&
		     got_interrupt && recorded_regs.trap == 0x600,
		     "unaligned stwcx. causes fault");
causes fault"); 130 got_interrupt = false; 131 132 handle_exception(0x600, NULL, NULL); 133 134 } 135 136 static void test_lqarx_stqcx(int argc, char *argv[]) 137 { 138 union { 139 __int128_t var; 140 struct { 141 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 142 unsigned long var1; 143 unsigned long var2; 144 #else 145 unsigned long var2; 146 unsigned long var1; 147 #endif 148 }; 149 } var __attribute__((aligned(16))); 150 register unsigned long new1 asm("r8"); 151 register unsigned long new2 asm("r9"); 152 register unsigned long old1 asm("r10"); 153 register unsigned long old2 asm("r11"); 154 unsigned int result; 155 156 var.var1 = 1; 157 var.var2 = 2; 158 159 (void)new2; 160 (void)old2; 161 162 old1 = 0; 163 old2 = 0; 164 new1 = 3; 165 new2 = 4; 166 asm volatile ("1:" 167 "lqarx %0,0,%4;" 168 "stqcx. %2,0,%4;" 169 "bne- 1b;" 170 : "=&r"(old1), "=&r"(old2) 171 : "r"(new1), "r"(new2), "r"(&var) 172 : "cr0", "memory"); 173 174 report(old1 == 2 && old2 == 1 && var.var1 == 4 && var.var2 == 3, 175 "simple update"); 176 177 var.var1 = 1; 178 var.var2 = 2; 179 new1 = 3; 180 new2 = 4; 181 asm volatile ("li %0,0;" 182 "stqcx. %1,0,%3;" 183 "stqcx. %1,0,%3;" 184 "bne- 1f;" 185 "li %0,1;" 186 "1:" 187 : "=&r"(result) 188 : "r"(new1), "r"(new2), "r"(&var) 189 : "cr0", "memory"); 190 report(result == 0 && var.var1 == 1 && var.var2 == 2, 191 "failed stqcx. (no reservation)"); 192 193 var.var1 = 1; 194 var.var2 = 2; 195 new1 = 3; 196 new2 = 4; 197 asm volatile ("li %0,0;" 198 "lqarx %1,0,%6;" 199 "std %5,0(%6);" 200 "stqcx. %3,0,%6;" 201 "bne- 1f;" 202 "li %0,1;" 203 "1:" 204 : "=&r"(result), "=&r"(old1), "=&r"(old2) 205 : "r"(new1), "r"(new2), "r"(0), "r"(&var) 206 : "cr0", "memory"); 207 /* This is implementation specific, so don't fail */ 208 if (result == 0 && (var.var1 == 0 || var.var2 == 0)) 209 report(true, "failed stqcx. (intervening store)"); 210 else 211 report(true, "succeeded stqcx. (intervening store)"); 212 } 213 214 static void test_migrate_reserve(int argc, char *argv[]) 215 { 216 unsigned int *var = (unsigned int *)granule; 217 unsigned int old; 218 int i; 219 int succeed = 0; 220 221 if (!do_migrate) 222 return; 223 224 for (i = 0; i < 10; i++) { 225 *var = 0x12345; 226 asm volatile ("lwarx %0,0,%1" : "=&r"(old) : "r"(var) : "memory"); 227 migrate_quiet(); 228 asm volatile ("stwcx. 
%0,0,%1" : : "r"(0xf00d), "r"(var) : "cr0", "memory"); 229 if (*var == 0xf00d) 230 succeed++; 231 } 232 233 if (do_record) { 234 /* 235 * Running under TCG record-replay, reservations must not 236 * be lost by migration 237 */ 238 report(succeed > 0, "migrated reservation is not lost"); 239 } else { 240 report(succeed == 0, "migrated reservation is lost"); 241 } 242 243 report_prefix_pop(); 244 } 245 246 #define ITERS 10000000 247 static int test_counter = 0; 248 static void test_inc_perf(int argc, char *argv[]) 249 { 250 int i; 251 uint64_t tb1, tb2; 252 253 tb1 = get_tb(); 254 for (i = 0; i < ITERS; i++) 255 __atomic_fetch_add(&test_counter, 1, __ATOMIC_RELAXED); 256 tb2 = get_tb(); 257 report(true, "atomic add takes %ldns", 258 (tb2 - tb1) * 1000000000 / ITERS / tb_hz); 259 260 tb1 = get_tb(); 261 for (i = 0; i < ITERS; i++) 262 __atomic_fetch_add(&test_counter, 1, __ATOMIC_SEQ_CST); 263 tb2 = get_tb(); 264 report(true, "sequentially conssistent atomic add takes %ldns", 265 (tb2 - tb1) * 1000000000 / ITERS / tb_hz); 266 } 267 268 static long smp_inc_counter = 0; 269 static int smp_inc_started; 270 271 static void smp_inc_fn(int cpu_id) 272 { 273 long i; 274 275 atomic_fetch_inc(&smp_inc_started); 276 while (smp_inc_started < nr_cpus_present) 277 cpu_relax(); 278 279 for (i = 0; i < ITERS; i++) 280 atomic_fetch_inc(&smp_inc_counter); 281 atomic_fetch_dec(&smp_inc_started); 282 } 283 284 static void test_smp_inc(int argc, char **argv) 285 { 286 if (nr_cpus_present < 2) 287 return; 288 289 if (!start_all_cpus(smp_inc_fn)) 290 report_abort("Failed to start secondary cpus"); 291 292 while (smp_inc_started < nr_cpus_present - 1) 293 cpu_relax(); 294 smp_inc_fn(smp_processor_id()); 295 while (smp_inc_started > 0) 296 cpu_relax(); 297 298 stop_all_cpus(); 299 300 report(smp_inc_counter == nr_cpus_present * ITERS, 301 "counter lost no increments"); 302 } 303 304 static long smp_lock_counter __attribute__((aligned(128))) = 0; 305 static unsigned int smp_lock __attribute__((aligned(128))); 306 static int smp_lock_started; 307 308 static void smp_lock_fn(int cpu_id) 309 { 310 long i; 311 312 atomic_fetch_inc(&smp_lock_started); 313 while (smp_lock_started < nr_cpus_present) 314 cpu_relax(); 315 316 for (i = 0; i < ITERS; i++) { 317 spin_lock(&smp_lock); 318 smp_lock_counter++; 319 spin_unlock(&smp_lock); 320 } 321 atomic_fetch_dec(&smp_lock_started); 322 } 323 324 static void test_smp_lock(int argc, char **argv) 325 { 326 if (nr_cpus_present < 2) 327 return; 328 329 if (!start_all_cpus(smp_lock_fn)) 330 report_abort("Failed to start secondary cpus"); 331 332 while (smp_lock_started < nr_cpus_present - 1) 333 cpu_relax(); 334 smp_lock_fn(smp_processor_id()); 335 while (smp_lock_started > 0) 336 cpu_relax(); 337 338 stop_all_cpus(); 339 340 report(smp_lock_counter == nr_cpus_present * ITERS, 341 "counter lost no increments"); 342 } 343 344 struct { 345 const char *name; 346 void (*func)(int argc, char **argv); 347 } hctests[] = { 348 { "lwarx/stwcx", test_lwarx_stwcx }, 349 { "lqarx/stqcx", test_lqarx_stqcx }, 350 { "migration", test_migrate_reserve }, 351 { "performance", test_inc_perf }, 352 { "SMP-atomic", test_smp_inc }, 353 { "SMP-lock", test_smp_lock }, 354 { NULL, NULL } 355 }; 356 357 int main(int argc, char **argv) 358 { 359 int i; 360 int all; 361 362 all = argc == 1 || !strcmp(argv[1], "all"); 363 364 for (i = 1; i < argc; i++) { 365 if (strcmp(argv[i], "-r") == 0) { 366 do_record = true; 367 } 368 if (strcmp(argv[i], "-m") == 0) { 369 do_migrate = true; 370 } 371 } 372 373 
report_prefix_push("atomics"); 374 375 for (i = 0; hctests[i].name != NULL; i++) { 376 if (all || strcmp(argv[1], hctests[i].name) == 0) { 377 report_prefix_push(hctests[i].name); 378 hctests[i].func(argc, argv); 379 report_prefix_pop(); 380 } 381 } 382 383 report_prefix_pop(); 384 385 return report_summary(); 386 } 387