// SPDX-License-Identifier: GPL-2.0-only /* * Test some powerpc instructions * * Copyright 2024 Nicholas Piggin, IBM Corp. */ #include #include #include #include #include #include #include #include #include static bool do_migrate; static bool do_record; #define RSV_SIZE 128 static uint8_t granule[RSV_SIZE] __attribute((__aligned__(RSV_SIZE))); static void spin_lock(unsigned int *lock) { unsigned int old; asm volatile ("1:" "lwarx %0,0,%2;" "cmpwi %0,0;" "bne 1b;" "stwcx. %1,0,%2;" "bne- 1b;" "lwsync;" : "=&r"(old) : "r"(1), "r"(lock) : "cr0", "memory"); } static void spin_unlock(unsigned int *lock) { asm volatile("lwsync;" "stw %1,%0;" : "+m"(*lock) : "r"(0) : "memory"); } static volatile bool got_interrupt; static volatile struct pt_regs recorded_regs; static void interrupt_handler(struct pt_regs *regs, void *opaque) { assert(!got_interrupt); got_interrupt = true; memcpy((void *)&recorded_regs, regs, sizeof(struct pt_regs)); regs_advance_insn(regs); } static void test_lwarx_stwcx(int argc, char *argv[]) { unsigned int *var = (unsigned int *)granule; unsigned int old; unsigned int result; *var = 0; asm volatile ("1:" "lwarx %0,0,%2;" "stwcx. %1,0,%2;" "bne- 1b;" : "=&r"(old) : "r"(1), "r"(var) : "cr0", "memory"); report(old == 0 && *var == 1, "simple update"); *var = 0; asm volatile ("li %0,0;" "stwcx. %1,0,%2;" "stwcx. %1,0,%2;" "bne- 1f;" "li %0,1;" "1:" : "=&r"(result) : "r"(1), "r"(var) : "cr0", "memory"); report(result == 0 && *var == 0, "failed stwcx. (no reservation)"); *var = 0; asm volatile ("li %0,0;" "lwarx %1,0,%4;" "stw %3,0(%4);" "stwcx. %2,0,%4;" "bne- 1f;" "li %0,1;" "1:" : "=&r"(result), "=&r"(old) : "r"(1), "r"(2), "r"(var) : "cr0", "memory"); /* This is implementation specific, so don't fail */ if (result == 0 && *var == 2) report(true, "failed stwcx. (intervening store)"); else report(true, "succeeded stwcx. (intervening store)"); handle_exception(0x600, interrupt_handler, NULL); handle_exception(0x700, interrupt_handler, NULL); /* Implementations may not necessarily invoke the alignment interrupt */ old = 10; *var = 0; asm volatile ( "lwarx %0,0,%1;" : "+&r"(old) : "r"((char *)var + 1)); report(old == 10 && got_interrupt && recorded_regs.trap == 0x600, "unaligned lwarx causes fault"); got_interrupt = false; /* * Unaligned stwcx. is more difficult to test, at least under QEMU, * the store does not proceed if there is no matching reservation, so * the alignment handler does not get invoked. This is okay according * to the Power ISA (unalignment does not necessarily invoke the * alignment interrupt). But POWER CPUs do cause alignment interrupt. */ *var = 0; asm volatile ( "lwarx %0,0,%2;" "stwcx. %1,0,%3;" : "=&r"(old) : "r"(1), "r"(var), "r"((char *)var+1) : "cr0", "memory"); /* * An unaligned larx/stcx. is not required by the ISA to cause an * exception, and in TCG the stcx does not though it does on POWER CPUs. */ report_kfail(host_is_tcg, old == 0 && *var == 0 && got_interrupt && recorded_regs.trap == 0x600, "unaligned stwcx. causes fault"); got_interrupt = false; handle_exception(0x600, NULL, NULL); } static void test_lqarx_stqcx(int argc, char *argv[]) { union { __int128_t var; struct { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ unsigned long var1; unsigned long var2; #else unsigned long var2; unsigned long var1; #endif }; } var __attribute__((aligned(16))); register unsigned long new1 asm("r8"); register unsigned long new2 asm("r9"); register unsigned long old1 asm("r10"); register unsigned long old2 asm("r11"); unsigned int result; var.var1 = 1; var.var2 = 2; (void)new2; (void)old2; old1 = 0; old2 = 0; new1 = 3; new2 = 4; asm volatile ("1:" "lqarx %0,0,%4;" "stqcx. %2,0,%4;" "bne- 1b;" : "=&r"(old1), "=&r"(old2) : "r"(new1), "r"(new2), "r"(&var) : "cr0", "memory"); report(old1 == 2 && old2 == 1 && var.var1 == 4 && var.var2 == 3, "simple update"); var.var1 = 1; var.var2 = 2; new1 = 3; new2 = 4; asm volatile ("li %0,0;" "stqcx. %1,0,%3;" "stqcx. %1,0,%3;" "bne- 1f;" "li %0,1;" "1:" : "=&r"(result) : "r"(new1), "r"(new2), "r"(&var) : "cr0", "memory"); report(result == 0 && var.var1 == 1 && var.var2 == 2, "failed stqcx. (no reservation)"); var.var1 = 1; var.var2 = 2; new1 = 3; new2 = 4; asm volatile ("li %0,0;" "lqarx %1,0,%6;" "std %5,0(%6);" "stqcx. %3,0,%6;" "bne- 1f;" "li %0,1;" "1:" : "=&r"(result), "=&r"(old1), "=&r"(old2) : "r"(new1), "r"(new2), "r"(0), "r"(&var) : "cr0", "memory"); /* This is implementation specific, so don't fail */ if (result == 0 && (var.var1 == 0 || var.var2 == 0)) report(true, "failed stqcx. (intervening store)"); else report(true, "succeeded stqcx. (intervening store)"); } static void test_migrate_reserve(int argc, char *argv[]) { unsigned int *var = (unsigned int *)granule; unsigned int old; int i; int succeed = 0; if (!do_migrate) return; for (i = 0; i < 10; i++) { *var = 0x12345; asm volatile ("lwarx %0,0,%1" : "=&r"(old) : "r"(var) : "memory"); migrate_quiet(); asm volatile ("stwcx. %0,0,%1" : : "r"(0xf00d), "r"(var) : "cr0", "memory"); if (*var == 0xf00d) succeed++; } if (do_record) { /* * Running under TCG record-replay, reservations must not * be lost by migration */ report(succeed > 0, "migrated reservation is not lost"); } else { report(succeed == 0, "migrated reservation is lost"); } report_prefix_pop(); } #define ITERS 10000000 static int test_counter = 0; static void test_inc_perf(int argc, char *argv[]) { int i; uint64_t tb1, tb2; tb1 = get_tb(); for (i = 0; i < ITERS; i++) __atomic_fetch_add(&test_counter, 1, __ATOMIC_RELAXED); tb2 = get_tb(); report(true, "atomic add takes %ldns", (tb2 - tb1) * 1000000000 / ITERS / tb_hz); tb1 = get_tb(); for (i = 0; i < ITERS; i++) __atomic_fetch_add(&test_counter, 1, __ATOMIC_SEQ_CST); tb2 = get_tb(); report(true, "sequentially conssistent atomic add takes %ldns", (tb2 - tb1) * 1000000000 / ITERS / tb_hz); } static long smp_inc_counter = 0; static int smp_inc_started; static void smp_inc_fn(int cpu_id) { long i; atomic_fetch_inc(&smp_inc_started); while (smp_inc_started < nr_cpus_present) cpu_relax(); for (i = 0; i < ITERS; i++) atomic_fetch_inc(&smp_inc_counter); atomic_fetch_dec(&smp_inc_started); } static void test_smp_inc(int argc, char **argv) { if (nr_cpus_present < 2) return; if (!start_all_cpus(smp_inc_fn)) report_abort("Failed to start secondary cpus"); while (smp_inc_started < nr_cpus_present - 1) cpu_relax(); smp_inc_fn(smp_processor_id()); while (smp_inc_started > 0) cpu_relax(); stop_all_cpus(); report(smp_inc_counter == nr_cpus_present * ITERS, "counter lost no increments"); } static long smp_lock_counter __attribute__((aligned(128))) = 0; static unsigned int smp_lock __attribute__((aligned(128))); static int smp_lock_started; static void smp_lock_fn(int cpu_id) { long i; atomic_fetch_inc(&smp_lock_started); while (smp_lock_started < nr_cpus_present) cpu_relax(); for (i = 0; i < ITERS; i++) { spin_lock(&smp_lock); smp_lock_counter++; spin_unlock(&smp_lock); } atomic_fetch_dec(&smp_lock_started); } static void test_smp_lock(int argc, char **argv) { if (nr_cpus_present < 2) return; if (!start_all_cpus(smp_lock_fn)) report_abort("Failed to start secondary cpus"); while (smp_lock_started < nr_cpus_present - 1) cpu_relax(); smp_lock_fn(smp_processor_id()); while (smp_lock_started > 0) cpu_relax(); stop_all_cpus(); report(smp_lock_counter == nr_cpus_present * ITERS, "counter lost no increments"); } struct { const char *name; void (*func)(int argc, char **argv); } hctests[] = { { "lwarx/stwcx", test_lwarx_stwcx }, { "lqarx/stqcx", test_lqarx_stqcx }, { "migration", test_migrate_reserve }, { "performance", test_inc_perf }, { "SMP-atomic", test_smp_inc }, { "SMP-lock", test_smp_lock }, { NULL, NULL } }; int main(int argc, char **argv) { int i; int all; all = argc == 1 || !strcmp(argv[1], "all"); for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-r") == 0) { do_record = true; } if (strcmp(argv[i], "-m") == 0) { do_migrate = true; } } report_prefix_push("atomics"); for (i = 0; hctests[i].name != NULL; i++) { if (all || strcmp(argv[1], hctests[i].name) == 0) { report_prefix_push(hctests[i].name); hctests[i].func(argc, argv); report_prefix_pop(); } } report_prefix_pop(); return report_summary(); }