1a0c9400aSSong Gao /* SPDX-License-Identifier: GPL-2.0-or-later */ 2a0c9400aSSong Gao /* 31dc33f26SSong Gao * QEMU LoongArch vector helper functions. 4a0c9400aSSong Gao * 5a0c9400aSSong Gao * Copyright (c) 2022-2023 Loongson Technology Corporation Limited 6a0c9400aSSong Gao */ 7c037fbc9SSong Gao 8c037fbc9SSong Gao #include "qemu/osdep.h" 9c037fbc9SSong Gao #include "cpu.h" 10c037fbc9SSong Gao #include "exec/exec-all.h" 11c037fbc9SSong Gao #include "exec/helper-proto.h" 12aca67472SSong Gao #include "fpu/softfloat.h" 13aca67472SSong Gao #include "internals.h" 14d0dfa19aSSong Gao #include "tcg/tcg.h" 15c037fbc9SSong Gao 16c037fbc9SSong Gao #define DO_ADD(a, b) (a + b) 17c037fbc9SSong Gao #define DO_SUB(a, b) (a - b) 18c037fbc9SSong Gao 19c037fbc9SSong Gao #define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \ 20c037fbc9SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 21c037fbc9SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 22c037fbc9SSong Gao { \ 23c037fbc9SSong Gao int i; \ 24c037fbc9SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 25c037fbc9SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 26c037fbc9SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 27c037fbc9SSong Gao typedef __typeof(Vd->E1(0)) TD; \ 28c037fbc9SSong Gao \ 29c037fbc9SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 30c037fbc9SSong Gao Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \ 31c037fbc9SSong Gao } \ 32c037fbc9SSong Gao } 33c037fbc9SSong Gao 34c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD) 35c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD) 36c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD) 37c037fbc9SSong Gao 38c037fbc9SSong Gao void HELPER(vhaddw_q_d)(CPULoongArchState *env, 39c037fbc9SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) 40c037fbc9SSong Gao { 41c037fbc9SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 42c037fbc9SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 43c037fbc9SSong Gao VReg *Vk = &(env->fpr[vk].vreg); 44c037fbc9SSong Gao 45c037fbc9SSong Gao Vd->Q(0) 
= int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0))); 46c037fbc9SSong Gao } 47c037fbc9SSong Gao 48c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB) 49c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB) 50c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB) 51c037fbc9SSong Gao 52c037fbc9SSong Gao void HELPER(vhsubw_q_d)(CPULoongArchState *env, 53c037fbc9SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) 54c037fbc9SSong Gao { 55c037fbc9SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 56c037fbc9SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 57c037fbc9SSong Gao VReg *Vk = &(env->fpr[vk].vreg); 58c037fbc9SSong Gao 59c037fbc9SSong Gao Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0))); 60c037fbc9SSong Gao } 61c037fbc9SSong Gao 62c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD) 63c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD) 64c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD) 65c037fbc9SSong Gao 66c037fbc9SSong Gao void HELPER(vhaddw_qu_du)(CPULoongArchState *env, 67c037fbc9SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) 68c037fbc9SSong Gao { 69c037fbc9SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 70c037fbc9SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 71c037fbc9SSong Gao VReg *Vk = &(env->fpr[vk].vreg); 72c037fbc9SSong Gao 73c037fbc9SSong Gao Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), 74c037fbc9SSong Gao int128_make64((uint64_t)Vk->D(0))); 75c037fbc9SSong Gao } 76c037fbc9SSong Gao 77c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB) 78c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB) 79c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB) 80c037fbc9SSong Gao 81c037fbc9SSong Gao void HELPER(vhsubw_qu_du)(CPULoongArchState *env, 82c037fbc9SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) 83c037fbc9SSong Gao { 84c037fbc9SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 85c037fbc9SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 86c037fbc9SSong Gao 
VReg *Vk = &(env->fpr[vk].vreg); 87c037fbc9SSong Gao 88c037fbc9SSong Gao Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)), 89c037fbc9SSong Gao int128_make64((uint64_t)Vk->D(0))); 90c037fbc9SSong Gao } 912d5f950cSSong Gao 922d5f950cSSong Gao #define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \ 932d5f950cSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 942d5f950cSSong Gao { \ 952d5f950cSSong Gao int i; \ 962d5f950cSSong Gao VReg *Vd = (VReg *)vd; \ 972d5f950cSSong Gao VReg *Vj = (VReg *)vj; \ 982d5f950cSSong Gao VReg *Vk = (VReg *)vk; \ 992d5f950cSSong Gao typedef __typeof(Vd->E1(0)) TD; \ 1002d5f950cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1012d5f950cSSong Gao Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \ 1022d5f950cSSong Gao } \ 1032d5f950cSSong Gao } 1042d5f950cSSong Gao 1052d5f950cSSong Gao #define DO_ODD(NAME, BIT, E1, E2, DO_OP) \ 1062d5f950cSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 1072d5f950cSSong Gao { \ 1082d5f950cSSong Gao int i; \ 1092d5f950cSSong Gao VReg *Vd = (VReg *)vd; \ 1102d5f950cSSong Gao VReg *Vj = (VReg *)vj; \ 1112d5f950cSSong Gao VReg *Vk = (VReg *)vk; \ 1122d5f950cSSong Gao typedef __typeof(Vd->E1(0)) TD; \ 1132d5f950cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1142d5f950cSSong Gao Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \ 1152d5f950cSSong Gao } \ 1162d5f950cSSong Gao } 1172d5f950cSSong Gao 1182d5f950cSSong Gao void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t v) 1192d5f950cSSong Gao { 1202d5f950cSSong Gao VReg *Vd = (VReg *)vd; 1212d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1222d5f950cSSong Gao VReg *Vk = (VReg *)vk; 1232d5f950cSSong Gao 1242d5f950cSSong Gao Vd->Q(0) = int128_add(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0))); 1252d5f950cSSong Gao } 1262d5f950cSSong Gao 1272d5f950cSSong Gao DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD) 1282d5f950cSSong Gao DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD) 1292d5f950cSSong Gao 
DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD) 1302d5f950cSSong Gao 1312d5f950cSSong Gao void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t v) 1322d5f950cSSong Gao { 1332d5f950cSSong Gao VReg *Vd = (VReg *)vd; 1342d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1352d5f950cSSong Gao VReg *Vk = (VReg *)vk; 1362d5f950cSSong Gao 1372d5f950cSSong Gao Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1))); 1382d5f950cSSong Gao } 1392d5f950cSSong Gao 1402d5f950cSSong Gao DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD) 1412d5f950cSSong Gao DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD) 1422d5f950cSSong Gao DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD) 1432d5f950cSSong Gao 1442d5f950cSSong Gao void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t v) 1452d5f950cSSong Gao { 1462d5f950cSSong Gao VReg *Vd = (VReg *)vd; 1472d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1482d5f950cSSong Gao VReg *Vk = (VReg *)vk; 1492d5f950cSSong Gao 1502d5f950cSSong Gao Vd->Q(0) = int128_sub(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0))); 1512d5f950cSSong Gao } 1522d5f950cSSong Gao 1532d5f950cSSong Gao DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB) 1542d5f950cSSong Gao DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB) 1552d5f950cSSong Gao DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB) 1562d5f950cSSong Gao 1572d5f950cSSong Gao void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t v) 1582d5f950cSSong Gao { 1592d5f950cSSong Gao VReg *Vd = (VReg *)vd; 1602d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1612d5f950cSSong Gao VReg *Vk = (VReg *)vk; 1622d5f950cSSong Gao 1632d5f950cSSong Gao Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1))); 1642d5f950cSSong Gao } 1652d5f950cSSong Gao 1662d5f950cSSong Gao DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB) 1672d5f950cSSong Gao DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB) 1682d5f950cSSong Gao DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB) 1692d5f950cSSong Gao 1702d5f950cSSong Gao void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t v) 1712d5f950cSSong Gao { 1722d5f950cSSong 
Gao VReg *Vd = (VReg *)vd; 1732d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1742d5f950cSSong Gao VReg *Vk = (VReg *)vk; 1752d5f950cSSong Gao 1762d5f950cSSong Gao Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)), 1772d5f950cSSong Gao int128_make64((uint64_t)Vk->D(0))); 1782d5f950cSSong Gao } 1792d5f950cSSong Gao 1802d5f950cSSong Gao DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD) 1812d5f950cSSong Gao DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD) 1822d5f950cSSong Gao DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD) 1832d5f950cSSong Gao 1842d5f950cSSong Gao void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t v) 1852d5f950cSSong Gao { 1862d5f950cSSong Gao VReg *Vd = (VReg *)vd; 1872d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1882d5f950cSSong Gao VReg *Vk = (VReg *)vk; 1892d5f950cSSong Gao 1902d5f950cSSong Gao Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), 1912d5f950cSSong Gao int128_make64((uint64_t)Vk->D(1))); 1922d5f950cSSong Gao } 1932d5f950cSSong Gao 1942d5f950cSSong Gao DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD) 1952d5f950cSSong Gao DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD) 1962d5f950cSSong Gao DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD) 1972d5f950cSSong Gao 1982d5f950cSSong Gao void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t v) 1992d5f950cSSong Gao { 2002d5f950cSSong Gao VReg *Vd = (VReg *)vd; 2012d5f950cSSong Gao VReg *Vj = (VReg *)vj; 2022d5f950cSSong Gao VReg *Vk = (VReg *)vk; 2032d5f950cSSong Gao 2042d5f950cSSong Gao Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(0)), 2052d5f950cSSong Gao int128_make64((uint64_t)Vk->D(0))); 2062d5f950cSSong Gao } 2072d5f950cSSong Gao 2082d5f950cSSong Gao DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB) 2092d5f950cSSong Gao DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB) 2102d5f950cSSong Gao DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB) 2112d5f950cSSong Gao 2122d5f950cSSong Gao void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t v) 2132d5f950cSSong Gao { 2142d5f950cSSong Gao VReg *Vd = (VReg *)vd; 
2152d5f950cSSong Gao VReg *Vj = (VReg *)vj; 2162d5f950cSSong Gao VReg *Vk = (VReg *)vk; 2172d5f950cSSong Gao 2182d5f950cSSong Gao Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)), 2192d5f950cSSong Gao int128_make64((uint64_t)Vk->D(1))); 2202d5f950cSSong Gao } 2212d5f950cSSong Gao 2222d5f950cSSong Gao DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB) 2232d5f950cSSong Gao DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB) 2242d5f950cSSong Gao DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB) 2252d5f950cSSong Gao 2262d5f950cSSong Gao #define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ 2272d5f950cSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 2282d5f950cSSong Gao { \ 2292d5f950cSSong Gao int i; \ 2302d5f950cSSong Gao VReg *Vd = (VReg *)vd; \ 2312d5f950cSSong Gao VReg *Vj = (VReg *)vj; \ 2322d5f950cSSong Gao VReg *Vk = (VReg *)vk; \ 2332d5f950cSSong Gao typedef __typeof(Vd->ES1(0)) TDS; \ 2342d5f950cSSong Gao typedef __typeof(Vd->EU1(0)) TDU; \ 2352d5f950cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2362d5f950cSSong Gao Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \ 2372d5f950cSSong Gao } \ 2382d5f950cSSong Gao } 2392d5f950cSSong Gao 2402d5f950cSSong Gao #define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ 2412d5f950cSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 2422d5f950cSSong Gao { \ 2432d5f950cSSong Gao int i; \ 2442d5f950cSSong Gao VReg *Vd = (VReg *)vd; \ 2452d5f950cSSong Gao VReg *Vj = (VReg *)vj; \ 2462d5f950cSSong Gao VReg *Vk = (VReg *)vk; \ 2472d5f950cSSong Gao typedef __typeof(Vd->ES1(0)) TDS; \ 2482d5f950cSSong Gao typedef __typeof(Vd->EU1(0)) TDU; \ 2492d5f950cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2502d5f950cSSong Gao Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \ 2512d5f950cSSong Gao } \ 2522d5f950cSSong Gao } 2532d5f950cSSong Gao 2542d5f950cSSong Gao void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t v) 2552d5f950cSSong Gao { 
2562d5f950cSSong Gao VReg *Vd = (VReg *)vd; 2572d5f950cSSong Gao VReg *Vj = (VReg *)vj; 2582d5f950cSSong Gao VReg *Vk = (VReg *)vk; 2592d5f950cSSong Gao 2602d5f950cSSong Gao Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)), 2612d5f950cSSong Gao int128_makes64(Vk->D(0))); 2622d5f950cSSong Gao } 2632d5f950cSSong Gao 2642d5f950cSSong Gao DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD) 2652d5f950cSSong Gao DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD) 2662d5f950cSSong Gao DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD) 2672d5f950cSSong Gao 2682d5f950cSSong Gao void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v) 2692d5f950cSSong Gao { 2702d5f950cSSong Gao VReg *Vd = (VReg *)vd; 2712d5f950cSSong Gao VReg *Vj = (VReg *)vj; 2722d5f950cSSong Gao VReg *Vk = (VReg *)vk; 2732d5f950cSSong Gao 2742d5f950cSSong Gao Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)), 2752d5f950cSSong Gao int128_makes64(Vk->D(1))); 2762d5f950cSSong Gao } 2772d5f950cSSong Gao 2782d5f950cSSong Gao DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) 2792d5f950cSSong Gao DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD) 2802d5f950cSSong Gao DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) 28139e9b0a7SSong Gao 28239e9b0a7SSong Gao #define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1)) 28339e9b0a7SSong Gao #define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1)) 28439e9b0a7SSong Gao 28539e9b0a7SSong Gao #define DO_3OP(NAME, BIT, E, DO_OP) \ 28639e9b0a7SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 28739e9b0a7SSong Gao { \ 28839e9b0a7SSong Gao int i; \ 28939e9b0a7SSong Gao VReg *Vd = (VReg *)vd; \ 29039e9b0a7SSong Gao VReg *Vj = (VReg *)vj; \ 29139e9b0a7SSong Gao VReg *Vk = (VReg *)vk; \ 29239e9b0a7SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 29339e9b0a7SSong Gao Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ 29439e9b0a7SSong Gao } \ 29539e9b0a7SSong Gao } 29639e9b0a7SSong Gao 29739e9b0a7SSong Gao DO_3OP(vavg_b, 8, B, 
DO_VAVG) 29839e9b0a7SSong Gao DO_3OP(vavg_h, 16, H, DO_VAVG) 29939e9b0a7SSong Gao DO_3OP(vavg_w, 32, W, DO_VAVG) 30039e9b0a7SSong Gao DO_3OP(vavg_d, 64, D, DO_VAVG) 30139e9b0a7SSong Gao DO_3OP(vavgr_b, 8, B, DO_VAVGR) 30239e9b0a7SSong Gao DO_3OP(vavgr_h, 16, H, DO_VAVGR) 30339e9b0a7SSong Gao DO_3OP(vavgr_w, 32, W, DO_VAVGR) 30439e9b0a7SSong Gao DO_3OP(vavgr_d, 64, D, DO_VAVGR) 30539e9b0a7SSong Gao DO_3OP(vavg_bu, 8, UB, DO_VAVG) 30639e9b0a7SSong Gao DO_3OP(vavg_hu, 16, UH, DO_VAVG) 30739e9b0a7SSong Gao DO_3OP(vavg_wu, 32, UW, DO_VAVG) 30839e9b0a7SSong Gao DO_3OP(vavg_du, 64, UD, DO_VAVG) 30939e9b0a7SSong Gao DO_3OP(vavgr_bu, 8, UB, DO_VAVGR) 31039e9b0a7SSong Gao DO_3OP(vavgr_hu, 16, UH, DO_VAVGR) 31139e9b0a7SSong Gao DO_3OP(vavgr_wu, 32, UW, DO_VAVGR) 31239e9b0a7SSong Gao DO_3OP(vavgr_du, 64, UD, DO_VAVGR) 31349725659SSong Gao 31449725659SSong Gao #define DO_VABSD(a, b) ((a > b) ? (a -b) : (b-a)) 31549725659SSong Gao 31649725659SSong Gao DO_3OP(vabsd_b, 8, B, DO_VABSD) 31749725659SSong Gao DO_3OP(vabsd_h, 16, H, DO_VABSD) 31849725659SSong Gao DO_3OP(vabsd_w, 32, W, DO_VABSD) 31949725659SSong Gao DO_3OP(vabsd_d, 64, D, DO_VABSD) 32049725659SSong Gao DO_3OP(vabsd_bu, 8, UB, DO_VABSD) 32149725659SSong Gao DO_3OP(vabsd_hu, 16, UH, DO_VABSD) 32249725659SSong Gao DO_3OP(vabsd_wu, 32, UW, DO_VABSD) 32349725659SSong Gao DO_3OP(vabsd_du, 64, UD, DO_VABSD) 324af448cb3SSong Gao 325af448cb3SSong Gao #define DO_VABS(a) ((a < 0) ? 
(-a) : (a)) 326af448cb3SSong Gao 327af448cb3SSong Gao #define DO_VADDA(NAME, BIT, E, DO_OP) \ 328af448cb3SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 329af448cb3SSong Gao { \ 330af448cb3SSong Gao int i; \ 331af448cb3SSong Gao VReg *Vd = (VReg *)vd; \ 332af448cb3SSong Gao VReg *Vj = (VReg *)vj; \ 333af448cb3SSong Gao VReg *Vk = (VReg *)vk; \ 334af448cb3SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 335af448cb3SSong Gao Vd->E(i) = DO_OP(Vj->E(i)) + DO_OP(Vk->E(i)); \ 336af448cb3SSong Gao } \ 337af448cb3SSong Gao } 338af448cb3SSong Gao 339af448cb3SSong Gao DO_VADDA(vadda_b, 8, B, DO_VABS) 340af448cb3SSong Gao DO_VADDA(vadda_h, 16, H, DO_VABS) 341af448cb3SSong Gao DO_VADDA(vadda_w, 32, W, DO_VABS) 342af448cb3SSong Gao DO_VADDA(vadda_d, 64, D, DO_VABS) 3439ab29520SSong Gao 3449ab29520SSong Gao #define DO_MIN(a, b) (a < b ? a : b) 3459ab29520SSong Gao #define DO_MAX(a, b) (a > b ? a : b) 3469ab29520SSong Gao 3479ab29520SSong Gao #define VMINMAXI(NAME, BIT, E, DO_OP) \ 3489ab29520SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ 3499ab29520SSong Gao { \ 3509ab29520SSong Gao int i; \ 3519ab29520SSong Gao VReg *Vd = (VReg *)vd; \ 3529ab29520SSong Gao VReg *Vj = (VReg *)vj; \ 3539ab29520SSong Gao typedef __typeof(Vd->E(0)) TD; \ 3549ab29520SSong Gao \ 3559ab29520SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 3569ab29520SSong Gao Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ 3579ab29520SSong Gao } \ 3589ab29520SSong Gao } 3599ab29520SSong Gao 3609ab29520SSong Gao VMINMAXI(vmini_b, 8, B, DO_MIN) 3619ab29520SSong Gao VMINMAXI(vmini_h, 16, H, DO_MIN) 3629ab29520SSong Gao VMINMAXI(vmini_w, 32, W, DO_MIN) 3639ab29520SSong Gao VMINMAXI(vmini_d, 64, D, DO_MIN) 3649ab29520SSong Gao VMINMAXI(vmaxi_b, 8, B, DO_MAX) 3659ab29520SSong Gao VMINMAXI(vmaxi_h, 16, H, DO_MAX) 3669ab29520SSong Gao VMINMAXI(vmaxi_w, 32, W, DO_MAX) 3679ab29520SSong Gao VMINMAXI(vmaxi_d, 64, D, DO_MAX) 3689ab29520SSong Gao VMINMAXI(vmini_bu, 8, UB, DO_MIN) 
3699ab29520SSong Gao VMINMAXI(vmini_hu, 16, UH, DO_MIN) 3709ab29520SSong Gao VMINMAXI(vmini_wu, 32, UW, DO_MIN) 3719ab29520SSong Gao VMINMAXI(vmini_du, 64, UD, DO_MIN) 3729ab29520SSong Gao VMINMAXI(vmaxi_bu, 8, UB, DO_MAX) 3739ab29520SSong Gao VMINMAXI(vmaxi_hu, 16, UH, DO_MAX) 3749ab29520SSong Gao VMINMAXI(vmaxi_wu, 32, UW, DO_MAX) 3759ab29520SSong Gao VMINMAXI(vmaxi_du, 64, UD, DO_MAX) 376cd1c49adSSong Gao 377cd1c49adSSong Gao #define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \ 378cd1c49adSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 379cd1c49adSSong Gao { \ 380cd1c49adSSong Gao int i; \ 381cd1c49adSSong Gao VReg *Vd = (VReg *)vd; \ 382cd1c49adSSong Gao VReg *Vj = (VReg *)vj; \ 383cd1c49adSSong Gao VReg *Vk = (VReg *)vk; \ 384cd1c49adSSong Gao typedef __typeof(Vd->E1(0)) T; \ 385cd1c49adSSong Gao \ 386cd1c49adSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 387cd1c49adSSong Gao Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \ 388cd1c49adSSong Gao } \ 389cd1c49adSSong Gao } 390cd1c49adSSong Gao 391cd1c49adSSong Gao void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t v) 392cd1c49adSSong Gao { 393cd1c49adSSong Gao uint64_t l, h1, h2; 394cd1c49adSSong Gao VReg *Vd = (VReg *)vd; 395cd1c49adSSong Gao VReg *Vj = (VReg *)vj; 396cd1c49adSSong Gao VReg *Vk = (VReg *)vk; 397cd1c49adSSong Gao 398cd1c49adSSong Gao muls64(&l, &h1, Vj->D(0), Vk->D(0)); 399cd1c49adSSong Gao muls64(&l, &h2, Vj->D(1), Vk->D(1)); 400cd1c49adSSong Gao 401cd1c49adSSong Gao Vd->D(0) = h1; 402cd1c49adSSong Gao Vd->D(1) = h2; 403cd1c49adSSong Gao } 404cd1c49adSSong Gao 405cd1c49adSSong Gao DO_VMUH(vmuh_b, 8, H, B, DO_MUH) 406cd1c49adSSong Gao DO_VMUH(vmuh_h, 16, W, H, DO_MUH) 407cd1c49adSSong Gao DO_VMUH(vmuh_w, 32, D, W, DO_MUH) 408cd1c49adSSong Gao 409cd1c49adSSong Gao void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t v) 410cd1c49adSSong Gao { 411cd1c49adSSong Gao uint64_t l, h1, h2; 412cd1c49adSSong Gao VReg *Vd = (VReg *)vd; 413cd1c49adSSong Gao VReg *Vj = 
(VReg *)vj; 414cd1c49adSSong Gao VReg *Vk = (VReg *)vk; 415cd1c49adSSong Gao 416cd1c49adSSong Gao mulu64(&l, &h1, Vj->D(0), Vk->D(0)); 417cd1c49adSSong Gao mulu64(&l, &h2, Vj->D(1), Vk->D(1)); 418cd1c49adSSong Gao 419cd1c49adSSong Gao Vd->D(0) = h1; 420cd1c49adSSong Gao Vd->D(1) = h2; 421cd1c49adSSong Gao } 422cd1c49adSSong Gao 423cd1c49adSSong Gao DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH) 424cd1c49adSSong Gao DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH) 425cd1c49adSSong Gao DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH) 426cd1c49adSSong Gao 427cd1c49adSSong Gao #define DO_MUL(a, b) (a * b) 428cd1c49adSSong Gao 429cd1c49adSSong Gao DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL) 430cd1c49adSSong Gao DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL) 431cd1c49adSSong Gao DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL) 432cd1c49adSSong Gao 433cd1c49adSSong Gao DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL) 434cd1c49adSSong Gao DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL) 435cd1c49adSSong Gao DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL) 436cd1c49adSSong Gao 437cd1c49adSSong Gao DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL) 438cd1c49adSSong Gao DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL) 439cd1c49adSSong Gao DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL) 440cd1c49adSSong Gao 441cd1c49adSSong Gao DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL) 442cd1c49adSSong Gao DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL) 443cd1c49adSSong Gao DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL) 444cd1c49adSSong Gao 445cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) 446cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) 447cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) 448cd1c49adSSong Gao 449cd1c49adSSong Gao DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) 450cd1c49adSSong Gao DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) 451cd1c49adSSong Gao DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) 452d3aec65bSSong Gao 453d3aec65bSSong Gao #define DO_MADD(a, b, c) (a + b * c) 454d3aec65bSSong Gao #define DO_MSUB(a, 
b, c) (a - b * c) 455d3aec65bSSong Gao 456d3aec65bSSong Gao #define VMADDSUB(NAME, BIT, E, DO_OP) \ 457d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 458d3aec65bSSong Gao { \ 459d3aec65bSSong Gao int i; \ 460d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 461d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 462d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 463d3aec65bSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 464d3aec65bSSong Gao Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \ 465d3aec65bSSong Gao } \ 466d3aec65bSSong Gao } 467d3aec65bSSong Gao 468d3aec65bSSong Gao VMADDSUB(vmadd_b, 8, B, DO_MADD) 469d3aec65bSSong Gao VMADDSUB(vmadd_h, 16, H, DO_MADD) 470d3aec65bSSong Gao VMADDSUB(vmadd_w, 32, W, DO_MADD) 471d3aec65bSSong Gao VMADDSUB(vmadd_d, 64, D, DO_MADD) 472d3aec65bSSong Gao VMADDSUB(vmsub_b, 8, B, DO_MSUB) 473d3aec65bSSong Gao VMADDSUB(vmsub_h, 16, H, DO_MSUB) 474d3aec65bSSong Gao VMADDSUB(vmsub_w, 32, W, DO_MSUB) 475d3aec65bSSong Gao VMADDSUB(vmsub_d, 64, D, DO_MSUB) 476d3aec65bSSong Gao 477d3aec65bSSong Gao #define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \ 478d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 479d3aec65bSSong Gao { \ 480d3aec65bSSong Gao int i; \ 481d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 482d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 483d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 484d3aec65bSSong Gao typedef __typeof(Vd->E1(0)) TD; \ 485d3aec65bSSong Gao \ 486d3aec65bSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 487d3aec65bSSong Gao Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \ 488d3aec65bSSong Gao } \ 489d3aec65bSSong Gao } 490d3aec65bSSong Gao 491d3aec65bSSong Gao VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL) 492d3aec65bSSong Gao VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL) 493d3aec65bSSong Gao VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL) 494d3aec65bSSong Gao VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL) 495d3aec65bSSong Gao VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL) 496d3aec65bSSong Gao 
VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL) 497d3aec65bSSong Gao 498d3aec65bSSong Gao #define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \ 499d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 500d3aec65bSSong Gao { \ 501d3aec65bSSong Gao int i; \ 502d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 503d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 504d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 505d3aec65bSSong Gao typedef __typeof(Vd->E1(0)) TD; \ 506d3aec65bSSong Gao \ 507d3aec65bSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 508d3aec65bSSong Gao Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \ 509d3aec65bSSong Gao (TD)Vk->E2(2 * i + 1)); \ 510d3aec65bSSong Gao } \ 511d3aec65bSSong Gao } 512d3aec65bSSong Gao 513d3aec65bSSong Gao VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL) 514d3aec65bSSong Gao VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL) 515d3aec65bSSong Gao VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL) 516d3aec65bSSong Gao VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL) 517d3aec65bSSong Gao VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL) 518d3aec65bSSong Gao VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL) 519d3aec65bSSong Gao 520d3aec65bSSong Gao #define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ 521d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 522d3aec65bSSong Gao { \ 523d3aec65bSSong Gao int i; \ 524d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 525d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 526d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 527d3aec65bSSong Gao typedef __typeof(Vd->ES1(0)) TS1; \ 528d3aec65bSSong Gao typedef __typeof(Vd->EU1(0)) TU1; \ 529d3aec65bSSong Gao \ 530d3aec65bSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 531d3aec65bSSong Gao Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \ 532d3aec65bSSong Gao (TS1)Vk->ES2(2 * i)); \ 533d3aec65bSSong Gao } \ 534d3aec65bSSong Gao } 535d3aec65bSSong Gao 536d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) 537d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, 
H, UH, DO_MUL) 538d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) 539d3aec65bSSong Gao 540d3aec65bSSong Gao #define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ 541d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 542d3aec65bSSong Gao { \ 543d3aec65bSSong Gao int i; \ 544d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 545d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 546d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 547d3aec65bSSong Gao typedef __typeof(Vd->ES1(0)) TS1; \ 548d3aec65bSSong Gao typedef __typeof(Vd->EU1(0)) TU1; \ 549d3aec65bSSong Gao \ 550d3aec65bSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 551d3aec65bSSong Gao Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \ 552d3aec65bSSong Gao (TS1)Vk->ES2(2 * i + 1)); \ 553d3aec65bSSong Gao } \ 554d3aec65bSSong Gao } 555d3aec65bSSong Gao 556d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) 557d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) 558d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) 5594cc4c0f7SSong Gao 5604cc4c0f7SSong Gao #define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M) 5614cc4c0f7SSong Gao #define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M) 5624cc4c0f7SSong Gao #define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\ 5634cc4c0f7SSong Gao unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 5644cc4c0f7SSong Gao #define DO_REM(N, M) (unlikely(M == 0) ? 0 :\ 5654cc4c0f7SSong Gao unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 5664cc4c0f7SSong Gao 5674cc4c0f7SSong Gao #define VDIV(NAME, BIT, E, DO_OP) \ 5684cc4c0f7SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 5694cc4c0f7SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 5704cc4c0f7SSong Gao { \ 5714cc4c0f7SSong Gao int i; \ 5724cc4c0f7SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 5734cc4c0f7SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 5744cc4c0f7SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 5754cc4c0f7SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 5764cc4c0f7SSong Gao Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ 5774cc4c0f7SSong Gao } \ 5784cc4c0f7SSong Gao } 5794cc4c0f7SSong Gao 5804cc4c0f7SSong Gao VDIV(vdiv_b, 8, B, DO_DIV) 5814cc4c0f7SSong Gao VDIV(vdiv_h, 16, H, DO_DIV) 5824cc4c0f7SSong Gao VDIV(vdiv_w, 32, W, DO_DIV) 5834cc4c0f7SSong Gao VDIV(vdiv_d, 64, D, DO_DIV) 5844cc4c0f7SSong Gao VDIV(vdiv_bu, 8, UB, DO_DIVU) 5854cc4c0f7SSong Gao VDIV(vdiv_hu, 16, UH, DO_DIVU) 5864cc4c0f7SSong Gao VDIV(vdiv_wu, 32, UW, DO_DIVU) 5874cc4c0f7SSong Gao VDIV(vdiv_du, 64, UD, DO_DIVU) 5884cc4c0f7SSong Gao VDIV(vmod_b, 8, B, DO_REM) 5894cc4c0f7SSong Gao VDIV(vmod_h, 16, H, DO_REM) 5904cc4c0f7SSong Gao VDIV(vmod_w, 32, W, DO_REM) 5914cc4c0f7SSong Gao VDIV(vmod_d, 64, D, DO_REM) 5924cc4c0f7SSong Gao VDIV(vmod_bu, 8, UB, DO_REMU) 5934cc4c0f7SSong Gao VDIV(vmod_hu, 16, UH, DO_REMU) 5944cc4c0f7SSong Gao VDIV(vmod_wu, 32, UW, DO_REMU) 5954cc4c0f7SSong Gao VDIV(vmod_du, 64, UD, DO_REMU) 596cbe44190SSong Gao 597cbe44190SSong Gao #define VSAT_S(NAME, BIT, E) \ 598cbe44190SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \ 599cbe44190SSong Gao { \ 600cbe44190SSong Gao int i; \ 601cbe44190SSong Gao VReg *Vd = (VReg *)vd; \ 602cbe44190SSong Gao VReg *Vj = (VReg *)vj; \ 603cbe44190SSong Gao typedef __typeof(Vd->E(0)) TD; \ 604cbe44190SSong Gao \ 605cbe44190SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 606cbe44190SSong Gao Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \ 607cbe44190SSong Gao Vj->E(i) < (TD)~max ? 
(TD)~max: Vj->E(i); \ 608cbe44190SSong Gao } \ 609cbe44190SSong Gao } 610cbe44190SSong Gao 611cbe44190SSong Gao VSAT_S(vsat_b, 8, B) 612cbe44190SSong Gao VSAT_S(vsat_h, 16, H) 613cbe44190SSong Gao VSAT_S(vsat_w, 32, W) 614cbe44190SSong Gao VSAT_S(vsat_d, 64, D) 615cbe44190SSong Gao 616cbe44190SSong Gao #define VSAT_U(NAME, BIT, E) \ 617cbe44190SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \ 618cbe44190SSong Gao { \ 619cbe44190SSong Gao int i; \ 620cbe44190SSong Gao VReg *Vd = (VReg *)vd; \ 621cbe44190SSong Gao VReg *Vj = (VReg *)vj; \ 622cbe44190SSong Gao typedef __typeof(Vd->E(0)) TD; \ 623cbe44190SSong Gao \ 624cbe44190SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 625cbe44190SSong Gao Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \ 626cbe44190SSong Gao } \ 627cbe44190SSong Gao } 628cbe44190SSong Gao 629cbe44190SSong Gao VSAT_U(vsat_bu, 8, UB) 630cbe44190SSong Gao VSAT_U(vsat_hu, 16, UH) 631cbe44190SSong Gao VSAT_U(vsat_wu, 32, UW) 632cbe44190SSong Gao VSAT_U(vsat_du, 64, UD) 6333734ad93SSong Gao 6343734ad93SSong Gao #define VEXTH(NAME, BIT, E1, E2) \ 6353734ad93SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ 6363734ad93SSong Gao { \ 6373734ad93SSong Gao int i; \ 6383734ad93SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 6393734ad93SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 6403734ad93SSong Gao \ 6413734ad93SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 6423734ad93SSong Gao Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \ 6433734ad93SSong Gao } \ 6443734ad93SSong Gao } 6453734ad93SSong Gao 6463734ad93SSong Gao void HELPER(vexth_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 6473734ad93SSong Gao { 6483734ad93SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 6493734ad93SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 6503734ad93SSong Gao 6513734ad93SSong Gao Vd->Q(0) = int128_makes64(Vj->D(1)); 6523734ad93SSong Gao } 6533734ad93SSong Gao 6543734ad93SSong Gao void HELPER(vexth_qu_du)(CPULoongArchState *env, 
uint32_t vd, uint32_t vj) 6553734ad93SSong Gao { 6563734ad93SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 6573734ad93SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 6583734ad93SSong Gao 6593734ad93SSong Gao Vd->Q(0) = int128_make64((uint64_t)Vj->D(1)); 6603734ad93SSong Gao } 6613734ad93SSong Gao 6623734ad93SSong Gao VEXTH(vexth_h_b, 16, H, B) 6633734ad93SSong Gao VEXTH(vexth_w_h, 32, W, H) 6643734ad93SSong Gao VEXTH(vexth_d_w, 64, D, W) 6653734ad93SSong Gao VEXTH(vexth_hu_bu, 16, UH, UB) 6663734ad93SSong Gao VEXTH(vexth_wu_hu, 32, UW, UH) 6673734ad93SSong Gao VEXTH(vexth_du_wu, 64, UD, UW) 668f0e395dfSSong Gao 669f0e395dfSSong Gao #define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b) 670f0e395dfSSong Gao 671f0e395dfSSong Gao DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV) 672f0e395dfSSong Gao DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV) 673f0e395dfSSong Gao DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV) 674f0e395dfSSong Gao DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV) 675789f4a4cSSong Gao 676789f4a4cSSong Gao static uint64_t do_vmskltz_b(int64_t val) 677789f4a4cSSong Gao { 678789f4a4cSSong Gao uint64_t m = 0x8080808080808080ULL; 679789f4a4cSSong Gao uint64_t c = val & m; 680789f4a4cSSong Gao c |= c << 7; 681789f4a4cSSong Gao c |= c << 14; 682789f4a4cSSong Gao c |= c << 28; 683789f4a4cSSong Gao return c >> 56; 684789f4a4cSSong Gao } 685789f4a4cSSong Gao 686789f4a4cSSong Gao void HELPER(vmskltz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 687789f4a4cSSong Gao { 688789f4a4cSSong Gao uint16_t temp = 0; 689789f4a4cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 690789f4a4cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 691789f4a4cSSong Gao 692789f4a4cSSong Gao temp = do_vmskltz_b(Vj->D(0)); 693789f4a4cSSong Gao temp |= (do_vmskltz_b(Vj->D(1)) << 8); 694789f4a4cSSong Gao Vd->D(0) = temp; 695789f4a4cSSong Gao Vd->D(1) = 0; 696789f4a4cSSong Gao } 697789f4a4cSSong Gao 698789f4a4cSSong Gao static uint64_t do_vmskltz_h(int64_t val) 699789f4a4cSSong Gao { 700789f4a4cSSong Gao uint64_t m = 0x8000800080008000ULL; 
701789f4a4cSSong Gao uint64_t c = val & m; 702789f4a4cSSong Gao c |= c << 15; 703789f4a4cSSong Gao c |= c << 30; 704789f4a4cSSong Gao return c >> 60; 705789f4a4cSSong Gao } 706789f4a4cSSong Gao 707789f4a4cSSong Gao void HELPER(vmskltz_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 708789f4a4cSSong Gao { 709789f4a4cSSong Gao uint16_t temp = 0; 710789f4a4cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 711789f4a4cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 712789f4a4cSSong Gao 713789f4a4cSSong Gao temp = do_vmskltz_h(Vj->D(0)); 714789f4a4cSSong Gao temp |= (do_vmskltz_h(Vj->D(1)) << 4); 715789f4a4cSSong Gao Vd->D(0) = temp; 716789f4a4cSSong Gao Vd->D(1) = 0; 717789f4a4cSSong Gao } 718789f4a4cSSong Gao 719789f4a4cSSong Gao static uint64_t do_vmskltz_w(int64_t val) 720789f4a4cSSong Gao { 721789f4a4cSSong Gao uint64_t m = 0x8000000080000000ULL; 722789f4a4cSSong Gao uint64_t c = val & m; 723789f4a4cSSong Gao c |= c << 31; 724789f4a4cSSong Gao return c >> 62; 725789f4a4cSSong Gao } 726789f4a4cSSong Gao 727789f4a4cSSong Gao void HELPER(vmskltz_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 728789f4a4cSSong Gao { 729789f4a4cSSong Gao uint16_t temp = 0; 730789f4a4cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 731789f4a4cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 732789f4a4cSSong Gao 733789f4a4cSSong Gao temp = do_vmskltz_w(Vj->D(0)); 734789f4a4cSSong Gao temp |= (do_vmskltz_w(Vj->D(1)) << 2); 735789f4a4cSSong Gao Vd->D(0) = temp; 736789f4a4cSSong Gao Vd->D(1) = 0; 737789f4a4cSSong Gao } 738789f4a4cSSong Gao 739789f4a4cSSong Gao static uint64_t do_vmskltz_d(int64_t val) 740789f4a4cSSong Gao { 741789f4a4cSSong Gao return (uint64_t)val >> 63; 742789f4a4cSSong Gao } 743789f4a4cSSong Gao void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 744789f4a4cSSong Gao { 745789f4a4cSSong Gao uint16_t temp = 0; 746789f4a4cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 747789f4a4cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 748789f4a4cSSong Gao 749789f4a4cSSong Gao temp = 
do_vmskltz_d(Vj->D(0)); 750789f4a4cSSong Gao temp |= (do_vmskltz_d(Vj->D(1)) << 1); 751789f4a4cSSong Gao Vd->D(0) = temp; 752789f4a4cSSong Gao Vd->D(1) = 0; 753789f4a4cSSong Gao } 754789f4a4cSSong Gao 755789f4a4cSSong Gao void HELPER(vmskgez_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 756789f4a4cSSong Gao { 757789f4a4cSSong Gao uint16_t temp = 0; 758789f4a4cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 759789f4a4cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 760789f4a4cSSong Gao 761789f4a4cSSong Gao temp = do_vmskltz_b(Vj->D(0)); 762789f4a4cSSong Gao temp |= (do_vmskltz_b(Vj->D(1)) << 8); 763789f4a4cSSong Gao Vd->D(0) = (uint16_t)(~temp); 764789f4a4cSSong Gao Vd->D(1) = 0; 765789f4a4cSSong Gao } 766789f4a4cSSong Gao 767789f4a4cSSong Gao static uint64_t do_vmskez_b(uint64_t a) 768789f4a4cSSong Gao { 769789f4a4cSSong Gao uint64_t m = 0x7f7f7f7f7f7f7f7fULL; 770789f4a4cSSong Gao uint64_t c = ~(((a & m) + m) | a | m); 771789f4a4cSSong Gao c |= c << 7; 772789f4a4cSSong Gao c |= c << 14; 773789f4a4cSSong Gao c |= c << 28; 774789f4a4cSSong Gao return c >> 56; 775789f4a4cSSong Gao } 776789f4a4cSSong Gao 777789f4a4cSSong Gao void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 778789f4a4cSSong Gao { 779789f4a4cSSong Gao uint16_t temp = 0; 780789f4a4cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 781789f4a4cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 782789f4a4cSSong Gao 783789f4a4cSSong Gao temp = do_vmskez_b(Vj->D(0)); 784789f4a4cSSong Gao temp |= (do_vmskez_b(Vj->D(1)) << 8); 785789f4a4cSSong Gao Vd->D(0) = (uint16_t)(~temp); 786789f4a4cSSong Gao Vd->D(1) = 0; 787789f4a4cSSong Gao } 788f205a539SSong Gao 789f205a539SSong Gao void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v) 790f205a539SSong Gao { 791f205a539SSong Gao int i; 792f205a539SSong Gao VReg *Vd = (VReg *)vd; 793f205a539SSong Gao VReg *Vj = (VReg *)vj; 794f205a539SSong Gao 795f205a539SSong Gao for (i = 0; i < LSX_LEN/8; i++) { 796f205a539SSong Gao Vd->B(i) = ~(Vj->B(i) | 
(uint8_t)imm); 797f205a539SSong Gao } 798f205a539SSong Gao } 7999b21a7a5SSong Gao 8009b21a7a5SSong Gao #define VSLLWIL(NAME, BIT, E1, E2) \ 8019b21a7a5SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 8029b21a7a5SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 8039b21a7a5SSong Gao { \ 8049b21a7a5SSong Gao int i; \ 8059b21a7a5SSong Gao VReg temp; \ 8069b21a7a5SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 8079b21a7a5SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 8089b21a7a5SSong Gao typedef __typeof(temp.E1(0)) TD; \ 8099b21a7a5SSong Gao \ 8109b21a7a5SSong Gao temp.D(0) = 0; \ 8119b21a7a5SSong Gao temp.D(1) = 0; \ 8129b21a7a5SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 8139b21a7a5SSong Gao temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \ 8149b21a7a5SSong Gao } \ 8159b21a7a5SSong Gao *Vd = temp; \ 8169b21a7a5SSong Gao } 8179b21a7a5SSong Gao 8189b21a7a5SSong Gao void HELPER(vextl_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 8199b21a7a5SSong Gao { 8209b21a7a5SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 8219b21a7a5SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 8229b21a7a5SSong Gao 8239b21a7a5SSong Gao Vd->Q(0) = int128_makes64(Vj->D(0)); 8249b21a7a5SSong Gao } 8259b21a7a5SSong Gao 8269b21a7a5SSong Gao void HELPER(vextl_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 8279b21a7a5SSong Gao { 8289b21a7a5SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 8299b21a7a5SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 8309b21a7a5SSong Gao 8319b21a7a5SSong Gao Vd->Q(0) = int128_make64(Vj->D(0)); 8329b21a7a5SSong Gao } 8339b21a7a5SSong Gao 8349b21a7a5SSong Gao VSLLWIL(vsllwil_h_b, 16, H, B) 8359b21a7a5SSong Gao VSLLWIL(vsllwil_w_h, 32, W, H) 8369b21a7a5SSong Gao VSLLWIL(vsllwil_d_w, 64, D, W) 8379b21a7a5SSong Gao VSLLWIL(vsllwil_hu_bu, 16, UH, UB) 8389b21a7a5SSong Gao VSLLWIL(vsllwil_wu_hu, 32, UW, UH) 8399b21a7a5SSong Gao VSLLWIL(vsllwil_du_wu, 64, UD, UW) 840ecb93716SSong Gao 841ecb93716SSong Gao #define do_vsrlr(E, T) \ 842ecb93716SSong Gao static T do_vsrlr_ ##E(T s1, int sh) 
\ 843ecb93716SSong Gao { \ 844ecb93716SSong Gao if (sh == 0) { \ 845ecb93716SSong Gao return s1; \ 846ecb93716SSong Gao } else { \ 847ecb93716SSong Gao return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ 848ecb93716SSong Gao } \ 849ecb93716SSong Gao } 850ecb93716SSong Gao 851ecb93716SSong Gao do_vsrlr(B, uint8_t) 852ecb93716SSong Gao do_vsrlr(H, uint16_t) 853ecb93716SSong Gao do_vsrlr(W, uint32_t) 854ecb93716SSong Gao do_vsrlr(D, uint64_t) 855ecb93716SSong Gao 856ecb93716SSong Gao #define VSRLR(NAME, BIT, T, E) \ 857ecb93716SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 858ecb93716SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 859ecb93716SSong Gao { \ 860ecb93716SSong Gao int i; \ 861ecb93716SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 862ecb93716SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 863ecb93716SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 864ecb93716SSong Gao \ 865ecb93716SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 866ecb93716SSong Gao Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ 867ecb93716SSong Gao } \ 868ecb93716SSong Gao } 869ecb93716SSong Gao 870ecb93716SSong Gao VSRLR(vsrlr_b, 8, uint8_t, B) 871ecb93716SSong Gao VSRLR(vsrlr_h, 16, uint16_t, H) 872ecb93716SSong Gao VSRLR(vsrlr_w, 32, uint32_t, W) 873ecb93716SSong Gao VSRLR(vsrlr_d, 64, uint64_t, D) 874ecb93716SSong Gao 875ecb93716SSong Gao #define VSRLRI(NAME, BIT, E) \ 876ecb93716SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 877ecb93716SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 878ecb93716SSong Gao { \ 879ecb93716SSong Gao int i; \ 880ecb93716SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 881ecb93716SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 882ecb93716SSong Gao \ 883ecb93716SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 884ecb93716SSong Gao Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ 885ecb93716SSong Gao } \ 886ecb93716SSong Gao } 887ecb93716SSong Gao 888ecb93716SSong Gao VSRLRI(vsrlri_b, 8, B) 889ecb93716SSong Gao VSRLRI(vsrlri_h, 16, H) 890ecb93716SSong Gao 
VSRLRI(vsrlri_w, 32, W) 891ecb93716SSong Gao VSRLRI(vsrlri_d, 64, D) 892ecb93716SSong Gao 893ecb93716SSong Gao #define do_vsrar(E, T) \ 894ecb93716SSong Gao static T do_vsrar_ ##E(T s1, int sh) \ 895ecb93716SSong Gao { \ 896ecb93716SSong Gao if (sh == 0) { \ 897ecb93716SSong Gao return s1; \ 898ecb93716SSong Gao } else { \ 899ecb93716SSong Gao return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ 900ecb93716SSong Gao } \ 901ecb93716SSong Gao } 902ecb93716SSong Gao 903ecb93716SSong Gao do_vsrar(B, int8_t) 904ecb93716SSong Gao do_vsrar(H, int16_t) 905ecb93716SSong Gao do_vsrar(W, int32_t) 906ecb93716SSong Gao do_vsrar(D, int64_t) 907ecb93716SSong Gao 908ecb93716SSong Gao #define VSRAR(NAME, BIT, T, E) \ 909ecb93716SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 910ecb93716SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 911ecb93716SSong Gao { \ 912ecb93716SSong Gao int i; \ 913ecb93716SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 914ecb93716SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 915ecb93716SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 916ecb93716SSong Gao \ 917ecb93716SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 918ecb93716SSong Gao Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ 919ecb93716SSong Gao } \ 920ecb93716SSong Gao } 921ecb93716SSong Gao 922ecb93716SSong Gao VSRAR(vsrar_b, 8, uint8_t, B) 923ecb93716SSong Gao VSRAR(vsrar_h, 16, uint16_t, H) 924ecb93716SSong Gao VSRAR(vsrar_w, 32, uint32_t, W) 925ecb93716SSong Gao VSRAR(vsrar_d, 64, uint64_t, D) 926ecb93716SSong Gao 927ecb93716SSong Gao #define VSRARI(NAME, BIT, E) \ 928ecb93716SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 929ecb93716SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 930ecb93716SSong Gao { \ 931ecb93716SSong Gao int i; \ 932ecb93716SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 933ecb93716SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 934ecb93716SSong Gao \ 935ecb93716SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 936ecb93716SSong Gao Vd->E(i) = do_vsrar_ ## 
E(Vj->E(i), imm); \ 937ecb93716SSong Gao } \ 938ecb93716SSong Gao } 939ecb93716SSong Gao 940ecb93716SSong Gao VSRARI(vsrari_b, 8, B) 941ecb93716SSong Gao VSRARI(vsrari_h, 16, H) 942ecb93716SSong Gao VSRARI(vsrari_w, 32, W) 943ecb93716SSong Gao VSRARI(vsrari_d, 64, D) 944d79fb8ddSSong Gao 945d79fb8ddSSong Gao #define R_SHIFT(a, b) (a >> b) 946d79fb8ddSSong Gao 947d79fb8ddSSong Gao #define VSRLN(NAME, BIT, T, E1, E2) \ 948d79fb8ddSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 949d79fb8ddSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 950d79fb8ddSSong Gao { \ 951d79fb8ddSSong Gao int i; \ 952d79fb8ddSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 953d79fb8ddSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 954d79fb8ddSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 955d79fb8ddSSong Gao \ 956d79fb8ddSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 957d79fb8ddSSong Gao Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \ 958d79fb8ddSSong Gao } \ 959d79fb8ddSSong Gao Vd->D(1) = 0; \ 960d79fb8ddSSong Gao } 961d79fb8ddSSong Gao 962d79fb8ddSSong Gao VSRLN(vsrln_b_h, 16, uint16_t, B, H) 963d79fb8ddSSong Gao VSRLN(vsrln_h_w, 32, uint32_t, H, W) 964d79fb8ddSSong Gao VSRLN(vsrln_w_d, 64, uint64_t, W, D) 965d79fb8ddSSong Gao 966d79fb8ddSSong Gao #define VSRAN(NAME, BIT, T, E1, E2) \ 967d79fb8ddSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 968d79fb8ddSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 969d79fb8ddSSong Gao { \ 970d79fb8ddSSong Gao int i; \ 971d79fb8ddSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 972d79fb8ddSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 973d79fb8ddSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 974d79fb8ddSSong Gao \ 975d79fb8ddSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 976d79fb8ddSSong Gao Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \ 977d79fb8ddSSong Gao } \ 978d79fb8ddSSong Gao Vd->D(1) = 0; \ 979d79fb8ddSSong Gao } 980d79fb8ddSSong Gao 981d79fb8ddSSong Gao VSRAN(vsran_b_h, 16, uint16_t, B, H) 982d79fb8ddSSong Gao 
VSRAN(vsran_h_w, 32, uint32_t, H, W) 983d79fb8ddSSong Gao VSRAN(vsran_w_d, 64, uint64_t, W, D) 984d79fb8ddSSong Gao 985d79fb8ddSSong Gao #define VSRLNI(NAME, BIT, T, E1, E2) \ 986d79fb8ddSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 987d79fb8ddSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 988d79fb8ddSSong Gao { \ 989d79fb8ddSSong Gao int i, max; \ 990d79fb8ddSSong Gao VReg temp; \ 991d79fb8ddSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 992d79fb8ddSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 993d79fb8ddSSong Gao \ 994d79fb8ddSSong Gao temp.D(0) = 0; \ 995d79fb8ddSSong Gao temp.D(1) = 0; \ 996d79fb8ddSSong Gao max = LSX_LEN/BIT; \ 997d79fb8ddSSong Gao for (i = 0; i < max; i++) { \ 998d79fb8ddSSong Gao temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \ 999d79fb8ddSSong Gao temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \ 1000d79fb8ddSSong Gao } \ 1001d79fb8ddSSong Gao *Vd = temp; \ 1002d79fb8ddSSong Gao } 1003d79fb8ddSSong Gao 1004d79fb8ddSSong Gao void HELPER(vsrlni_d_q)(CPULoongArchState *env, 1005d79fb8ddSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 1006d79fb8ddSSong Gao { 1007d79fb8ddSSong Gao VReg temp; 1008d79fb8ddSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 1009d79fb8ddSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 1010d79fb8ddSSong Gao 1011d79fb8ddSSong Gao temp.D(0) = 0; 1012d79fb8ddSSong Gao temp.D(1) = 0; 1013d79fb8ddSSong Gao temp.D(0) = int128_getlo(int128_urshift(Vj->Q(0), imm % 128)); 1014d79fb8ddSSong Gao temp.D(1) = int128_getlo(int128_urshift(Vd->Q(0), imm % 128)); 1015d79fb8ddSSong Gao *Vd = temp; 1016d79fb8ddSSong Gao } 1017d79fb8ddSSong Gao 1018d79fb8ddSSong Gao VSRLNI(vsrlni_b_h, 16, uint16_t, B, H) 1019d79fb8ddSSong Gao VSRLNI(vsrlni_h_w, 32, uint32_t, H, W) 1020d79fb8ddSSong Gao VSRLNI(vsrlni_w_d, 64, uint64_t, W, D) 1021d79fb8ddSSong Gao 1022d79fb8ddSSong Gao #define VSRANI(NAME, BIT, E1, E2) \ 1023d79fb8ddSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1024d79fb8ddSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 1025d79fb8ddSSong 
Gao { \ 1026d79fb8ddSSong Gao int i, max; \ 1027d79fb8ddSSong Gao VReg temp; \ 1028d79fb8ddSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1029d79fb8ddSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1030d79fb8ddSSong Gao \ 1031d79fb8ddSSong Gao temp.D(0) = 0; \ 1032d79fb8ddSSong Gao temp.D(1) = 0; \ 1033d79fb8ddSSong Gao max = LSX_LEN/BIT; \ 1034d79fb8ddSSong Gao for (i = 0; i < max; i++) { \ 1035d79fb8ddSSong Gao temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \ 1036d79fb8ddSSong Gao temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \ 1037d79fb8ddSSong Gao } \ 1038d79fb8ddSSong Gao *Vd = temp; \ 1039d79fb8ddSSong Gao } 1040d79fb8ddSSong Gao 1041d79fb8ddSSong Gao void HELPER(vsrani_d_q)(CPULoongArchState *env, 1042d79fb8ddSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 1043d79fb8ddSSong Gao { 1044d79fb8ddSSong Gao VReg temp; 1045d79fb8ddSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 1046d79fb8ddSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 1047d79fb8ddSSong Gao 1048d79fb8ddSSong Gao temp.D(0) = 0; 1049d79fb8ddSSong Gao temp.D(1) = 0; 1050d79fb8ddSSong Gao temp.D(0) = int128_getlo(int128_rshift(Vj->Q(0), imm % 128)); 1051d79fb8ddSSong Gao temp.D(1) = int128_getlo(int128_rshift(Vd->Q(0), imm % 128)); 1052d79fb8ddSSong Gao *Vd = temp; 1053d79fb8ddSSong Gao } 1054d79fb8ddSSong Gao 1055d79fb8ddSSong Gao VSRANI(vsrani_b_h, 16, B, H) 1056d79fb8ddSSong Gao VSRANI(vsrani_h_w, 32, H, W) 1057d79fb8ddSSong Gao VSRANI(vsrani_w_d, 64, W, D) 1058a5200a17SSong Gao 1059a5200a17SSong Gao #define VSRLRN(NAME, BIT, T, E1, E2) \ 1060a5200a17SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1061a5200a17SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 1062a5200a17SSong Gao { \ 1063a5200a17SSong Gao int i; \ 1064a5200a17SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1065a5200a17SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1066a5200a17SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 1067a5200a17SSong Gao \ 1068a5200a17SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1069a5200a17SSong Gao Vd->E1(i) = do_vsrlr_ ## 
E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ 1070a5200a17SSong Gao } \ 1071a5200a17SSong Gao Vd->D(1) = 0; \ 1072a5200a17SSong Gao } 1073a5200a17SSong Gao 1074a5200a17SSong Gao VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H) 1075a5200a17SSong Gao VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W) 1076a5200a17SSong Gao VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D) 1077a5200a17SSong Gao 1078a5200a17SSong Gao #define VSRARN(NAME, BIT, T, E1, E2) \ 1079a5200a17SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1080a5200a17SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 1081a5200a17SSong Gao { \ 1082a5200a17SSong Gao int i; \ 1083a5200a17SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1084a5200a17SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1085a5200a17SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 1086a5200a17SSong Gao \ 1087a5200a17SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1088a5200a17SSong Gao Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ 1089a5200a17SSong Gao } \ 1090a5200a17SSong Gao Vd->D(1) = 0; \ 1091a5200a17SSong Gao } 1092a5200a17SSong Gao 1093a5200a17SSong Gao VSRARN(vsrarn_b_h, 16, uint8_t, B, H) 1094a5200a17SSong Gao VSRARN(vsrarn_h_w, 32, uint16_t, H, W) 1095a5200a17SSong Gao VSRARN(vsrarn_w_d, 64, uint32_t, W, D) 1096a5200a17SSong Gao 1097a5200a17SSong Gao #define VSRLRNI(NAME, BIT, E1, E2) \ 1098a5200a17SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1099a5200a17SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 1100a5200a17SSong Gao { \ 1101a5200a17SSong Gao int i, max; \ 1102a5200a17SSong Gao VReg temp; \ 1103a5200a17SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1104a5200a17SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1105a5200a17SSong Gao \ 1106a5200a17SSong Gao temp.D(0) = 0; \ 1107a5200a17SSong Gao temp.D(1) = 0; \ 1108a5200a17SSong Gao max = LSX_LEN/BIT; \ 1109a5200a17SSong Gao for (i = 0; i < max; i++) { \ 1110a5200a17SSong Gao temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \ 1111a5200a17SSong Gao temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \ 
1112a5200a17SSong Gao } \ 1113a5200a17SSong Gao *Vd = temp; \ 1114a5200a17SSong Gao } 1115a5200a17SSong Gao 1116a5200a17SSong Gao void HELPER(vsrlrni_d_q)(CPULoongArchState *env, 1117a5200a17SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 1118a5200a17SSong Gao { 1119a5200a17SSong Gao VReg temp; 1120a5200a17SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 1121a5200a17SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 1122a5200a17SSong Gao Int128 r1, r2; 1123a5200a17SSong Gao 1124a5200a17SSong Gao if (imm == 0) { 1125a5200a17SSong Gao temp.D(0) = int128_getlo(Vj->Q(0)); 1126a5200a17SSong Gao temp.D(1) = int128_getlo(Vd->Q(0)); 1127a5200a17SSong Gao } else { 1128a5200a17SSong Gao r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); 1129a5200a17SSong Gao r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); 1130a5200a17SSong Gao 1131a5200a17SSong Gao temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1)); 1132a5200a17SSong Gao temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2)); 1133a5200a17SSong Gao } 1134a5200a17SSong Gao *Vd = temp; 1135a5200a17SSong Gao } 1136a5200a17SSong Gao 1137a5200a17SSong Gao VSRLRNI(vsrlrni_b_h, 16, B, H) 1138a5200a17SSong Gao VSRLRNI(vsrlrni_h_w, 32, H, W) 1139a5200a17SSong Gao VSRLRNI(vsrlrni_w_d, 64, W, D) 1140a5200a17SSong Gao 1141a5200a17SSong Gao #define VSRARNI(NAME, BIT, E1, E2) \ 1142a5200a17SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1143a5200a17SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 1144a5200a17SSong Gao { \ 1145a5200a17SSong Gao int i, max; \ 1146a5200a17SSong Gao VReg temp; \ 1147a5200a17SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1148a5200a17SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1149a5200a17SSong Gao \ 1150a5200a17SSong Gao temp.D(0) = 0; \ 1151a5200a17SSong Gao temp.D(1) = 0; \ 1152a5200a17SSong Gao max = LSX_LEN/BIT; \ 1153a5200a17SSong Gao for (i = 0; i < max; i++) { \ 1154a5200a17SSong Gao temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \ 
1155a5200a17SSong Gao temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \ 1156a5200a17SSong Gao } \ 1157a5200a17SSong Gao *Vd = temp; \ 1158a5200a17SSong Gao } 1159a5200a17SSong Gao 1160a5200a17SSong Gao void HELPER(vsrarni_d_q)(CPULoongArchState *env, 1161a5200a17SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 1162a5200a17SSong Gao { 1163a5200a17SSong Gao VReg temp; 1164a5200a17SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 1165a5200a17SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 1166a5200a17SSong Gao Int128 r1, r2; 1167a5200a17SSong Gao 1168a5200a17SSong Gao if (imm == 0) { 1169a5200a17SSong Gao temp.D(0) = int128_getlo(Vj->Q(0)); 1170a5200a17SSong Gao temp.D(1) = int128_getlo(Vd->Q(0)); 1171a5200a17SSong Gao } else { 1172a5200a17SSong Gao r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); 1173a5200a17SSong Gao r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); 1174a5200a17SSong Gao 1175a5200a17SSong Gao temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1)); 1176a5200a17SSong Gao temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2)); 1177a5200a17SSong Gao } 1178a5200a17SSong Gao *Vd = temp; 1179a5200a17SSong Gao } 1180a5200a17SSong Gao 1181a5200a17SSong Gao VSRARNI(vsrarni_b_h, 16, B, H) 1182a5200a17SSong Gao VSRARNI(vsrarni_h_w, 32, H, W) 1183a5200a17SSong Gao VSRARNI(vsrarni_w_d, 64, W, D) 118483b3815dSSong Gao 118583b3815dSSong Gao #define SSRLNS(NAME, T1, T2, T3) \ 118683b3815dSSong Gao static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \ 118783b3815dSSong Gao { \ 118883b3815dSSong Gao T1 shft_res; \ 118983b3815dSSong Gao if (sa == 0) { \ 119083b3815dSSong Gao shft_res = e2; \ 119183b3815dSSong Gao } else { \ 119283b3815dSSong Gao shft_res = (((T1)e2) >> sa); \ 119383b3815dSSong Gao } \ 119483b3815dSSong Gao T3 mask; \ 119583b3815dSSong Gao mask = (1ull << sh) -1; \ 119683b3815dSSong Gao if (shft_res > mask) { \ 119783b3815dSSong Gao return mask; \ 119883b3815dSSong Gao } else { \ 
119983b3815dSSong Gao return shft_res; \ 120083b3815dSSong Gao } \ 120183b3815dSSong Gao } 120283b3815dSSong Gao 120383b3815dSSong Gao SSRLNS(B, uint16_t, int16_t, uint8_t) 120483b3815dSSong Gao SSRLNS(H, uint32_t, int32_t, uint16_t) 120583b3815dSSong Gao SSRLNS(W, uint64_t, int64_t, uint32_t) 120683b3815dSSong Gao 120783b3815dSSong Gao #define VSSRLN(NAME, BIT, T, E1, E2) \ 120883b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 120983b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 121083b3815dSSong Gao { \ 121183b3815dSSong Gao int i; \ 121283b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 121383b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 121483b3815dSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 121583b3815dSSong Gao \ 121683b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 121783b3815dSSong Gao Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \ 121883b3815dSSong Gao } \ 121983b3815dSSong Gao Vd->D(1) = 0; \ 122083b3815dSSong Gao } 122183b3815dSSong Gao 122283b3815dSSong Gao VSSRLN(vssrln_b_h, 16, uint16_t, B, H) 122383b3815dSSong Gao VSSRLN(vssrln_h_w, 32, uint32_t, H, W) 122483b3815dSSong Gao VSSRLN(vssrln_w_d, 64, uint64_t, W, D) 122583b3815dSSong Gao 122683b3815dSSong Gao #define SSRANS(E, T1, T2) \ 122783b3815dSSong Gao static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \ 122883b3815dSSong Gao { \ 122983b3815dSSong Gao T1 shft_res; \ 123083b3815dSSong Gao if (sa == 0) { \ 123183b3815dSSong Gao shft_res = e2; \ 123283b3815dSSong Gao } else { \ 123383b3815dSSong Gao shft_res = e2 >> sa; \ 123483b3815dSSong Gao } \ 123583b3815dSSong Gao T2 mask; \ 123683b3815dSSong Gao mask = (1ll << sh) -1; \ 123783b3815dSSong Gao if (shft_res > mask) { \ 123883b3815dSSong Gao return mask; \ 123983b3815dSSong Gao } else if (shft_res < -(mask +1)) { \ 124083b3815dSSong Gao return ~mask; \ 124183b3815dSSong Gao } else { \ 124283b3815dSSong Gao return shft_res; \ 124383b3815dSSong Gao } \ 124483b3815dSSong Gao } 124583b3815dSSong 
Gao 124683b3815dSSong Gao SSRANS(B, int16_t, int8_t) 124783b3815dSSong Gao SSRANS(H, int32_t, int16_t) 124883b3815dSSong Gao SSRANS(W, int64_t, int32_t) 124983b3815dSSong Gao 125083b3815dSSong Gao #define VSSRAN(NAME, BIT, T, E1, E2) \ 125183b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 125283b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 125383b3815dSSong Gao { \ 125483b3815dSSong Gao int i; \ 125583b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 125683b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 125783b3815dSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 125883b3815dSSong Gao \ 125983b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 126083b3815dSSong Gao Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ 126183b3815dSSong Gao } \ 126283b3815dSSong Gao Vd->D(1) = 0; \ 126383b3815dSSong Gao } 126483b3815dSSong Gao 126583b3815dSSong Gao VSSRAN(vssran_b_h, 16, uint16_t, B, H) 126683b3815dSSong Gao VSSRAN(vssran_h_w, 32, uint32_t, H, W) 126783b3815dSSong Gao VSSRAN(vssran_w_d, 64, uint64_t, W, D) 126883b3815dSSong Gao 126983b3815dSSong Gao #define SSRLNU(E, T1, T2, T3) \ 127083b3815dSSong Gao static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \ 127183b3815dSSong Gao { \ 127283b3815dSSong Gao T1 shft_res; \ 127383b3815dSSong Gao if (sa == 0) { \ 127483b3815dSSong Gao shft_res = e2; \ 127583b3815dSSong Gao } else { \ 127683b3815dSSong Gao shft_res = (((T1)e2) >> sa); \ 127783b3815dSSong Gao } \ 127883b3815dSSong Gao T2 mask; \ 127983b3815dSSong Gao mask = (1ull << sh) -1; \ 128083b3815dSSong Gao if (shft_res > mask) { \ 128183b3815dSSong Gao return mask; \ 128283b3815dSSong Gao } else { \ 128383b3815dSSong Gao return shft_res; \ 128483b3815dSSong Gao } \ 128583b3815dSSong Gao } 128683b3815dSSong Gao 128783b3815dSSong Gao SSRLNU(B, uint16_t, uint8_t, int16_t) 128883b3815dSSong Gao SSRLNU(H, uint32_t, uint16_t, int32_t) 128983b3815dSSong Gao SSRLNU(W, uint64_t, uint32_t, int64_t) 129083b3815dSSong Gao 129183b3815dSSong 
Gao #define VSSRLNU(NAME, BIT, T, E1, E2) \ 129283b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 129383b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 129483b3815dSSong Gao { \ 129583b3815dSSong Gao int i; \ 129683b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 129783b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 129883b3815dSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 129983b3815dSSong Gao \ 130083b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 130183b3815dSSong Gao Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ 130283b3815dSSong Gao } \ 130383b3815dSSong Gao Vd->D(1) = 0; \ 130483b3815dSSong Gao } 130583b3815dSSong Gao 130683b3815dSSong Gao VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H) 130783b3815dSSong Gao VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W) 130883b3815dSSong Gao VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D) 130983b3815dSSong Gao 131083b3815dSSong Gao #define SSRANU(E, T1, T2, T3) \ 131183b3815dSSong Gao static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \ 131283b3815dSSong Gao { \ 131383b3815dSSong Gao T1 shft_res; \ 131483b3815dSSong Gao if (sa == 0) { \ 131583b3815dSSong Gao shft_res = e2; \ 131683b3815dSSong Gao } else { \ 131783b3815dSSong Gao shft_res = e2 >> sa; \ 131883b3815dSSong Gao } \ 131983b3815dSSong Gao if (e2 < 0) { \ 132083b3815dSSong Gao shft_res = 0; \ 132183b3815dSSong Gao } \ 132283b3815dSSong Gao T2 mask; \ 132383b3815dSSong Gao mask = (1ull << sh) -1; \ 132483b3815dSSong Gao if (shft_res > mask) { \ 132583b3815dSSong Gao return mask; \ 132683b3815dSSong Gao } else { \ 132783b3815dSSong Gao return shft_res; \ 132883b3815dSSong Gao } \ 132983b3815dSSong Gao } 133083b3815dSSong Gao 133183b3815dSSong Gao SSRANU(B, uint16_t, uint8_t, int16_t) 133283b3815dSSong Gao SSRANU(H, uint32_t, uint16_t, int32_t) 133383b3815dSSong Gao SSRANU(W, uint64_t, uint32_t, int64_t) 133483b3815dSSong Gao 133583b3815dSSong Gao #define VSSRANU(NAME, BIT, T, E1, E2) \ 133683b3815dSSong Gao void 
HELPER(NAME)(CPULoongArchState *env, \ 133783b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 133883b3815dSSong Gao { \ 133983b3815dSSong Gao int i; \ 134083b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 134183b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 134283b3815dSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 134383b3815dSSong Gao \ 134483b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 134583b3815dSSong Gao Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ 134683b3815dSSong Gao } \ 134783b3815dSSong Gao Vd->D(1) = 0; \ 134883b3815dSSong Gao } 134983b3815dSSong Gao 135083b3815dSSong Gao VSSRANU(vssran_bu_h, 16, uint16_t, B, H) 135183b3815dSSong Gao VSSRANU(vssran_hu_w, 32, uint32_t, H, W) 135283b3815dSSong Gao VSSRANU(vssran_wu_d, 64, uint64_t, W, D) 135383b3815dSSong Gao 135483b3815dSSong Gao #define VSSRLNI(NAME, BIT, E1, E2) \ 135583b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 135683b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 135783b3815dSSong Gao { \ 135883b3815dSSong Gao int i; \ 135983b3815dSSong Gao VReg temp; \ 136083b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 136183b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 136283b3815dSSong Gao \ 136383b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 136483b3815dSSong Gao temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ 136583b3815dSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\ 136683b3815dSSong Gao } \ 136783b3815dSSong Gao *Vd = temp; \ 136883b3815dSSong Gao } 136983b3815dSSong Gao 137083b3815dSSong Gao void HELPER(vssrlni_d_q)(CPULoongArchState *env, 137183b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 137283b3815dSSong Gao { 137383b3815dSSong Gao Int128 shft_res1, shft_res2, mask; 137483b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 137583b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 137683b3815dSSong Gao 137783b3815dSSong Gao if (imm == 0) { 137883b3815dSSong Gao 
shft_res1 = Vj->Q(0); 137983b3815dSSong Gao shft_res2 = Vd->Q(0); 138083b3815dSSong Gao } else { 138183b3815dSSong Gao shft_res1 = int128_urshift(Vj->Q(0), imm); 138283b3815dSSong Gao shft_res2 = int128_urshift(Vd->Q(0), imm); 138383b3815dSSong Gao } 138483b3815dSSong Gao mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); 138583b3815dSSong Gao 138683b3815dSSong Gao if (int128_ult(mask, shft_res1)) { 138783b3815dSSong Gao Vd->D(0) = int128_getlo(mask); 138883b3815dSSong Gao }else { 138983b3815dSSong Gao Vd->D(0) = int128_getlo(shft_res1); 139083b3815dSSong Gao } 139183b3815dSSong Gao 139283b3815dSSong Gao if (int128_ult(mask, shft_res2)) { 139383b3815dSSong Gao Vd->D(1) = int128_getlo(mask); 139483b3815dSSong Gao }else { 139583b3815dSSong Gao Vd->D(1) = int128_getlo(shft_res2); 139683b3815dSSong Gao } 139783b3815dSSong Gao } 139883b3815dSSong Gao 139983b3815dSSong Gao VSSRLNI(vssrlni_b_h, 16, B, H) 140083b3815dSSong Gao VSSRLNI(vssrlni_h_w, 32, H, W) 140183b3815dSSong Gao VSSRLNI(vssrlni_w_d, 64, W, D) 140283b3815dSSong Gao 140383b3815dSSong Gao #define VSSRANI(NAME, BIT, E1, E2) \ 140483b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 140583b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 140683b3815dSSong Gao { \ 140783b3815dSSong Gao int i; \ 140883b3815dSSong Gao VReg temp; \ 140983b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 141083b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 141183b3815dSSong Gao \ 141283b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 141383b3815dSSong Gao temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ 141483b3815dSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \ 141583b3815dSSong Gao } \ 141683b3815dSSong Gao *Vd = temp; \ 141783b3815dSSong Gao } 141883b3815dSSong Gao 141983b3815dSSong Gao void HELPER(vssrani_d_q)(CPULoongArchState *env, 142083b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 142183b3815dSSong Gao { 142283b3815dSSong Gao 
Int128 shft_res1, shft_res2, mask, min; 142383b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 142483b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 142583b3815dSSong Gao 142683b3815dSSong Gao if (imm == 0) { 142783b3815dSSong Gao shft_res1 = Vj->Q(0); 142883b3815dSSong Gao shft_res2 = Vd->Q(0); 142983b3815dSSong Gao } else { 143083b3815dSSong Gao shft_res1 = int128_rshift(Vj->Q(0), imm); 143183b3815dSSong Gao shft_res2 = int128_rshift(Vd->Q(0), imm); 143283b3815dSSong Gao } 143383b3815dSSong Gao mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); 143483b3815dSSong Gao min = int128_lshift(int128_one(), 63); 143583b3815dSSong Gao 143683b3815dSSong Gao if (int128_gt(shft_res1, mask)) { 143783b3815dSSong Gao Vd->D(0) = int128_getlo(mask); 143883b3815dSSong Gao } else if (int128_lt(shft_res1, int128_neg(min))) { 143983b3815dSSong Gao Vd->D(0) = int128_getlo(min); 144083b3815dSSong Gao } else { 144183b3815dSSong Gao Vd->D(0) = int128_getlo(shft_res1); 144283b3815dSSong Gao } 144383b3815dSSong Gao 144483b3815dSSong Gao if (int128_gt(shft_res2, mask)) { 144583b3815dSSong Gao Vd->D(1) = int128_getlo(mask); 144683b3815dSSong Gao } else if (int128_lt(shft_res2, int128_neg(min))) { 144783b3815dSSong Gao Vd->D(1) = int128_getlo(min); 144883b3815dSSong Gao } else { 144983b3815dSSong Gao Vd->D(1) = int128_getlo(shft_res2); 145083b3815dSSong Gao } 145183b3815dSSong Gao } 145283b3815dSSong Gao 145383b3815dSSong Gao VSSRANI(vssrani_b_h, 16, B, H) 145483b3815dSSong Gao VSSRANI(vssrani_h_w, 32, H, W) 145583b3815dSSong Gao VSSRANI(vssrani_w_d, 64, W, D) 145683b3815dSSong Gao 145783b3815dSSong Gao #define VSSRLNUI(NAME, BIT, E1, E2) \ 145883b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 145983b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 146083b3815dSSong Gao { \ 146183b3815dSSong Gao int i; \ 146283b3815dSSong Gao VReg temp; \ 146383b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 146483b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 
146583b3815dSSong Gao \ 146683b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 146783b3815dSSong Gao temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \ 146883b3815dSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \ 146983b3815dSSong Gao } \ 147083b3815dSSong Gao *Vd = temp; \ 147183b3815dSSong Gao } 147283b3815dSSong Gao 147383b3815dSSong Gao void HELPER(vssrlni_du_q)(CPULoongArchState *env, 147483b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 147583b3815dSSong Gao { 147683b3815dSSong Gao Int128 shft_res1, shft_res2, mask; 147783b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 147883b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 147983b3815dSSong Gao 148083b3815dSSong Gao if (imm == 0) { 148183b3815dSSong Gao shft_res1 = Vj->Q(0); 148283b3815dSSong Gao shft_res2 = Vd->Q(0); 148383b3815dSSong Gao } else { 148483b3815dSSong Gao shft_res1 = int128_urshift(Vj->Q(0), imm); 148583b3815dSSong Gao shft_res2 = int128_urshift(Vd->Q(0), imm); 148683b3815dSSong Gao } 148783b3815dSSong Gao mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); 148883b3815dSSong Gao 148983b3815dSSong Gao if (int128_ult(mask, shft_res1)) { 149083b3815dSSong Gao Vd->D(0) = int128_getlo(mask); 149183b3815dSSong Gao }else { 149283b3815dSSong Gao Vd->D(0) = int128_getlo(shft_res1); 149383b3815dSSong Gao } 149483b3815dSSong Gao 149583b3815dSSong Gao if (int128_ult(mask, shft_res2)) { 149683b3815dSSong Gao Vd->D(1) = int128_getlo(mask); 149783b3815dSSong Gao }else { 149883b3815dSSong Gao Vd->D(1) = int128_getlo(shft_res2); 149983b3815dSSong Gao } 150083b3815dSSong Gao } 150183b3815dSSong Gao 150283b3815dSSong Gao VSSRLNUI(vssrlni_bu_h, 16, B, H) 150383b3815dSSong Gao VSSRLNUI(vssrlni_hu_w, 32, H, W) 150483b3815dSSong Gao VSSRLNUI(vssrlni_wu_d, 64, W, D) 150583b3815dSSong Gao 150683b3815dSSong Gao #define VSSRANUI(NAME, BIT, E1, E2) \ 150783b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 150883b3815dSSong Gao uint32_t vd, uint32_t 
vj, uint32_t imm) \ 150983b3815dSSong Gao { \ 151083b3815dSSong Gao int i; \ 151183b3815dSSong Gao VReg temp; \ 151283b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 151383b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 151483b3815dSSong Gao \ 151583b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 151683b3815dSSong Gao temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \ 151783b3815dSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \ 151883b3815dSSong Gao } \ 151983b3815dSSong Gao *Vd = temp; \ 152083b3815dSSong Gao } 152183b3815dSSong Gao 152283b3815dSSong Gao void HELPER(vssrani_du_q)(CPULoongArchState *env, 152383b3815dSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 152483b3815dSSong Gao { 152583b3815dSSong Gao Int128 shft_res1, shft_res2, mask; 152683b3815dSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 152783b3815dSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 152883b3815dSSong Gao 152983b3815dSSong Gao if (imm == 0) { 153083b3815dSSong Gao shft_res1 = Vj->Q(0); 153183b3815dSSong Gao shft_res2 = Vd->Q(0); 153283b3815dSSong Gao } else { 153383b3815dSSong Gao shft_res1 = int128_rshift(Vj->Q(0), imm); 153483b3815dSSong Gao shft_res2 = int128_rshift(Vd->Q(0), imm); 153583b3815dSSong Gao } 153683b3815dSSong Gao 153783b3815dSSong Gao if (int128_lt(Vj->Q(0), int128_zero())) { 153883b3815dSSong Gao shft_res1 = int128_zero(); 153983b3815dSSong Gao } 154083b3815dSSong Gao 154183b3815dSSong Gao if (int128_lt(Vd->Q(0), int128_zero())) { 154283b3815dSSong Gao shft_res2 = int128_zero(); 154383b3815dSSong Gao } 154483b3815dSSong Gao 154583b3815dSSong Gao mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); 154683b3815dSSong Gao 154783b3815dSSong Gao if (int128_ult(mask, shft_res1)) { 154883b3815dSSong Gao Vd->D(0) = int128_getlo(mask); 154983b3815dSSong Gao }else { 155083b3815dSSong Gao Vd->D(0) = int128_getlo(shft_res1); 155183b3815dSSong Gao } 155283b3815dSSong Gao 155383b3815dSSong Gao if (int128_ult(mask, shft_res2)) { 
155483b3815dSSong Gao Vd->D(1) = int128_getlo(mask); 155583b3815dSSong Gao }else { 155683b3815dSSong Gao Vd->D(1) = int128_getlo(shft_res2); 155783b3815dSSong Gao } 155883b3815dSSong Gao } 155983b3815dSSong Gao 156083b3815dSSong Gao VSSRANUI(vssrani_bu_h, 16, B, H) 156183b3815dSSong Gao VSSRANUI(vssrani_hu_w, 32, H, W) 156283b3815dSSong Gao VSSRANUI(vssrani_wu_d, 64, W, D) 1563162cd32cSSong Gao 1564162cd32cSSong Gao #define SSRLRNS(E1, E2, T1, T2, T3) \ 1565162cd32cSSong Gao static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \ 1566162cd32cSSong Gao { \ 1567162cd32cSSong Gao T1 shft_res; \ 1568162cd32cSSong Gao \ 1569162cd32cSSong Gao shft_res = do_vsrlr_ ## E2(e2, sa); \ 1570162cd32cSSong Gao T1 mask; \ 1571162cd32cSSong Gao mask = (1ull << sh) -1; \ 1572162cd32cSSong Gao if (shft_res > mask) { \ 1573162cd32cSSong Gao return mask; \ 1574162cd32cSSong Gao } else { \ 1575162cd32cSSong Gao return shft_res; \ 1576162cd32cSSong Gao } \ 1577162cd32cSSong Gao } 1578162cd32cSSong Gao 1579162cd32cSSong Gao SSRLRNS(B, H, uint16_t, int16_t, uint8_t) 1580162cd32cSSong Gao SSRLRNS(H, W, uint32_t, int32_t, uint16_t) 1581162cd32cSSong Gao SSRLRNS(W, D, uint64_t, int64_t, uint32_t) 1582162cd32cSSong Gao 1583162cd32cSSong Gao #define VSSRLRN(NAME, BIT, T, E1, E2) \ 1584162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1585162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 1586162cd32cSSong Gao { \ 1587162cd32cSSong Gao int i; \ 1588162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1589162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1590162cd32cSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 1591162cd32cSSong Gao \ 1592162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1593162cd32cSSong Gao Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ 1594162cd32cSSong Gao } \ 1595162cd32cSSong Gao Vd->D(1) = 0; \ 1596162cd32cSSong Gao } 1597162cd32cSSong Gao 1598162cd32cSSong Gao VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H) 1599162cd32cSSong Gao 
VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W) 1600162cd32cSSong Gao VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D) 1601162cd32cSSong Gao 1602162cd32cSSong Gao #define SSRARNS(E1, E2, T1, T2) \ 1603162cd32cSSong Gao static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \ 1604162cd32cSSong Gao { \ 1605162cd32cSSong Gao T1 shft_res; \ 1606162cd32cSSong Gao \ 1607162cd32cSSong Gao shft_res = do_vsrar_ ## E2(e2, sa); \ 1608162cd32cSSong Gao T2 mask; \ 1609162cd32cSSong Gao mask = (1ll << sh) -1; \ 1610162cd32cSSong Gao if (shft_res > mask) { \ 1611162cd32cSSong Gao return mask; \ 1612162cd32cSSong Gao } else if (shft_res < -(mask +1)) { \ 1613162cd32cSSong Gao return ~mask; \ 1614162cd32cSSong Gao } else { \ 1615162cd32cSSong Gao return shft_res; \ 1616162cd32cSSong Gao } \ 1617162cd32cSSong Gao } 1618162cd32cSSong Gao 1619162cd32cSSong Gao SSRARNS(B, H, int16_t, int8_t) 1620162cd32cSSong Gao SSRARNS(H, W, int32_t, int16_t) 1621162cd32cSSong Gao SSRARNS(W, D, int64_t, int32_t) 1622162cd32cSSong Gao 1623162cd32cSSong Gao #define VSSRARN(NAME, BIT, T, E1, E2) \ 1624162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1625162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 1626162cd32cSSong Gao { \ 1627162cd32cSSong Gao int i; \ 1628162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1629162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1630162cd32cSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 1631162cd32cSSong Gao \ 1632162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1633162cd32cSSong Gao Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ 1634162cd32cSSong Gao } \ 1635162cd32cSSong Gao Vd->D(1) = 0; \ 1636162cd32cSSong Gao } 1637162cd32cSSong Gao 1638162cd32cSSong Gao VSSRARN(vssrarn_b_h, 16, uint16_t, B, H) 1639162cd32cSSong Gao VSSRARN(vssrarn_h_w, 32, uint32_t, H, W) 1640162cd32cSSong Gao VSSRARN(vssrarn_w_d, 64, uint64_t, W, D) 1641162cd32cSSong Gao 1642162cd32cSSong Gao #define SSRLRNU(E1, E2, T1, T2, T3) \ 1643162cd32cSSong Gao 
static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \ 1644162cd32cSSong Gao { \ 1645162cd32cSSong Gao T1 shft_res; \ 1646162cd32cSSong Gao \ 1647162cd32cSSong Gao shft_res = do_vsrlr_ ## E2(e2, sa); \ 1648162cd32cSSong Gao \ 1649162cd32cSSong Gao T2 mask; \ 1650162cd32cSSong Gao mask = (1ull << sh) -1; \ 1651162cd32cSSong Gao if (shft_res > mask) { \ 1652162cd32cSSong Gao return mask; \ 1653162cd32cSSong Gao } else { \ 1654162cd32cSSong Gao return shft_res; \ 1655162cd32cSSong Gao } \ 1656162cd32cSSong Gao } 1657162cd32cSSong Gao 1658162cd32cSSong Gao SSRLRNU(B, H, uint16_t, uint8_t, int16_t) 1659162cd32cSSong Gao SSRLRNU(H, W, uint32_t, uint16_t, int32_t) 1660162cd32cSSong Gao SSRLRNU(W, D, uint64_t, uint32_t, int64_t) 1661162cd32cSSong Gao 1662162cd32cSSong Gao #define VSSRLRNU(NAME, BIT, T, E1, E2) \ 1663162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1664162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 1665162cd32cSSong Gao { \ 1666162cd32cSSong Gao int i; \ 1667162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1668162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1669162cd32cSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 1670162cd32cSSong Gao \ 1671162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1672162cd32cSSong Gao Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ 1673162cd32cSSong Gao } \ 1674162cd32cSSong Gao Vd->D(1) = 0; \ 1675162cd32cSSong Gao } 1676162cd32cSSong Gao 1677162cd32cSSong Gao VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H) 1678162cd32cSSong Gao VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W) 1679162cd32cSSong Gao VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D) 1680162cd32cSSong Gao 1681162cd32cSSong Gao #define SSRARNU(E1, E2, T1, T2, T3) \ 1682162cd32cSSong Gao static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \ 1683162cd32cSSong Gao { \ 1684162cd32cSSong Gao T1 shft_res; \ 1685162cd32cSSong Gao \ 1686162cd32cSSong Gao if (e2 < 0) { \ 1687162cd32cSSong Gao shft_res = 0; \ 1688162cd32cSSong Gao } else 
{ \ 1689162cd32cSSong Gao shft_res = do_vsrar_ ## E2(e2, sa); \ 1690162cd32cSSong Gao } \ 1691162cd32cSSong Gao T2 mask; \ 1692162cd32cSSong Gao mask = (1ull << sh) -1; \ 1693162cd32cSSong Gao if (shft_res > mask) { \ 1694162cd32cSSong Gao return mask; \ 1695162cd32cSSong Gao } else { \ 1696162cd32cSSong Gao return shft_res; \ 1697162cd32cSSong Gao } \ 1698162cd32cSSong Gao } 1699162cd32cSSong Gao 1700162cd32cSSong Gao SSRARNU(B, H, uint16_t, uint8_t, int16_t) 1701162cd32cSSong Gao SSRARNU(H, W, uint32_t, uint16_t, int32_t) 1702162cd32cSSong Gao SSRARNU(W, D, uint64_t, uint32_t, int64_t) 1703162cd32cSSong Gao 1704162cd32cSSong Gao #define VSSRARNU(NAME, BIT, T, E1, E2) \ 1705162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1706162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 1707162cd32cSSong Gao { \ 1708162cd32cSSong Gao int i; \ 1709162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1710162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1711162cd32cSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 1712162cd32cSSong Gao \ 1713162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1714162cd32cSSong Gao Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ 1715162cd32cSSong Gao } \ 1716162cd32cSSong Gao Vd->D(1) = 0; \ 1717162cd32cSSong Gao } 1718162cd32cSSong Gao 1719162cd32cSSong Gao VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H) 1720162cd32cSSong Gao VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W) 1721162cd32cSSong Gao VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D) 1722162cd32cSSong Gao 1723162cd32cSSong Gao #define VSSRLRNI(NAME, BIT, E1, E2) \ 1724162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1725162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 1726162cd32cSSong Gao { \ 1727162cd32cSSong Gao int i; \ 1728162cd32cSSong Gao VReg temp; \ 1729162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1730162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1731162cd32cSSong Gao \ 1732162cd32cSSong Gao for (i = 0; i < 
LSX_LEN/BIT; i++) { \ 1733162cd32cSSong Gao temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ 1734162cd32cSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\ 1735162cd32cSSong Gao } \ 1736162cd32cSSong Gao *Vd = temp; \ 1737162cd32cSSong Gao } 1738162cd32cSSong Gao 1739162cd32cSSong Gao #define VSSRLRNI_Q(NAME, sh) \ 1740162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1741162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 1742162cd32cSSong Gao { \ 1743162cd32cSSong Gao Int128 shft_res1, shft_res2, mask, r1, r2; \ 1744162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1745162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1746162cd32cSSong Gao \ 1747162cd32cSSong Gao if (imm == 0) { \ 1748162cd32cSSong Gao shft_res1 = Vj->Q(0); \ 1749162cd32cSSong Gao shft_res2 = Vd->Q(0); \ 1750162cd32cSSong Gao } else { \ 1751162cd32cSSong Gao r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \ 1752162cd32cSSong Gao r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \ 1753162cd32cSSong Gao \ 1754162cd32cSSong Gao shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1)); \ 1755162cd32cSSong Gao shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2)); \ 1756162cd32cSSong Gao } \ 1757162cd32cSSong Gao \ 1758162cd32cSSong Gao mask = int128_sub(int128_lshift(int128_one(), sh), int128_one()); \ 1759162cd32cSSong Gao \ 1760162cd32cSSong Gao if (int128_ult(mask, shft_res1)) { \ 1761162cd32cSSong Gao Vd->D(0) = int128_getlo(mask); \ 1762162cd32cSSong Gao }else { \ 1763162cd32cSSong Gao Vd->D(0) = int128_getlo(shft_res1); \ 1764162cd32cSSong Gao } \ 1765162cd32cSSong Gao \ 1766162cd32cSSong Gao if (int128_ult(mask, shft_res2)) { \ 1767162cd32cSSong Gao Vd->D(1) = int128_getlo(mask); \ 1768162cd32cSSong Gao }else { \ 1769162cd32cSSong Gao Vd->D(1) = int128_getlo(shft_res2); \ 1770162cd32cSSong Gao } \ 1771162cd32cSSong Gao } 1772162cd32cSSong Gao 1773162cd32cSSong Gao 
VSSRLRNI(vssrlrni_b_h, 16, B, H) 1774162cd32cSSong Gao VSSRLRNI(vssrlrni_h_w, 32, H, W) 1775162cd32cSSong Gao VSSRLRNI(vssrlrni_w_d, 64, W, D) 1776162cd32cSSong Gao VSSRLRNI_Q(vssrlrni_d_q, 63) 1777162cd32cSSong Gao 1778162cd32cSSong Gao #define VSSRARNI(NAME, BIT, E1, E2) \ 1779162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1780162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 1781162cd32cSSong Gao { \ 1782162cd32cSSong Gao int i; \ 1783162cd32cSSong Gao VReg temp; \ 1784162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1785162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1786162cd32cSSong Gao \ 1787162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1788162cd32cSSong Gao temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ 1789162cd32cSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT/2 -1); \ 1790162cd32cSSong Gao } \ 1791162cd32cSSong Gao *Vd = temp; \ 1792162cd32cSSong Gao } 1793162cd32cSSong Gao 1794162cd32cSSong Gao void HELPER(vssrarni_d_q)(CPULoongArchState *env, 1795162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 1796162cd32cSSong Gao { 1797162cd32cSSong Gao Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; 1798162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 1799162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 1800162cd32cSSong Gao 1801162cd32cSSong Gao if (imm == 0) { 1802162cd32cSSong Gao shft_res1 = Vj->Q(0); 1803162cd32cSSong Gao shft_res2 = Vd->Q(0); 1804162cd32cSSong Gao } else { 1805162cd32cSSong Gao r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); 1806162cd32cSSong Gao r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); 1807162cd32cSSong Gao 1808162cd32cSSong Gao shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1); 1809162cd32cSSong Gao shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2); 1810162cd32cSSong Gao } 1811162cd32cSSong Gao 1812162cd32cSSong Gao mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one()); 
1813162cd32cSSong Gao mask2 = int128_lshift(int128_one(), 63); 1814162cd32cSSong Gao 1815162cd32cSSong Gao if (int128_gt(shft_res1, mask1)) { 1816162cd32cSSong Gao Vd->D(0) = int128_getlo(mask1); 1817162cd32cSSong Gao } else if (int128_lt(shft_res1, int128_neg(mask2))) { 1818162cd32cSSong Gao Vd->D(0) = int128_getlo(mask2); 1819162cd32cSSong Gao } else { 1820162cd32cSSong Gao Vd->D(0) = int128_getlo(shft_res1); 1821162cd32cSSong Gao } 1822162cd32cSSong Gao 1823162cd32cSSong Gao if (int128_gt(shft_res2, mask1)) { 1824162cd32cSSong Gao Vd->D(1) = int128_getlo(mask1); 1825162cd32cSSong Gao } else if (int128_lt(shft_res2, int128_neg(mask2))) { 1826162cd32cSSong Gao Vd->D(1) = int128_getlo(mask2); 1827162cd32cSSong Gao } else { 1828162cd32cSSong Gao Vd->D(1) = int128_getlo(shft_res2); 1829162cd32cSSong Gao } 1830162cd32cSSong Gao } 1831162cd32cSSong Gao 1832162cd32cSSong Gao VSSRARNI(vssrarni_b_h, 16, B, H) 1833162cd32cSSong Gao VSSRARNI(vssrarni_h_w, 32, H, W) 1834162cd32cSSong Gao VSSRARNI(vssrarni_w_d, 64, W, D) 1835162cd32cSSong Gao 1836162cd32cSSong Gao #define VSSRLRNUI(NAME, BIT, E1, E2) \ 1837162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1838162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 1839162cd32cSSong Gao { \ 1840162cd32cSSong Gao int i; \ 1841162cd32cSSong Gao VReg temp; \ 1842162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1843162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1844162cd32cSSong Gao \ 1845162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1846162cd32cSSong Gao temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \ 1847162cd32cSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \ 1848162cd32cSSong Gao } \ 1849162cd32cSSong Gao *Vd = temp; \ 1850162cd32cSSong Gao } 1851162cd32cSSong Gao 1852162cd32cSSong Gao VSSRLRNUI(vssrlrni_bu_h, 16, B, H) 1853162cd32cSSong Gao VSSRLRNUI(vssrlrni_hu_w, 32, H, W) 1854162cd32cSSong Gao VSSRLRNUI(vssrlrni_wu_d, 64, W, D) 1855162cd32cSSong 
Gao VSSRLRNI_Q(vssrlrni_du_q, 64) 1856162cd32cSSong Gao 1857162cd32cSSong Gao #define VSSRARNUI(NAME, BIT, E1, E2) \ 1858162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 1859162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 1860162cd32cSSong Gao { \ 1861162cd32cSSong Gao int i; \ 1862162cd32cSSong Gao VReg temp; \ 1863162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1864162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1865162cd32cSSong Gao \ 1866162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1867162cd32cSSong Gao temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \ 1868162cd32cSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT/2); \ 1869162cd32cSSong Gao } \ 1870162cd32cSSong Gao *Vd = temp; \ 1871162cd32cSSong Gao } 1872162cd32cSSong Gao 1873162cd32cSSong Gao void HELPER(vssrarni_du_q)(CPULoongArchState *env, 1874162cd32cSSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 1875162cd32cSSong Gao { 1876162cd32cSSong Gao Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; 1877162cd32cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); 1878162cd32cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); 1879162cd32cSSong Gao 1880162cd32cSSong Gao if (imm == 0) { 1881162cd32cSSong Gao shft_res1 = Vj->Q(0); 1882162cd32cSSong Gao shft_res2 = Vd->Q(0); 1883162cd32cSSong Gao } else { 1884162cd32cSSong Gao r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); 1885162cd32cSSong Gao r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); 1886162cd32cSSong Gao 1887162cd32cSSong Gao shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1); 1888162cd32cSSong Gao shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2); 1889162cd32cSSong Gao } 1890162cd32cSSong Gao 1891162cd32cSSong Gao if (int128_lt(Vj->Q(0), int128_zero())) { 1892162cd32cSSong Gao shft_res1 = int128_zero(); 1893162cd32cSSong Gao } 1894162cd32cSSong Gao if (int128_lt(Vd->Q(0), int128_zero())) { 1895162cd32cSSong Gao shft_res2 = int128_zero(); 
1896162cd32cSSong Gao } 1897162cd32cSSong Gao 1898162cd32cSSong Gao mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one()); 1899162cd32cSSong Gao mask2 = int128_lshift(int128_one(), 64); 1900162cd32cSSong Gao 1901162cd32cSSong Gao if (int128_gt(shft_res1, mask1)) { 1902162cd32cSSong Gao Vd->D(0) = int128_getlo(mask1); 1903162cd32cSSong Gao } else if (int128_lt(shft_res1, int128_neg(mask2))) { 1904162cd32cSSong Gao Vd->D(0) = int128_getlo(mask2); 1905162cd32cSSong Gao } else { 1906162cd32cSSong Gao Vd->D(0) = int128_getlo(shft_res1); 1907162cd32cSSong Gao } 1908162cd32cSSong Gao 1909162cd32cSSong Gao if (int128_gt(shft_res2, mask1)) { 1910162cd32cSSong Gao Vd->D(1) = int128_getlo(mask1); 1911162cd32cSSong Gao } else if (int128_lt(shft_res2, int128_neg(mask2))) { 1912162cd32cSSong Gao Vd->D(1) = int128_getlo(mask2); 1913162cd32cSSong Gao } else { 1914162cd32cSSong Gao Vd->D(1) = int128_getlo(shft_res2); 1915162cd32cSSong Gao } 1916162cd32cSSong Gao } 1917162cd32cSSong Gao 1918162cd32cSSong Gao VSSRARNUI(vssrarni_bu_h, 16, B, H) 1919162cd32cSSong Gao VSSRARNUI(vssrarni_hu_w, 32, H, W) 1920162cd32cSSong Gao VSSRARNUI(vssrarni_wu_d, 64, W, D) 19212e105e12SSong Gao 19222e105e12SSong Gao #define DO_2OP(NAME, BIT, E, DO_OP) \ 19232e105e12SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ 19242e105e12SSong Gao { \ 19252e105e12SSong Gao int i; \ 19262e105e12SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 19272e105e12SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 19282e105e12SSong Gao \ 19292e105e12SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) \ 19302e105e12SSong Gao { \ 19312e105e12SSong Gao Vd->E(i) = DO_OP(Vj->E(i)); \ 19322e105e12SSong Gao } \ 19332e105e12SSong Gao } 19342e105e12SSong Gao 19352e105e12SSong Gao #define DO_CLO_B(N) (clz32(~N & 0xff) - 24) 19362e105e12SSong Gao #define DO_CLO_H(N) (clz32(~N & 0xffff) - 16) 19372e105e12SSong Gao #define DO_CLO_W(N) (clz32(~N)) 19382e105e12SSong Gao #define DO_CLO_D(N) (clz64(~N)) 
19392e105e12SSong Gao #define DO_CLZ_B(N) (clz32(N) - 24) 19402e105e12SSong Gao #define DO_CLZ_H(N) (clz32(N) - 16) 19412e105e12SSong Gao #define DO_CLZ_W(N) (clz32(N)) 19422e105e12SSong Gao #define DO_CLZ_D(N) (clz64(N)) 19432e105e12SSong Gao 19442e105e12SSong Gao DO_2OP(vclo_b, 8, UB, DO_CLO_B) 19452e105e12SSong Gao DO_2OP(vclo_h, 16, UH, DO_CLO_H) 19462e105e12SSong Gao DO_2OP(vclo_w, 32, UW, DO_CLO_W) 19472e105e12SSong Gao DO_2OP(vclo_d, 64, UD, DO_CLO_D) 19482e105e12SSong Gao DO_2OP(vclz_b, 8, UB, DO_CLZ_B) 19492e105e12SSong Gao DO_2OP(vclz_h, 16, UH, DO_CLZ_H) 19502e105e12SSong Gao DO_2OP(vclz_w, 32, UW, DO_CLZ_W) 19512e105e12SSong Gao DO_2OP(vclz_d, 64, UD, DO_CLZ_D) 1952bb22ee57SSong Gao 1953bb22ee57SSong Gao #define VPCNT(NAME, BIT, E, FN) \ 1954bb22ee57SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ 1955bb22ee57SSong Gao { \ 1956bb22ee57SSong Gao int i; \ 1957bb22ee57SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 1958bb22ee57SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 1959bb22ee57SSong Gao \ 1960bb22ee57SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) \ 1961bb22ee57SSong Gao { \ 1962bb22ee57SSong Gao Vd->E(i) = FN(Vj->E(i)); \ 1963bb22ee57SSong Gao } \ 1964bb22ee57SSong Gao } 1965bb22ee57SSong Gao 1966bb22ee57SSong Gao VPCNT(vpcnt_b, 8, UB, ctpop8) 1967bb22ee57SSong Gao VPCNT(vpcnt_h, 16, UH, ctpop16) 1968bb22ee57SSong Gao VPCNT(vpcnt_w, 32, UW, ctpop32) 1969bb22ee57SSong Gao VPCNT(vpcnt_d, 64, UD, ctpop64) 19700b1e6705SSong Gao 19710b1e6705SSong Gao #define DO_BITCLR(a, bit) (a & ~(1ull << bit)) 19720b1e6705SSong Gao #define DO_BITSET(a, bit) (a | 1ull << bit) 19730b1e6705SSong Gao #define DO_BITREV(a, bit) (a ^ (1ull << bit)) 19740b1e6705SSong Gao 19750b1e6705SSong Gao #define DO_BIT(NAME, BIT, E, DO_OP) \ 19760b1e6705SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 19770b1e6705SSong Gao { \ 19780b1e6705SSong Gao int i; \ 19790b1e6705SSong Gao VReg *Vd = (VReg *)vd; \ 19800b1e6705SSong Gao VReg *Vj = (VReg 
*)vj; \ 19810b1e6705SSong Gao VReg *Vk = (VReg *)vk; \ 19820b1e6705SSong Gao \ 19830b1e6705SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 19840b1e6705SSong Gao Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \ 19850b1e6705SSong Gao } \ 19860b1e6705SSong Gao } 19870b1e6705SSong Gao 19880b1e6705SSong Gao DO_BIT(vbitclr_b, 8, UB, DO_BITCLR) 19890b1e6705SSong Gao DO_BIT(vbitclr_h, 16, UH, DO_BITCLR) 19900b1e6705SSong Gao DO_BIT(vbitclr_w, 32, UW, DO_BITCLR) 19910b1e6705SSong Gao DO_BIT(vbitclr_d, 64, UD, DO_BITCLR) 19920b1e6705SSong Gao DO_BIT(vbitset_b, 8, UB, DO_BITSET) 19930b1e6705SSong Gao DO_BIT(vbitset_h, 16, UH, DO_BITSET) 19940b1e6705SSong Gao DO_BIT(vbitset_w, 32, UW, DO_BITSET) 19950b1e6705SSong Gao DO_BIT(vbitset_d, 64, UD, DO_BITSET) 19960b1e6705SSong Gao DO_BIT(vbitrev_b, 8, UB, DO_BITREV) 19970b1e6705SSong Gao DO_BIT(vbitrev_h, 16, UH, DO_BITREV) 19980b1e6705SSong Gao DO_BIT(vbitrev_w, 32, UW, DO_BITREV) 19990b1e6705SSong Gao DO_BIT(vbitrev_d, 64, UD, DO_BITREV) 20000b1e6705SSong Gao 20010b1e6705SSong Gao #define DO_BITI(NAME, BIT, E, DO_OP) \ 20020b1e6705SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ 20030b1e6705SSong Gao { \ 20040b1e6705SSong Gao int i; \ 20050b1e6705SSong Gao VReg *Vd = (VReg *)vd; \ 20060b1e6705SSong Gao VReg *Vj = (VReg *)vj; \ 20070b1e6705SSong Gao \ 20080b1e6705SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 20090b1e6705SSong Gao Vd->E(i) = DO_OP(Vj->E(i), imm); \ 20100b1e6705SSong Gao } \ 20110b1e6705SSong Gao } 20120b1e6705SSong Gao 20130b1e6705SSong Gao DO_BITI(vbitclri_b, 8, UB, DO_BITCLR) 20140b1e6705SSong Gao DO_BITI(vbitclri_h, 16, UH, DO_BITCLR) 20150b1e6705SSong Gao DO_BITI(vbitclri_w, 32, UW, DO_BITCLR) 20160b1e6705SSong Gao DO_BITI(vbitclri_d, 64, UD, DO_BITCLR) 20170b1e6705SSong Gao DO_BITI(vbitseti_b, 8, UB, DO_BITSET) 20180b1e6705SSong Gao DO_BITI(vbitseti_h, 16, UH, DO_BITSET) 20190b1e6705SSong Gao DO_BITI(vbitseti_w, 32, UW, DO_BITSET) 20200b1e6705SSong Gao DO_BITI(vbitseti_d, 64, UD, 
DO_BITSET) 20210b1e6705SSong Gao DO_BITI(vbitrevi_b, 8, UB, DO_BITREV) 20220b1e6705SSong Gao DO_BITI(vbitrevi_h, 16, UH, DO_BITREV) 20230b1e6705SSong Gao DO_BITI(vbitrevi_w, 32, UW, DO_BITREV) 20240b1e6705SSong Gao DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) 2025ac95a0b9SSong Gao 2026ac95a0b9SSong Gao #define VFRSTP(NAME, BIT, MASK, E) \ 2027ac95a0b9SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2028ac95a0b9SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 2029ac95a0b9SSong Gao { \ 2030ac95a0b9SSong Gao int i, m; \ 2031ac95a0b9SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2032ac95a0b9SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2033ac95a0b9SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2034ac95a0b9SSong Gao \ 2035ac95a0b9SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2036ac95a0b9SSong Gao if (Vj->E(i) < 0) { \ 2037ac95a0b9SSong Gao break; \ 2038ac95a0b9SSong Gao } \ 2039ac95a0b9SSong Gao } \ 2040ac95a0b9SSong Gao m = Vk->E(0) & MASK; \ 2041ac95a0b9SSong Gao Vd->E(m) = i; \ 2042ac95a0b9SSong Gao } 2043ac95a0b9SSong Gao 2044ac95a0b9SSong Gao VFRSTP(vfrstp_b, 8, 0xf, B) 2045ac95a0b9SSong Gao VFRSTP(vfrstp_h, 16, 0x7, H) 2046ac95a0b9SSong Gao 2047ac95a0b9SSong Gao #define VFRSTPI(NAME, BIT, E) \ 2048ac95a0b9SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2049ac95a0b9SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 2050ac95a0b9SSong Gao { \ 2051ac95a0b9SSong Gao int i, m; \ 2052ac95a0b9SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2053ac95a0b9SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2054ac95a0b9SSong Gao \ 2055ac95a0b9SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2056ac95a0b9SSong Gao if (Vj->E(i) < 0) { \ 2057ac95a0b9SSong Gao break; \ 2058ac95a0b9SSong Gao } \ 2059ac95a0b9SSong Gao } \ 2060ac95a0b9SSong Gao m = imm % (LSX_LEN/BIT); \ 2061ac95a0b9SSong Gao Vd->E(m) = i; \ 2062ac95a0b9SSong Gao } 2063ac95a0b9SSong Gao 2064ac95a0b9SSong Gao VFRSTPI(vfrstpi_b, 8, B) 2065ac95a0b9SSong Gao VFRSTPI(vfrstpi_h, 16, H) 2066aca67472SSong Gao 2067aca67472SSong Gao 
/*
 * Transfer the exception flags accumulated in env->fp_status into FCSR0.
 *
 * The softfloat flags are read and then cleared.  Flags listed in @mask are
 * discarded; whatever remains is converted with ieee_ex_to_loongarch() and
 * recorded in the FCSR0 cause field.  If any recorded cause is enabled in
 * FCSR0, EXCCODE_FPE is raised with @pc; otherwise the causes are merged
 * into the FCSR0 flags field.
 */
static void vec_update_fcsr0_mask(CPULoongArchState *env,
                                  uintptr_t pc, int mask)
{
    int pending = get_float_exception_flags(&env->fp_status);

    set_float_exception_flags(0, &env->fp_status);
    pending &= ~mask;

    if (pending != 0) {
        pending = ieee_ex_to_loongarch(pending);
        UPDATE_FP_CAUSE(env->fcsr0, pending);
    }

    if ((GET_FP_ENABLES(env->fcsr0) & pending) == 0) {
        UPDATE_FP_FLAGS(env->fcsr0, pending);
        return;
    }
    do_raise_exception(env, EXCCODE_FPE, pc);
}

/* vec_update_fcsr0_mask() with no flag masked out. */
static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc)
{
    vec_update_fcsr0_mask(env, pc, 0);
}

/* Reset the FCSR0 cause field before a vector FP operation starts. */
static inline void vec_clear_cause(CPULoongArchState *env)
{
    SET_FP_CAUSE(env->fcsr0, 0);
}

/*
 * DO_3OP_F: define helper NAME(env, vd, vj, vk) computing, for every
 * BIT-wide element, Vd[n] = FN(Vj[n], Vk[n], &env->fp_status).
 *
 * FCSR0 is updated after each element, so the update for element n runs
 * before element n + 1 is computed.
 */
#define DO_3OP_F(NAME, BIT, E, FN)                          \
void HELPER(NAME)(CPULoongArchState *env,                   \
                  uint32_t vd, uint32_t vj, uint32_t vk)    \
{                                                           \
    VReg *Vd = &(env->fpr[vd].vreg);                        \
    VReg *Vj = &(env->fpr[vj].vreg);                        \
    VReg *Vk = &(env->fpr[vk].vreg);                        \
    uintptr_t ra = GETPC();                                 \
    int n;                                                  \
                                                            \
    vec_clear_cause(env);                                   \
    for (n = 0; n < LSX_LEN/BIT; n++) {                     \
        Vd->E(n) = FN(Vj->E(n), Vk->E(n), &env->fp_status); \
        vec_update_fcsr0(env, ra);                          \
    }                                                       \
}

DO_3OP_F(vfadd_s, 32, UW, float32_add)
DO_3OP_F(vfadd_d, 64, UD, float64_add)
DO_3OP_F(vfsub_s, 32, UW, float32_sub)
DO_3OP_F(vfsub_d, 64, UD, float64_sub)
DO_3OP_F(vfmul_s, 32, UW, float32_mul)
DO_3OP_F(vfmul_d, 64, UD, float64_mul)
DO_3OP_F(vfdiv_s, 32, UW, float32_div)
DO_3OP_F(vfdiv_d, 64, UD, float64_div)
DO_3OP_F(vfmax_s, 32, UW, float32_maxnum)
DO_3OP_F(vfmax_d, 64, UD, float64_maxnum)
DO_3OP_F(vfmin_s, 32, UW, float32_minnum)
DO_3OP_F(vfmin_d, 64, UD, float64_minnum)
DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag)
DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag)
DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)

/*
 * DO_4OP_F: define helper NAME(env, vd, vj, vk, va) computing, for every
 * BIT-wide element, Vd[n] = FN(Vj[n], Vk[n], Va[n], flags, &env->fp_status),
 * with FCSR0 updated after each element.
 */
#define DO_4OP_F(NAME, BIT, E, FN, flags)                                    \
void HELPER(NAME)(CPULoongArchState *env,                                    \
                  uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)        \
{                                                                            \
    VReg *Vd = &(env->fpr[vd].vreg);                                         \
    VReg *Vj = &(env->fpr[vj].vreg);                                         \
    VReg *Vk = &(env->fpr[vk].vreg);                                         \
    VReg *Va = &(env->fpr[va].vreg);                                         \
    uintptr_t ra = GETPC();                                                  \
    int n;                                                                   \
                                                                             \
    vec_clear_cause(env);                                                    \
    for (n = 0; n < LSX_LEN/BIT; n++) {                                      \
        Vd->E(n) = FN(Vj->E(n), Vk->E(n), Va->E(n), flags, &env->fp_status); \
        vec_update_fcsr0(env, ra);                                           \
    }                                                                        \
}

DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
2149aca67472SSong Gao DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0) 2150aca67472SSong Gao DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c) 2151aca67472SSong Gao DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c) 2152aca67472SSong Gao DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result) 2153aca67472SSong Gao DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result) 2154aca67472SSong Gao DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd, 2155aca67472SSong Gao float_muladd_negate_c | float_muladd_negate_result) 2156aca67472SSong Gao DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd, 2157aca67472SSong Gao float_muladd_negate_c | float_muladd_negate_result) 2158aca67472SSong Gao 2159aca67472SSong Gao #define DO_2OP_F(NAME, BIT, E, FN) \ 2160aca67472SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ 2161aca67472SSong Gao { \ 2162aca67472SSong Gao int i; \ 2163aca67472SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2164aca67472SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2165aca67472SSong Gao \ 2166aca67472SSong Gao vec_clear_cause(env); \ 2167aca67472SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2168aca67472SSong Gao Vd->E(i) = FN(env, Vj->E(i)); \ 2169aca67472SSong Gao } \ 2170aca67472SSong Gao } 2171aca67472SSong Gao 2172aca67472SSong Gao #define FLOGB(BIT, T) \ 2173aca67472SSong Gao static T do_flogb_## BIT(CPULoongArchState *env, T fj) \ 2174aca67472SSong Gao { \ 2175aca67472SSong Gao T fp, fd; \ 2176aca67472SSong Gao float_status *status = &env->fp_status; \ 2177aca67472SSong Gao FloatRoundMode old_mode = get_float_rounding_mode(status); \ 2178aca67472SSong Gao \ 2179aca67472SSong Gao set_float_rounding_mode(float_round_down, status); \ 2180aca67472SSong Gao fp = float ## BIT ##_log2(fj, status); \ 2181aca67472SSong Gao fd = float ## BIT ##_round_to_int(fp, status); \ 2182aca67472SSong Gao set_float_rounding_mode(old_mode, status); \ 2183aca67472SSong Gao vec_update_fcsr0_mask(env, 
GETPC(), float_flag_inexact); \ 2184aca67472SSong Gao return fd; \ 2185aca67472SSong Gao } 2186aca67472SSong Gao 2187aca67472SSong Gao FLOGB(32, uint32_t) 2188aca67472SSong Gao FLOGB(64, uint64_t) 2189aca67472SSong Gao 2190aca67472SSong Gao #define FCLASS(NAME, BIT, E, FN) \ 2191aca67472SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ 2192aca67472SSong Gao { \ 2193aca67472SSong Gao int i; \ 2194aca67472SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2195aca67472SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2196aca67472SSong Gao \ 2197aca67472SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2198aca67472SSong Gao Vd->E(i) = FN(env, Vj->E(i)); \ 2199aca67472SSong Gao } \ 2200aca67472SSong Gao } 2201aca67472SSong Gao 2202aca67472SSong Gao FCLASS(vfclass_s, 32, UW, helper_fclass_s) 2203aca67472SSong Gao FCLASS(vfclass_d, 64, UD, helper_fclass_d) 2204aca67472SSong Gao 2205aca67472SSong Gao #define FSQRT(BIT, T) \ 2206aca67472SSong Gao static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \ 2207aca67472SSong Gao { \ 2208aca67472SSong Gao T fd; \ 2209aca67472SSong Gao fd = float ## BIT ##_sqrt(fj, &env->fp_status); \ 2210aca67472SSong Gao vec_update_fcsr0(env, GETPC()); \ 2211aca67472SSong Gao return fd; \ 2212aca67472SSong Gao } 2213aca67472SSong Gao 2214aca67472SSong Gao FSQRT(32, uint32_t) 2215aca67472SSong Gao FSQRT(64, uint64_t) 2216aca67472SSong Gao 2217aca67472SSong Gao #define FRECIP(BIT, T) \ 2218aca67472SSong Gao static T do_frecip_## BIT(CPULoongArchState *env, T fj) \ 2219aca67472SSong Gao { \ 2220aca67472SSong Gao T fd; \ 2221aca67472SSong Gao fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \ 2222aca67472SSong Gao vec_update_fcsr0(env, GETPC()); \ 2223aca67472SSong Gao return fd; \ 2224aca67472SSong Gao } 2225aca67472SSong Gao 2226aca67472SSong Gao FRECIP(32, uint32_t) 2227aca67472SSong Gao FRECIP(64, uint64_t) 2228aca67472SSong Gao 2229aca67472SSong Gao #define FRSQRT(BIT, T) \ 2230aca67472SSong Gao static T 
do_frsqrt_## BIT(CPULoongArchState *env, T fj) \ 2231aca67472SSong Gao { \ 2232aca67472SSong Gao T fd, fp; \ 2233aca67472SSong Gao fp = float ## BIT ##_sqrt(fj, &env->fp_status); \ 2234aca67472SSong Gao fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \ 2235aca67472SSong Gao vec_update_fcsr0(env, GETPC()); \ 2236aca67472SSong Gao return fd; \ 2237aca67472SSong Gao } 2238aca67472SSong Gao 2239aca67472SSong Gao FRSQRT(32, uint32_t) 2240aca67472SSong Gao FRSQRT(64, uint64_t) 2241aca67472SSong Gao 2242aca67472SSong Gao DO_2OP_F(vflogb_s, 32, UW, do_flogb_32) 2243aca67472SSong Gao DO_2OP_F(vflogb_d, 64, UD, do_flogb_64) 2244aca67472SSong Gao DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32) 2245aca67472SSong Gao DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64) 2246aca67472SSong Gao DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32) 2247aca67472SSong Gao DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64) 2248aca67472SSong Gao DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32) 2249aca67472SSong Gao DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64) 2250399665d2SSong Gao 2251399665d2SSong Gao static uint32_t float16_cvt_float32(uint16_t h, float_status *status) 2252399665d2SSong Gao { 2253399665d2SSong Gao return float16_to_float32(h, true, status); 2254399665d2SSong Gao } 2255399665d2SSong Gao static uint64_t float32_cvt_float64(uint32_t s, float_status *status) 2256399665d2SSong Gao { 2257399665d2SSong Gao return float32_to_float64(s, status); 2258399665d2SSong Gao } 2259399665d2SSong Gao 2260399665d2SSong Gao static uint16_t float32_cvt_float16(uint32_t s, float_status *status) 2261399665d2SSong Gao { 2262399665d2SSong Gao return float32_to_float16(s, true, status); 2263399665d2SSong Gao } 2264399665d2SSong Gao static uint32_t float64_cvt_float32(uint64_t d, float_status *status) 2265399665d2SSong Gao { 2266399665d2SSong Gao return float64_to_float32(d, status); 2267399665d2SSong Gao } 2268399665d2SSong Gao 2269399665d2SSong Gao void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 
2270399665d2SSong Gao { 2271399665d2SSong Gao int i; 2272399665d2SSong Gao VReg temp; 2273399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2274399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2275399665d2SSong Gao 2276399665d2SSong Gao vec_clear_cause(env); 2277399665d2SSong Gao for (i = 0; i < LSX_LEN/32; i++) { 2278399665d2SSong Gao temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status); 2279399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2280399665d2SSong Gao } 2281399665d2SSong Gao *Vd = temp; 2282399665d2SSong Gao } 2283399665d2SSong Gao 2284399665d2SSong Gao void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 2285399665d2SSong Gao { 2286399665d2SSong Gao int i; 2287399665d2SSong Gao VReg temp; 2288399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2289399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2290399665d2SSong Gao 2291399665d2SSong Gao vec_clear_cause(env); 2292399665d2SSong Gao for (i = 0; i < LSX_LEN/64; i++) { 2293399665d2SSong Gao temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status); 2294399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2295399665d2SSong Gao } 2296399665d2SSong Gao *Vd = temp; 2297399665d2SSong Gao } 2298399665d2SSong Gao 2299399665d2SSong Gao void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 2300399665d2SSong Gao { 2301399665d2SSong Gao int i; 2302399665d2SSong Gao VReg temp; 2303399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2304399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2305399665d2SSong Gao 2306399665d2SSong Gao vec_clear_cause(env); 2307399665d2SSong Gao for (i = 0; i < LSX_LEN/32; i++) { 2308399665d2SSong Gao temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status); 2309399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2310399665d2SSong Gao } 2311399665d2SSong Gao *Vd = temp; 2312399665d2SSong Gao } 2313399665d2SSong Gao 2314399665d2SSong Gao void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 2315399665d2SSong Gao { 
2316399665d2SSong Gao int i; 2317399665d2SSong Gao VReg temp; 2318399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2319399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2320399665d2SSong Gao 2321399665d2SSong Gao vec_clear_cause(env); 2322399665d2SSong Gao for (i = 0; i < LSX_LEN/64; i++) { 2323399665d2SSong Gao temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status); 2324399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2325399665d2SSong Gao } 2326399665d2SSong Gao *Vd = temp; 2327399665d2SSong Gao } 2328399665d2SSong Gao 2329399665d2SSong Gao void HELPER(vfcvt_h_s)(CPULoongArchState *env, 2330399665d2SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) 2331399665d2SSong Gao { 2332399665d2SSong Gao int i; 2333399665d2SSong Gao VReg temp; 2334399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2335399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2336399665d2SSong Gao VReg *Vk = &(env->fpr[vk].vreg); 2337399665d2SSong Gao 2338399665d2SSong Gao vec_clear_cause(env); 2339399665d2SSong Gao for(i = 0; i < LSX_LEN/32; i++) { 2340399665d2SSong Gao temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status); 2341399665d2SSong Gao temp.UH(i) = float32_cvt_float16(Vk->UW(i), &env->fp_status); 2342399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2343399665d2SSong Gao } 2344399665d2SSong Gao *Vd = temp; 2345399665d2SSong Gao } 2346399665d2SSong Gao 2347399665d2SSong Gao void HELPER(vfcvt_s_d)(CPULoongArchState *env, 2348399665d2SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) 2349399665d2SSong Gao { 2350399665d2SSong Gao int i; 2351399665d2SSong Gao VReg temp; 2352399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2353399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2354399665d2SSong Gao VReg *Vk = &(env->fpr[vk].vreg); 2355399665d2SSong Gao 2356399665d2SSong Gao vec_clear_cause(env); 2357399665d2SSong Gao for(i = 0; i < LSX_LEN/64; i++) { 2358399665d2SSong Gao temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status); 2359399665d2SSong Gao temp.UW(i) = 
float64_cvt_float32(Vk->UD(i), &env->fp_status); 2360399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2361399665d2SSong Gao } 2362399665d2SSong Gao *Vd = temp; 2363399665d2SSong Gao } 2364399665d2SSong Gao 2365399665d2SSong Gao void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 2366399665d2SSong Gao { 2367399665d2SSong Gao int i; 2368399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2369399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2370399665d2SSong Gao 2371399665d2SSong Gao vec_clear_cause(env); 2372399665d2SSong Gao for (i = 0; i < 4; i++) { 2373399665d2SSong Gao Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status); 2374399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2375399665d2SSong Gao } 2376399665d2SSong Gao } 2377399665d2SSong Gao 2378399665d2SSong Gao void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 2379399665d2SSong Gao { 2380399665d2SSong Gao int i; 2381399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2382399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2383399665d2SSong Gao 2384399665d2SSong Gao vec_clear_cause(env); 2385399665d2SSong Gao for (i = 0; i < 2; i++) { 2386399665d2SSong Gao Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status); 2387399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2388399665d2SSong Gao } 2389399665d2SSong Gao } 2390399665d2SSong Gao 2391399665d2SSong Gao #define FCVT_2OP(NAME, BIT, E, MODE) \ 2392399665d2SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ 2393399665d2SSong Gao { \ 2394399665d2SSong Gao int i; \ 2395399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2396399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2397399665d2SSong Gao \ 2398399665d2SSong Gao vec_clear_cause(env); \ 2399399665d2SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2400399665d2SSong Gao FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ 2401399665d2SSong Gao set_float_rounding_mode(MODE, &env->fp_status); \ 2402399665d2SSong 
Gao Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \ 2403399665d2SSong Gao set_float_rounding_mode(old_mode, &env->fp_status); \ 2404399665d2SSong Gao vec_update_fcsr0(env, GETPC()); \ 2405399665d2SSong Gao } \ 2406399665d2SSong Gao } 2407399665d2SSong Gao 2408399665d2SSong Gao FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even) 2409399665d2SSong Gao FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even) 2410399665d2SSong Gao FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero) 2411399665d2SSong Gao FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero) 2412399665d2SSong Gao FCVT_2OP(vfrintrp_s, 32, UW, float_round_up) 2413399665d2SSong Gao FCVT_2OP(vfrintrp_d, 64, UD, float_round_up) 2414399665d2SSong Gao FCVT_2OP(vfrintrm_s, 32, UW, float_round_down) 2415399665d2SSong Gao FCVT_2OP(vfrintrm_d, 64, UD, float_round_down) 2416399665d2SSong Gao 2417399665d2SSong Gao #define FTINT(NAME, FMT1, FMT2, T1, T2, MODE) \ 2418399665d2SSong Gao static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj) \ 2419399665d2SSong Gao { \ 2420399665d2SSong Gao T2 fd; \ 2421399665d2SSong Gao FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ 2422399665d2SSong Gao \ 2423399665d2SSong Gao set_float_rounding_mode(MODE, &env->fp_status); \ 2424399665d2SSong Gao fd = do_## FMT1 ##_to_## FMT2(env, fj); \ 2425399665d2SSong Gao set_float_rounding_mode(old_mode, &env->fp_status); \ 2426399665d2SSong Gao return fd; \ 2427399665d2SSong Gao } 2428399665d2SSong Gao 2429399665d2SSong Gao #define DO_FTINT(FMT1, FMT2, T1, T2) \ 2430399665d2SSong Gao static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj) \ 2431399665d2SSong Gao { \ 2432399665d2SSong Gao T2 fd; \ 2433399665d2SSong Gao \ 2434399665d2SSong Gao fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ 2435399665d2SSong Gao if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \ 2436399665d2SSong Gao if (FMT1 ##_is_any_nan(fj)) { \ 2437399665d2SSong Gao fd = 0; \ 2438399665d2SSong Gao } 
\ 2439399665d2SSong Gao } \ 2440399665d2SSong Gao vec_update_fcsr0(env, GETPC()); \ 2441399665d2SSong Gao return fd; \ 2442399665d2SSong Gao } 2443399665d2SSong Gao 2444399665d2SSong Gao DO_FTINT(float32, int32, uint32_t, uint32_t) 2445399665d2SSong Gao DO_FTINT(float64, int64, uint64_t, uint64_t) 2446399665d2SSong Gao DO_FTINT(float32, uint32, uint32_t, uint32_t) 2447399665d2SSong Gao DO_FTINT(float64, uint64, uint64_t, uint64_t) 2448399665d2SSong Gao DO_FTINT(float64, int32, uint64_t, uint32_t) 2449399665d2SSong Gao DO_FTINT(float32, int64, uint32_t, uint64_t) 2450399665d2SSong Gao 2451399665d2SSong Gao FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even) 2452399665d2SSong Gao FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even) 2453399665d2SSong Gao FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up) 2454399665d2SSong Gao FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up) 2455399665d2SSong Gao FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero) 2456399665d2SSong Gao FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero) 2457399665d2SSong Gao FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down) 2458399665d2SSong Gao FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down) 2459399665d2SSong Gao 2460399665d2SSong Gao DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s) 2461399665d2SSong Gao DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d) 2462399665d2SSong Gao DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s) 2463399665d2SSong Gao DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d) 2464399665d2SSong Gao DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s) 2465399665d2SSong Gao DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d) 2466399665d2SSong Gao DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s) 2467399665d2SSong Gao DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d) 2468399665d2SSong Gao DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32) 2469399665d2SSong 
Gao DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64) 2470399665d2SSong Gao 2471399665d2SSong Gao FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero) 2472399665d2SSong Gao FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero) 2473399665d2SSong Gao 2474399665d2SSong Gao DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s) 2475399665d2SSong Gao DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d) 2476399665d2SSong Gao DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32) 2477399665d2SSong Gao DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64) 2478399665d2SSong Gao 2479399665d2SSong Gao FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down) 2480399665d2SSong Gao FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up) 2481399665d2SSong Gao FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero) 2482399665d2SSong Gao FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even) 2483399665d2SSong Gao 2484399665d2SSong Gao #define FTINT_W_D(NAME, FN) \ 2485399665d2SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2486399665d2SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 2487399665d2SSong Gao { \ 2488399665d2SSong Gao int i; \ 2489399665d2SSong Gao VReg temp; \ 2490399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2491399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2492399665d2SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2493399665d2SSong Gao \ 2494399665d2SSong Gao vec_clear_cause(env); \ 2495399665d2SSong Gao for (i = 0; i < 2; i++) { \ 2496399665d2SSong Gao temp.W(i + 2) = FN(env, Vj->UD(i)); \ 2497399665d2SSong Gao temp.W(i) = FN(env, Vk->UD(i)); \ 2498399665d2SSong Gao } \ 2499399665d2SSong Gao *Vd = temp; \ 2500399665d2SSong Gao } 2501399665d2SSong Gao 2502399665d2SSong Gao FTINT_W_D(vftint_w_d, do_float64_to_int32) 2503399665d2SSong Gao FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d) 2504399665d2SSong Gao FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d) 2505399665d2SSong Gao 
FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d) 2506399665d2SSong Gao FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d) 2507399665d2SSong Gao 2508399665d2SSong Gao FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down) 2509399665d2SSong Gao FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up) 2510399665d2SSong Gao FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) 2511399665d2SSong Gao FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) 2512399665d2SSong Gao FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down) 2513399665d2SSong Gao FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up) 2514399665d2SSong Gao FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) 2515399665d2SSong Gao FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) 2516399665d2SSong Gao 2517399665d2SSong Gao #define FTINTL_L_S(NAME, FN) \ 2518399665d2SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ 2519399665d2SSong Gao { \ 2520399665d2SSong Gao int i; \ 2521399665d2SSong Gao VReg temp; \ 2522399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2523399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2524399665d2SSong Gao \ 2525399665d2SSong Gao vec_clear_cause(env); \ 2526399665d2SSong Gao for (i = 0; i < 2; i++) { \ 2527399665d2SSong Gao temp.D(i) = FN(env, Vj->UW(i)); \ 2528399665d2SSong Gao } \ 2529399665d2SSong Gao *Vd = temp; \ 2530399665d2SSong Gao } 2531399665d2SSong Gao 2532399665d2SSong Gao FTINTL_L_S(vftintl_l_s, do_float32_to_int64) 2533399665d2SSong Gao FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s) 2534399665d2SSong Gao FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s) 2535399665d2SSong Gao FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s) 2536399665d2SSong Gao FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s) 2537399665d2SSong Gao 2538399665d2SSong Gao #define FTINTH_L_S(NAME, FN) \ 2539399665d2SSong Gao void HELPER(NAME)(CPULoongArchState *env, 
uint32_t vd, uint32_t vj) \ 2540399665d2SSong Gao { \ 2541399665d2SSong Gao int i; \ 2542399665d2SSong Gao VReg temp; \ 2543399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2544399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2545399665d2SSong Gao \ 2546399665d2SSong Gao vec_clear_cause(env); \ 2547399665d2SSong Gao for (i = 0; i < 2; i++) { \ 2548399665d2SSong Gao temp.D(i) = FN(env, Vj->UW(i + 2)); \ 2549399665d2SSong Gao } \ 2550399665d2SSong Gao *Vd = temp; \ 2551399665d2SSong Gao } 2552399665d2SSong Gao 2553399665d2SSong Gao FTINTH_L_S(vftinth_l_s, do_float32_to_int64) 2554399665d2SSong Gao FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s) 2555399665d2SSong Gao FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s) 2556399665d2SSong Gao FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s) 2557399665d2SSong Gao FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s) 2558399665d2SSong Gao 2559399665d2SSong Gao #define FFINT(NAME, FMT1, FMT2, T1, T2) \ 2560399665d2SSong Gao static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \ 2561399665d2SSong Gao { \ 2562399665d2SSong Gao T2 fd; \ 2563399665d2SSong Gao \ 2564399665d2SSong Gao fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ 2565399665d2SSong Gao vec_update_fcsr0(env, GETPC()); \ 2566399665d2SSong Gao return fd; \ 2567399665d2SSong Gao } 2568399665d2SSong Gao 2569399665d2SSong Gao FFINT(s_w, int32, float32, int32_t, uint32_t) 2570399665d2SSong Gao FFINT(d_l, int64, float64, int64_t, uint64_t) 2571399665d2SSong Gao FFINT(s_wu, uint32, float32, uint32_t, uint32_t) 2572399665d2SSong Gao FFINT(d_lu, uint64, float64, uint64_t, uint64_t) 2573399665d2SSong Gao 2574399665d2SSong Gao DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w) 2575399665d2SSong Gao DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l) 2576399665d2SSong Gao DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu) 2577399665d2SSong Gao DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu) 2578399665d2SSong Gao 2579399665d2SSong Gao void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 
2580399665d2SSong Gao { 2581399665d2SSong Gao int i; 2582399665d2SSong Gao VReg temp; 2583399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2584399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2585399665d2SSong Gao 2586399665d2SSong Gao vec_clear_cause(env); 2587399665d2SSong Gao for (i = 0; i < 2; i++) { 2588399665d2SSong Gao temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status); 2589399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2590399665d2SSong Gao } 2591399665d2SSong Gao *Vd = temp; 2592399665d2SSong Gao } 2593399665d2SSong Gao 2594399665d2SSong Gao void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj) 2595399665d2SSong Gao { 2596399665d2SSong Gao int i; 2597399665d2SSong Gao VReg temp; 2598399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2599399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2600399665d2SSong Gao 2601399665d2SSong Gao vec_clear_cause(env); 2602399665d2SSong Gao for (i = 0; i < 2; i++) { 2603399665d2SSong Gao temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status); 2604399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2605399665d2SSong Gao } 2606399665d2SSong Gao *Vd = temp; 2607399665d2SSong Gao } 2608399665d2SSong Gao 2609399665d2SSong Gao void HELPER(vffint_s_l)(CPULoongArchState *env, 2610399665d2SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) 2611399665d2SSong Gao { 2612399665d2SSong Gao int i; 2613399665d2SSong Gao VReg temp; 2614399665d2SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2615399665d2SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2616399665d2SSong Gao VReg *Vk = &(env->fpr[vk].vreg); 2617399665d2SSong Gao 2618399665d2SSong Gao vec_clear_cause(env); 2619399665d2SSong Gao for (i = 0; i < 2; i++) { 2620399665d2SSong Gao temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status); 2621399665d2SSong Gao temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status); 2622399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2623399665d2SSong Gao } 2624399665d2SSong Gao *Vd = temp; 2625399665d2SSong Gao } 
2626f435e1e5SSong Gao 2627f435e1e5SSong Gao #define VSEQ(a, b) (a == b ? -1 : 0) 2628f435e1e5SSong Gao #define VSLE(a, b) (a <= b ? -1 : 0) 2629f435e1e5SSong Gao #define VSLT(a, b) (a < b ? -1 : 0) 2630f435e1e5SSong Gao 2631f435e1e5SSong Gao #define VCMPI(NAME, BIT, E, DO_OP) \ 2632f435e1e5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ 2633f435e1e5SSong Gao { \ 2634f435e1e5SSong Gao int i; \ 2635f435e1e5SSong Gao VReg *Vd = (VReg *)vd; \ 2636f435e1e5SSong Gao VReg *Vj = (VReg *)vj; \ 2637f435e1e5SSong Gao typedef __typeof(Vd->E(0)) TD; \ 2638f435e1e5SSong Gao \ 2639f435e1e5SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2640f435e1e5SSong Gao Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ 2641f435e1e5SSong Gao } \ 2642f435e1e5SSong Gao } 2643f435e1e5SSong Gao 2644f435e1e5SSong Gao VCMPI(vseqi_b, 8, B, VSEQ) 2645f435e1e5SSong Gao VCMPI(vseqi_h, 16, H, VSEQ) 2646f435e1e5SSong Gao VCMPI(vseqi_w, 32, W, VSEQ) 2647f435e1e5SSong Gao VCMPI(vseqi_d, 64, D, VSEQ) 2648f435e1e5SSong Gao VCMPI(vslei_b, 8, B, VSLE) 2649f435e1e5SSong Gao VCMPI(vslei_h, 16, H, VSLE) 2650f435e1e5SSong Gao VCMPI(vslei_w, 32, W, VSLE) 2651f435e1e5SSong Gao VCMPI(vslei_d, 64, D, VSLE) 2652f435e1e5SSong Gao VCMPI(vslei_bu, 8, UB, VSLE) 2653f435e1e5SSong Gao VCMPI(vslei_hu, 16, UH, VSLE) 2654f435e1e5SSong Gao VCMPI(vslei_wu, 32, UW, VSLE) 2655f435e1e5SSong Gao VCMPI(vslei_du, 64, UD, VSLE) 2656f435e1e5SSong Gao VCMPI(vslti_b, 8, B, VSLT) 2657f435e1e5SSong Gao VCMPI(vslti_h, 16, H, VSLT) 2658f435e1e5SSong Gao VCMPI(vslti_w, 32, W, VSLT) 2659f435e1e5SSong Gao VCMPI(vslti_d, 64, D, VSLT) 2660f435e1e5SSong Gao VCMPI(vslti_bu, 8, UB, VSLT) 2661f435e1e5SSong Gao VCMPI(vslti_hu, 16, UH, VSLT) 2662f435e1e5SSong Gao VCMPI(vslti_wu, 32, UW, VSLT) 2663f435e1e5SSong Gao VCMPI(vslti_du, 64, UD, VSLT) 2664386c4e86SSong Gao 2665386c4e86SSong Gao static uint64_t vfcmp_common(CPULoongArchState *env, 2666386c4e86SSong Gao FloatRelation cmp, uint32_t flags) 2667386c4e86SSong Gao { 2668386c4e86SSong 
Gao uint64_t ret = 0; 2669386c4e86SSong Gao 2670386c4e86SSong Gao switch (cmp) { 2671386c4e86SSong Gao case float_relation_less: 2672386c4e86SSong Gao ret = (flags & FCMP_LT); 2673386c4e86SSong Gao break; 2674386c4e86SSong Gao case float_relation_equal: 2675386c4e86SSong Gao ret = (flags & FCMP_EQ); 2676386c4e86SSong Gao break; 2677386c4e86SSong Gao case float_relation_greater: 2678386c4e86SSong Gao ret = (flags & FCMP_GT); 2679386c4e86SSong Gao break; 2680386c4e86SSong Gao case float_relation_unordered: 2681386c4e86SSong Gao ret = (flags & FCMP_UN); 2682386c4e86SSong Gao break; 2683386c4e86SSong Gao default: 2684386c4e86SSong Gao g_assert_not_reached(); 2685386c4e86SSong Gao } 2686386c4e86SSong Gao 2687386c4e86SSong Gao if (ret) { 2688386c4e86SSong Gao ret = -1; 2689386c4e86SSong Gao } 2690386c4e86SSong Gao 2691386c4e86SSong Gao return ret; 2692386c4e86SSong Gao } 2693386c4e86SSong Gao 2694386c4e86SSong Gao #define VFCMP(NAME, BIT, E, FN) \ 2695386c4e86SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2696386c4e86SSong Gao uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \ 2697386c4e86SSong Gao { \ 2698386c4e86SSong Gao int i; \ 2699386c4e86SSong Gao VReg t; \ 2700386c4e86SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2701386c4e86SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2702386c4e86SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2703386c4e86SSong Gao \ 2704386c4e86SSong Gao vec_clear_cause(env); \ 2705386c4e86SSong Gao for (i = 0; i < LSX_LEN/BIT ; i++) { \ 2706386c4e86SSong Gao FloatRelation cmp; \ 2707386c4e86SSong Gao cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ 2708386c4e86SSong Gao t.E(i) = vfcmp_common(env, cmp, flags); \ 2709386c4e86SSong Gao vec_update_fcsr0(env, GETPC()); \ 2710386c4e86SSong Gao } \ 2711386c4e86SSong Gao *Vd = t; \ 2712386c4e86SSong Gao } 2713386c4e86SSong Gao 2714386c4e86SSong Gao VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet) 2715386c4e86SSong Gao VFCMP(vfcmp_s_s, 32, UW, float32_compare) 2716386c4e86SSong Gao 
VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet) 2717386c4e86SSong Gao VFCMP(vfcmp_s_d, 64, UD, float64_compare) 2718d0dfa19aSSong Gao 2719d0dfa19aSSong Gao void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v) 2720d0dfa19aSSong Gao { 2721d0dfa19aSSong Gao int i; 2722d0dfa19aSSong Gao VReg *Vd = (VReg *)vd; 2723d0dfa19aSSong Gao VReg *Vj = (VReg *)vj; 2724d0dfa19aSSong Gao 2725d0dfa19aSSong Gao for (i = 0; i < 16; i++) { 2726d0dfa19aSSong Gao Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm); 2727d0dfa19aSSong Gao } 2728d0dfa19aSSong Gao } 2729d0dfa19aSSong Gao 2730d0dfa19aSSong Gao /* Copy from target/arm/tcg/sve_helper.c */ 2731d0dfa19aSSong Gao static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz) 2732d0dfa19aSSong Gao { 2733d0dfa19aSSong Gao uint64_t bits = 8 << esz; 2734d0dfa19aSSong Gao uint64_t ones = dup_const(esz, 1); 2735d0dfa19aSSong Gao uint64_t signs = ones << (bits - 1); 2736d0dfa19aSSong Gao uint64_t cmp0, cmp1; 2737d0dfa19aSSong Gao 2738d0dfa19aSSong Gao cmp1 = dup_const(esz, n); 2739d0dfa19aSSong Gao cmp0 = cmp1 ^ m0; 2740d0dfa19aSSong Gao cmp1 = cmp1 ^ m1; 2741d0dfa19aSSong Gao cmp0 = (cmp0 - ones) & ~cmp0; 2742d0dfa19aSSong Gao cmp1 = (cmp1 - ones) & ~cmp1; 2743d0dfa19aSSong Gao return (cmp0 | cmp1) & signs; 2744d0dfa19aSSong Gao } 2745d0dfa19aSSong Gao 2746d0dfa19aSSong Gao #define SETANYEQZ(NAME, MO) \ 2747d0dfa19aSSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \ 2748d0dfa19aSSong Gao { \ 2749d0dfa19aSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2750d0dfa19aSSong Gao \ 2751d0dfa19aSSong Gao env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \ 2752d0dfa19aSSong Gao } 2753d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_b, MO_8) 2754d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_h, MO_16) 2755d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_w, MO_32) 2756d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_d, MO_64) 2757d0dfa19aSSong Gao 2758d0dfa19aSSong Gao #define SETALLNEZ(NAME, MO) \ 2759d0dfa19aSSong Gao 
void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \ 2760d0dfa19aSSong Gao { \ 2761d0dfa19aSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2762d0dfa19aSSong Gao \ 2763d0dfa19aSSong Gao env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \ 2764d0dfa19aSSong Gao } 2765d0dfa19aSSong Gao SETALLNEZ(vsetallnez_b, MO_8) 2766d0dfa19aSSong Gao SETALLNEZ(vsetallnez_h, MO_16) 2767d0dfa19aSSong Gao SETALLNEZ(vsetallnez_w, MO_32) 2768d0dfa19aSSong Gao SETALLNEZ(vsetallnez_d, MO_64) 2769d5e5563cSSong Gao 2770d5e5563cSSong Gao #define VPACKEV(NAME, BIT, E) \ 2771d5e5563cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2772d5e5563cSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 2773d5e5563cSSong Gao { \ 2774d5e5563cSSong Gao int i; \ 2775d5e5563cSSong Gao VReg temp; \ 2776d5e5563cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2777d5e5563cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2778d5e5563cSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2779d5e5563cSSong Gao \ 2780d5e5563cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2781d5e5563cSSong Gao temp.E(2 * i + 1) = Vj->E(2 * i); \ 2782d5e5563cSSong Gao temp.E(2 *i) = Vk->E(2 * i); \ 2783d5e5563cSSong Gao } \ 2784d5e5563cSSong Gao *Vd = temp; \ 2785d5e5563cSSong Gao } 2786d5e5563cSSong Gao 2787d5e5563cSSong Gao VPACKEV(vpackev_b, 16, B) 2788d5e5563cSSong Gao VPACKEV(vpackev_h, 32, H) 2789d5e5563cSSong Gao VPACKEV(vpackev_w, 64, W) 2790d5e5563cSSong Gao VPACKEV(vpackev_d, 128, D) 2791d5e5563cSSong Gao 2792d5e5563cSSong Gao #define VPACKOD(NAME, BIT, E) \ 2793d5e5563cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2794d5e5563cSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 2795d5e5563cSSong Gao { \ 2796d5e5563cSSong Gao int i; \ 2797d5e5563cSSong Gao VReg temp; \ 2798d5e5563cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2799d5e5563cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2800d5e5563cSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2801d5e5563cSSong Gao \ 2802d5e5563cSSong Gao for (i = 0; i < 
LSX_LEN/BIT; i++) { \ 2803d5e5563cSSong Gao temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ 2804d5e5563cSSong Gao temp.E(2 * i) = Vk->E(2 * i + 1); \ 2805d5e5563cSSong Gao } \ 2806d5e5563cSSong Gao *Vd = temp; \ 2807d5e5563cSSong Gao } 2808d5e5563cSSong Gao 2809d5e5563cSSong Gao VPACKOD(vpackod_b, 16, B) 2810d5e5563cSSong Gao VPACKOD(vpackod_h, 32, H) 2811d5e5563cSSong Gao VPACKOD(vpackod_w, 64, W) 2812d5e5563cSSong Gao VPACKOD(vpackod_d, 128, D) 2813d5e5563cSSong Gao 2814d5e5563cSSong Gao #define VPICKEV(NAME, BIT, E) \ 2815d5e5563cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2816d5e5563cSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 2817d5e5563cSSong Gao { \ 2818d5e5563cSSong Gao int i; \ 2819d5e5563cSSong Gao VReg temp; \ 2820d5e5563cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2821d5e5563cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2822d5e5563cSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2823d5e5563cSSong Gao \ 2824d5e5563cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2825d5e5563cSSong Gao temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \ 2826d5e5563cSSong Gao temp.E(i) = Vk->E(2 * i); \ 2827d5e5563cSSong Gao } \ 2828d5e5563cSSong Gao *Vd = temp; \ 2829d5e5563cSSong Gao } 2830d5e5563cSSong Gao 2831d5e5563cSSong Gao VPICKEV(vpickev_b, 16, B) 2832d5e5563cSSong Gao VPICKEV(vpickev_h, 32, H) 2833d5e5563cSSong Gao VPICKEV(vpickev_w, 64, W) 2834d5e5563cSSong Gao VPICKEV(vpickev_d, 128, D) 2835d5e5563cSSong Gao 2836d5e5563cSSong Gao #define VPICKOD(NAME, BIT, E) \ 2837d5e5563cSSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2838d5e5563cSSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 2839d5e5563cSSong Gao { \ 2840d5e5563cSSong Gao int i; \ 2841d5e5563cSSong Gao VReg temp; \ 2842d5e5563cSSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2843d5e5563cSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2844d5e5563cSSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2845d5e5563cSSong Gao \ 2846d5e5563cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2847d5e5563cSSong Gao temp.E(i + 
LSX_LEN/BIT) = Vj->E(2 * i + 1); \ 2848d5e5563cSSong Gao temp.E(i) = Vk->E(2 * i + 1); \ 2849d5e5563cSSong Gao } \ 2850d5e5563cSSong Gao *Vd = temp; \ 2851d5e5563cSSong Gao } 2852d5e5563cSSong Gao 2853d5e5563cSSong Gao VPICKOD(vpickod_b, 16, B) 2854d5e5563cSSong Gao VPICKOD(vpickod_h, 32, H) 2855d5e5563cSSong Gao VPICKOD(vpickod_w, 64, W) 2856d5e5563cSSong Gao VPICKOD(vpickod_d, 128, D) 2857e93dd431SSong Gao 2858e93dd431SSong Gao #define VILVL(NAME, BIT, E) \ 2859e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2860e93dd431SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 2861e93dd431SSong Gao { \ 2862e93dd431SSong Gao int i; \ 2863e93dd431SSong Gao VReg temp; \ 2864e93dd431SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2865e93dd431SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2866e93dd431SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2867e93dd431SSong Gao \ 2868e93dd431SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2869e93dd431SSong Gao temp.E(2 * i + 1) = Vj->E(i); \ 2870e93dd431SSong Gao temp.E(2 * i) = Vk->E(i); \ 2871e93dd431SSong Gao } \ 2872e93dd431SSong Gao *Vd = temp; \ 2873e93dd431SSong Gao } 2874e93dd431SSong Gao 2875e93dd431SSong Gao VILVL(vilvl_b, 16, B) 2876e93dd431SSong Gao VILVL(vilvl_h, 32, H) 2877e93dd431SSong Gao VILVL(vilvl_w, 64, W) 2878e93dd431SSong Gao VILVL(vilvl_d, 128, D) 2879e93dd431SSong Gao 2880e93dd431SSong Gao #define VILVH(NAME, BIT, E) \ 2881e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2882e93dd431SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 2883e93dd431SSong Gao { \ 2884e93dd431SSong Gao int i; \ 2885e93dd431SSong Gao VReg temp; \ 2886e93dd431SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2887e93dd431SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2888e93dd431SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2889e93dd431SSong Gao \ 2890e93dd431SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2891e93dd431SSong Gao temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \ 2892e93dd431SSong Gao temp.E(2 * i) = Vk->E(i + 
LSX_LEN/BIT); \ 2893e93dd431SSong Gao } \ 2894e93dd431SSong Gao *Vd = temp; \ 2895e93dd431SSong Gao } 2896e93dd431SSong Gao 2897e93dd431SSong Gao VILVH(vilvh_b, 16, B) 2898e93dd431SSong Gao VILVH(vilvh_h, 32, H) 2899e93dd431SSong Gao VILVH(vilvh_w, 64, W) 2900e93dd431SSong Gao VILVH(vilvh_d, 128, D) 2901e93dd431SSong Gao 2902e93dd431SSong Gao void HELPER(vshuf_b)(CPULoongArchState *env, 2903e93dd431SSong Gao uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va) 2904e93dd431SSong Gao { 2905e93dd431SSong Gao int i, m; 2906e93dd431SSong Gao VReg temp; 2907e93dd431SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2908e93dd431SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2909e93dd431SSong Gao VReg *Vk = &(env->fpr[vk].vreg); 2910e93dd431SSong Gao VReg *Va = &(env->fpr[va].vreg); 2911e93dd431SSong Gao 2912e93dd431SSong Gao m = LSX_LEN/8; 2913e93dd431SSong Gao for (i = 0; i < m ; i++) { 2914e93dd431SSong Gao uint64_t k = (uint8_t)Va->B(i) % (2 * m); 2915e93dd431SSong Gao temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m); 2916e93dd431SSong Gao } 2917e93dd431SSong Gao *Vd = temp; 2918e93dd431SSong Gao } 2919e93dd431SSong Gao 2920e93dd431SSong Gao #define VSHUF(NAME, BIT, E) \ 2921e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2922e93dd431SSong Gao uint32_t vd, uint32_t vj, uint32_t vk) \ 2923e93dd431SSong Gao { \ 2924e93dd431SSong Gao int i, m; \ 2925e93dd431SSong Gao VReg temp; \ 2926e93dd431SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2927e93dd431SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2928e93dd431SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2929e93dd431SSong Gao \ 2930e93dd431SSong Gao m = LSX_LEN/BIT; \ 2931e93dd431SSong Gao for (i = 0; i < m; i++) { \ 2932e93dd431SSong Gao uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \ 2933e93dd431SSong Gao temp.E(i) = k < m ? 
Vk->E(k) : Vj->E(k - m); \ 2934e93dd431SSong Gao } \ 2935e93dd431SSong Gao *Vd = temp; \ 2936e93dd431SSong Gao } 2937e93dd431SSong Gao 2938e93dd431SSong Gao VSHUF(vshuf_h, 16, H) 2939e93dd431SSong Gao VSHUF(vshuf_w, 32, W) 2940e93dd431SSong Gao VSHUF(vshuf_d, 64, D) 2941e93dd431SSong Gao 2942e93dd431SSong Gao #define VSHUF4I(NAME, BIT, E) \ 2943e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2944e93dd431SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 2945e93dd431SSong Gao { \ 2946e93dd431SSong Gao int i; \ 2947e93dd431SSong Gao VReg temp; \ 2948e93dd431SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2949e93dd431SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2950e93dd431SSong Gao \ 2951e93dd431SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2952e93dd431SSong Gao temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \ 2953e93dd431SSong Gao (2 * ((i) & 0x03))) & 0x03)); \ 2954e93dd431SSong Gao } \ 2955e93dd431SSong Gao *Vd = temp; \ 2956e93dd431SSong Gao } 2957e93dd431SSong Gao 2958e93dd431SSong Gao VSHUF4I(vshuf4i_b, 8, B) 2959e93dd431SSong Gao VSHUF4I(vshuf4i_h, 16, H) 2960e93dd431SSong Gao VSHUF4I(vshuf4i_w, 32, W) 2961e93dd431SSong Gao 2962e93dd431SSong Gao void HELPER(vshuf4i_d)(CPULoongArchState *env, 2963e93dd431SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 2964e93dd431SSong Gao { 2965e93dd431SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2966e93dd431SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2967e93dd431SSong Gao 2968e93dd431SSong Gao VReg temp; 2969e93dd431SSong Gao temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1); 2970e93dd431SSong Gao temp.D(1) = (imm & 8 ? 
Vj : Vd)->D((imm >> 2) & 1); 2971e93dd431SSong Gao *Vd = temp; 2972e93dd431SSong Gao } 2973e93dd431SSong Gao 2974e93dd431SSong Gao void HELPER(vpermi_w)(CPULoongArchState *env, 2975e93dd431SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) 2976e93dd431SSong Gao { 2977e93dd431SSong Gao VReg temp; 2978e93dd431SSong Gao VReg *Vd = &(env->fpr[vd].vreg); 2979e93dd431SSong Gao VReg *Vj = &(env->fpr[vj].vreg); 2980e93dd431SSong Gao 2981e93dd431SSong Gao temp.W(0) = Vj->W(imm & 0x3); 2982e93dd431SSong Gao temp.W(1) = Vj->W((imm >> 2) & 0x3); 2983e93dd431SSong Gao temp.W(2) = Vd->W((imm >> 4) & 0x3); 2984e93dd431SSong Gao temp.W(3) = Vd->W((imm >> 6) & 0x3); 2985e93dd431SSong Gao *Vd = temp; 2986e93dd431SSong Gao } 2987e93dd431SSong Gao 2988e93dd431SSong Gao #define VEXTRINS(NAME, BIT, E, MASK) \ 2989e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2990e93dd431SSong Gao uint32_t vd, uint32_t vj, uint32_t imm) \ 2991e93dd431SSong Gao { \ 2992e93dd431SSong Gao int ins, extr; \ 2993e93dd431SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2994e93dd431SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2995e93dd431SSong Gao \ 2996e93dd431SSong Gao ins = (imm >> 4) & MASK; \ 2997e93dd431SSong Gao extr = imm & MASK; \ 2998e93dd431SSong Gao Vd->E(ins) = Vj->E(extr); \ 2999e93dd431SSong Gao } 3000e93dd431SSong Gao 3001e93dd431SSong Gao VEXTRINS(vextrins_b, 8, B, 0xf) 3002e93dd431SSong Gao VEXTRINS(vextrins_h, 16, H, 0x7) 3003e93dd431SSong Gao VEXTRINS(vextrins_w, 32, W, 0x3) 3004e93dd431SSong Gao VEXTRINS(vextrins_d, 64, D, 0x1) 3005