1a0c9400aSSong Gao /* SPDX-License-Identifier: GPL-2.0-or-later */ 2a0c9400aSSong Gao /* 31dc33f26SSong Gao * QEMU LoongArch vector helper functions. 4a0c9400aSSong Gao * 5a0c9400aSSong Gao * Copyright (c) 2022-2023 Loongson Technology Corporation Limited 6a0c9400aSSong Gao */ 7c037fbc9SSong Gao 8c037fbc9SSong Gao #include "qemu/osdep.h" 9c037fbc9SSong Gao #include "cpu.h" 10c037fbc9SSong Gao #include "exec/exec-all.h" 11c037fbc9SSong Gao #include "exec/helper-proto.h" 12aca67472SSong Gao #include "fpu/softfloat.h" 13aca67472SSong Gao #include "internals.h" 14d0dfa19aSSong Gao #include "tcg/tcg.h" 15008a3b16SSong Gao #include "vec.h" 1664cf6b99SSong Gao #include "tcg/tcg-gvec-desc.h" 17c037fbc9SSong Gao 18c037fbc9SSong Gao #define DO_ADD(a, b) (a + b) 19c037fbc9SSong Gao #define DO_SUB(a, b) (a - b) 20c037fbc9SSong Gao 21c037fbc9SSong Gao #define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \ 2204711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 23c037fbc9SSong Gao { \ 24c037fbc9SSong Gao int i; \ 2504711da1SSong Gao VReg *Vd = (VReg *)vd; \ 2604711da1SSong Gao VReg *Vj = (VReg *)vj; \ 2704711da1SSong Gao VReg *Vk = (VReg *)vk; \ 28c037fbc9SSong Gao typedef __typeof(Vd->E1(0)) TD; \ 2964cf6b99SSong Gao int oprsz = simd_oprsz(desc); \ 30c037fbc9SSong Gao \ 3164cf6b99SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 32c037fbc9SSong Gao Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \ 33c037fbc9SSong Gao } \ 34c037fbc9SSong Gao } 35c037fbc9SSong Gao 36c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD) 37c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD) 38c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD) 39c037fbc9SSong Gao 4004711da1SSong Gao void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) 41c037fbc9SSong Gao { 4264cf6b99SSong Gao int i; 4304711da1SSong Gao VReg *Vd = (VReg *)vd; 4404711da1SSong Gao VReg *Vj = (VReg *)vj; 4504711da1SSong Gao VReg *Vk = (VReg *)vk; 4664cf6b99SSong Gao int oprsz = simd_oprsz(desc); 47c037fbc9SSong Gao 4864cf6b99SSong Gao for (i = 0; i < oprsz / 16 ; i++) { 4964cf6b99SSong Gao Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)), 5064cf6b99SSong Gao int128_makes64(Vk->D(2 * i))); 5164cf6b99SSong Gao } 52c037fbc9SSong Gao } 53c037fbc9SSong Gao 54c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB) 55c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB) 56c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB) 57c037fbc9SSong Gao 5804711da1SSong Gao void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) 59c037fbc9SSong Gao { 6064cf6b99SSong Gao int i; 6104711da1SSong Gao VReg *Vd = (VReg *)vd; 6204711da1SSong Gao VReg *Vj = (VReg *)vj; 6304711da1SSong Gao VReg *Vk = (VReg *)vk; 6464cf6b99SSong Gao int oprsz = simd_oprsz(desc); 65c037fbc9SSong Gao 6664cf6b99SSong Gao for (i = 0; i < oprsz / 16; i++) { 6764cf6b99SSong Gao Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), 6864cf6b99SSong Gao int128_makes64(Vk->D(2 * i))); 6964cf6b99SSong Gao } 70c037fbc9SSong Gao } 71c037fbc9SSong Gao 72c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD) 73c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD) 74c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD) 75c037fbc9SSong Gao 7604711da1SSong Gao void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) 77c037fbc9SSong Gao { 7864cf6b99SSong Gao int i; 7904711da1SSong Gao VReg *Vd = (VReg *)vd; 8004711da1SSong Gao VReg *Vj = (VReg *)vj; 8104711da1SSong Gao VReg *Vk = (VReg *)vk; 8264cf6b99SSong Gao int oprsz = simd_oprsz(desc); 83c037fbc9SSong Gao 8464cf6b99SSong Gao for (i = 0; i < oprsz / 16; i ++) { 8564cf6b99SSong Gao Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), 8664cf6b99SSong Gao int128_make64(Vk->UD(2 * i))); 8764cf6b99SSong Gao } 88c037fbc9SSong Gao } 89c037fbc9SSong Gao 90c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB) 91c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB) 92c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB) 93c037fbc9SSong Gao 9404711da1SSong Gao void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) 95c037fbc9SSong Gao { 9664cf6b99SSong Gao int i; 9704711da1SSong Gao VReg *Vd = (VReg *)vd; 9804711da1SSong Gao VReg *Vj = (VReg *)vj; 9904711da1SSong Gao VReg *Vk = (VReg *)vk; 10064cf6b99SSong Gao int oprsz = simd_oprsz(desc); 101c037fbc9SSong Gao 10264cf6b99SSong Gao for (i = 0; i < oprsz / 16; i++) { 10364cf6b99SSong Gao Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), 10464cf6b99SSong Gao int128_make64(Vk->UD(2 * i))); 10564cf6b99SSong Gao } 106c037fbc9SSong Gao } 1072d5f950cSSong Gao 1082d5f950cSSong Gao #define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \ 10985995f07SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1102d5f950cSSong Gao { \ 1112d5f950cSSong Gao int i; \ 1122d5f950cSSong Gao VReg *Vd = (VReg *)vd; \ 1132d5f950cSSong Gao VReg *Vj = (VReg *)vj; \ 1142d5f950cSSong Gao VReg *Vk = (VReg *)vk; \ 1152d5f950cSSong Gao typedef __typeof(Vd->E1(0)) TD; \ 11685995f07SSong Gao int oprsz = simd_oprsz(desc); \ 11785995f07SSong Gao \ 11885995f07SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 1192d5f950cSSong Gao Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \ 1202d5f950cSSong Gao } \ 1212d5f950cSSong Gao } 1222d5f950cSSong Gao 1232d5f950cSSong Gao #define DO_ODD(NAME, BIT, E1, E2, DO_OP) \ 12485995f07SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1252d5f950cSSong Gao { \ 1262d5f950cSSong Gao int i; \ 1272d5f950cSSong Gao VReg *Vd = (VReg *)vd; \ 1282d5f950cSSong Gao VReg *Vj = (VReg *)vj; \ 1292d5f950cSSong Gao VReg *Vk = (VReg *)vk; \ 1302d5f950cSSong Gao typedef __typeof(Vd->E1(0)) TD; \ 13185995f07SSong Gao int oprsz = simd_oprsz(desc); \ 13285995f07SSong Gao \ 13385995f07SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 1342d5f950cSSong Gao Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \ 1352d5f950cSSong Gao } \ 1362d5f950cSSong Gao } 1372d5f950cSSong Gao 13885995f07SSong Gao void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) 1392d5f950cSSong Gao { 14085995f07SSong Gao int i; 1412d5f950cSSong Gao VReg *Vd = (VReg *)vd; 1422d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1432d5f950cSSong Gao VReg *Vk = (VReg *)vk; 14485995f07SSong Gao int oprsz = simd_oprsz(desc); 1452d5f950cSSong Gao 14685995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 14785995f07SSong Gao Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)), 14885995f07SSong Gao int128_makes64(Vk->D(2 * i))); 14985995f07SSong Gao } 1502d5f950cSSong Gao } 1512d5f950cSSong Gao 1522d5f950cSSong Gao DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD) 1532d5f950cSSong Gao DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD) 1542d5f950cSSong Gao DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD) 1552d5f950cSSong Gao 15685995f07SSong Gao void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) 1572d5f950cSSong Gao { 15885995f07SSong Gao int i; 1592d5f950cSSong Gao VReg *Vd = (VReg *)vd; 1602d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1612d5f950cSSong Gao VReg *Vk = (VReg *)vk; 16285995f07SSong Gao int oprsz = simd_oprsz(desc); 1632d5f950cSSong Gao 16485995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 16585995f07SSong Gao Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)), 16685995f07SSong Gao int128_makes64(Vk->D(2 * i +1))); 16785995f07SSong Gao } 1682d5f950cSSong Gao } 1692d5f950cSSong Gao 1702d5f950cSSong Gao DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD) 1712d5f950cSSong Gao DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD) 1722d5f950cSSong Gao DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD) 1732d5f950cSSong Gao 17485995f07SSong Gao void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) 1752d5f950cSSong Gao { 17685995f07SSong Gao int i; 1772d5f950cSSong Gao VReg *Vd = (VReg *)vd; 1782d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1792d5f950cSSong Gao VReg *Vk = (VReg *)vk; 18085995f07SSong Gao int oprsz = simd_oprsz(desc); 1812d5f950cSSong Gao 18285995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 18385995f07SSong Gao Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)), 18485995f07SSong Gao int128_makes64(Vk->D(2 * i))); 18585995f07SSong Gao } 1862d5f950cSSong Gao } 1872d5f950cSSong Gao 1882d5f950cSSong Gao DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB) 1892d5f950cSSong Gao DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB) 1902d5f950cSSong Gao DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB) 1912d5f950cSSong Gao 19285995f07SSong Gao void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) 1932d5f950cSSong Gao { 19485995f07SSong Gao int i; 1952d5f950cSSong Gao VReg *Vd = (VReg *)vd; 1962d5f950cSSong Gao VReg *Vj = (VReg *)vj; 1972d5f950cSSong Gao VReg *Vk = (VReg *)vk; 19885995f07SSong Gao int oprsz = simd_oprsz(desc); 1992d5f950cSSong Gao 20085995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 20185995f07SSong Gao Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), 20285995f07SSong Gao int128_makes64(Vk->D(2 * i + 1))); 20385995f07SSong Gao } 2042d5f950cSSong Gao } 2052d5f950cSSong Gao 2062d5f950cSSong Gao DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB) 2072d5f950cSSong Gao DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB) 2082d5f950cSSong Gao DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB) 2092d5f950cSSong Gao 21085995f07SSong Gao void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) 2112d5f950cSSong Gao { 21285995f07SSong Gao int i; 2132d5f950cSSong Gao VReg *Vd = (VReg *)vd; 2142d5f950cSSong Gao VReg *Vj = (VReg *)vj; 2152d5f950cSSong Gao VReg *Vk = (VReg *)vk; 21685995f07SSong Gao int oprsz = simd_oprsz(desc); 2172d5f950cSSong Gao 21885995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 21985995f07SSong Gao Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), 22085995f07SSong Gao int128_make64(Vk->UD(2 * i))); 22185995f07SSong Gao } 2222d5f950cSSong Gao } 2232d5f950cSSong Gao 2242d5f950cSSong Gao DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD) 2252d5f950cSSong Gao DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD) 2262d5f950cSSong Gao DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD) 2272d5f950cSSong Gao 22885995f07SSong Gao void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) 2292d5f950cSSong Gao { 23085995f07SSong Gao int i; 2312d5f950cSSong Gao VReg *Vd = (VReg *)vd; 2322d5f950cSSong Gao VReg *Vj = (VReg *)vj; 2332d5f950cSSong Gao VReg *Vk = (VReg *)vk; 23485995f07SSong Gao int oprsz = simd_oprsz(desc); 2352d5f950cSSong Gao 23685995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 23785995f07SSong Gao Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), 23885995f07SSong Gao int128_make64(Vk->UD(2 * i + 1))); 23985995f07SSong Gao } 2402d5f950cSSong Gao } 2412d5f950cSSong Gao 2422d5f950cSSong Gao DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD) 2432d5f950cSSong Gao DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD) 2442d5f950cSSong Gao DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD) 2452d5f950cSSong Gao 24685995f07SSong Gao void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) 2472d5f950cSSong Gao { 24885995f07SSong Gao int i; 2492d5f950cSSong Gao VReg *Vd = (VReg *)vd; 2502d5f950cSSong Gao VReg *Vj = (VReg *)vj; 2512d5f950cSSong Gao VReg *Vk = (VReg *)vk; 25285995f07SSong Gao int oprsz = simd_oprsz(desc); 2532d5f950cSSong Gao 25485995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 25585995f07SSong Gao Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)), 25685995f07SSong Gao int128_make64(Vk->UD(2 * i))); 25785995f07SSong Gao } 2582d5f950cSSong Gao } 2592d5f950cSSong Gao 2602d5f950cSSong Gao DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB) 2612d5f950cSSong Gao DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB) 2622d5f950cSSong Gao DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB) 2632d5f950cSSong Gao 26485995f07SSong Gao void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) 2652d5f950cSSong Gao { 26685995f07SSong Gao int i; 2672d5f950cSSong Gao VReg *Vd = (VReg *)vd; 2682d5f950cSSong Gao VReg *Vj = (VReg *)vj; 2692d5f950cSSong Gao VReg *Vk = (VReg *)vk; 27085995f07SSong Gao int oprsz = simd_oprsz(desc); 2712d5f950cSSong Gao 27285995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 27385995f07SSong Gao Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), 27485995f07SSong Gao int128_make64(Vk->UD(2 * i + 1))); 27585995f07SSong Gao } 2762d5f950cSSong Gao } 2772d5f950cSSong Gao 2782d5f950cSSong Gao DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB) 2792d5f950cSSong Gao DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB) 2802d5f950cSSong Gao DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB) 2812d5f950cSSong Gao 2822d5f950cSSong Gao #define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ 28385995f07SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 2842d5f950cSSong Gao { \ 2852d5f950cSSong Gao int i; \ 2862d5f950cSSong Gao VReg *Vd = (VReg *)vd; \ 2872d5f950cSSong Gao VReg *Vj = (VReg *)vj; \ 2882d5f950cSSong Gao VReg *Vk = (VReg *)vk; \ 2892d5f950cSSong Gao typedef __typeof(Vd->ES1(0)) TDS; \ 2902d5f950cSSong Gao typedef __typeof(Vd->EU1(0)) TDU; \ 29185995f07SSong Gao int oprsz = simd_oprsz(desc); \ 29285995f07SSong Gao \ 29385995f07SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 2942d5f950cSSong Gao Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \ 2952d5f950cSSong Gao } \ 2962d5f950cSSong Gao } 2972d5f950cSSong Gao 2982d5f950cSSong Gao #define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ 29985995f07SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 3002d5f950cSSong Gao { \ 3012d5f950cSSong Gao int i; \ 3022d5f950cSSong Gao VReg *Vd = (VReg *)vd; \ 3032d5f950cSSong Gao VReg *Vj = (VReg *)vj; \ 3042d5f950cSSong Gao VReg *Vk = (VReg *)vk; \ 3052d5f950cSSong Gao typedef __typeof(Vd->ES1(0)) TDS; \ 3062d5f950cSSong Gao typedef __typeof(Vd->EU1(0)) TDU; \ 30785995f07SSong Gao int oprsz = simd_oprsz(desc); \ 30885995f07SSong Gao \ 30985995f07SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 3102d5f950cSSong Gao Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \ 3112d5f950cSSong Gao } \ 3122d5f950cSSong Gao } 3132d5f950cSSong Gao 31485995f07SSong Gao void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) 3152d5f950cSSong Gao { 31685995f07SSong Gao int i; 3172d5f950cSSong Gao VReg *Vd = (VReg *)vd; 3182d5f950cSSong Gao VReg *Vj = (VReg *)vj; 3192d5f950cSSong Gao VReg *Vk = (VReg *)vk; 32085995f07SSong Gao int oprsz = simd_oprsz(desc); 3212d5f950cSSong Gao 32285995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 32385995f07SSong Gao Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), 32485995f07SSong Gao int128_makes64(Vk->D(2 * i))); 32585995f07SSong Gao } 3262d5f950cSSong Gao } 3272d5f950cSSong Gao 3282d5f950cSSong Gao DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD) 3292d5f950cSSong Gao DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD) 3302d5f950cSSong Gao DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD) 3312d5f950cSSong Gao 33285995f07SSong Gao void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) 3332d5f950cSSong Gao { 33485995f07SSong Gao int i; 3352d5f950cSSong Gao VReg *Vd = (VReg *)vd; 3362d5f950cSSong Gao VReg *Vj = (VReg *)vj; 3372d5f950cSSong Gao VReg *Vk = (VReg *)vk; 33885995f07SSong Gao int oprsz = simd_oprsz(desc); 3392d5f950cSSong Gao 34085995f07SSong Gao for (i = 0; i < oprsz / 16; i++) { 34185995f07SSong Gao Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), 34285995f07SSong Gao int128_makes64(Vk->D(2 * i + 1))); 34385995f07SSong Gao } 3442d5f950cSSong Gao } 3452d5f950cSSong Gao 3462d5f950cSSong Gao DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) 3472d5f950cSSong Gao DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD) 3482d5f950cSSong Gao DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) 34939e9b0a7SSong Gao 35039e9b0a7SSong Gao #define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1)) 35139e9b0a7SSong Gao #define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1)) 35239e9b0a7SSong Gao 35339e9b0a7SSong Gao #define DO_3OP(NAME, BIT, E, DO_OP) \ 354ee7250d0SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 35539e9b0a7SSong Gao { \ 35639e9b0a7SSong Gao int i; \ 35739e9b0a7SSong Gao VReg *Vd = (VReg *)vd; \ 35839e9b0a7SSong Gao VReg *Vj = (VReg *)vj; \ 35939e9b0a7SSong Gao VReg *Vk = (VReg *)vk; \ 360ee7250d0SSong Gao int oprsz = simd_oprsz(desc); \ 361ee7250d0SSong Gao \ 362ee7250d0SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 36339e9b0a7SSong Gao Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ 36439e9b0a7SSong Gao } \ 36539e9b0a7SSong Gao } 36639e9b0a7SSong Gao 36739e9b0a7SSong Gao DO_3OP(vavg_b, 8, B, DO_VAVG) 36839e9b0a7SSong Gao DO_3OP(vavg_h, 16, H, DO_VAVG) 36939e9b0a7SSong Gao DO_3OP(vavg_w, 32, W, DO_VAVG) 37039e9b0a7SSong Gao DO_3OP(vavg_d, 64, D, DO_VAVG) 37139e9b0a7SSong Gao DO_3OP(vavgr_b, 8, B, DO_VAVGR) 37239e9b0a7SSong Gao DO_3OP(vavgr_h, 16, H, DO_VAVGR) 37339e9b0a7SSong Gao DO_3OP(vavgr_w, 32, W, DO_VAVGR) 37439e9b0a7SSong Gao DO_3OP(vavgr_d, 64, D, DO_VAVGR) 37539e9b0a7SSong Gao DO_3OP(vavg_bu, 8, UB, DO_VAVG) 37639e9b0a7SSong Gao DO_3OP(vavg_hu, 16, UH, DO_VAVG) 37739e9b0a7SSong Gao DO_3OP(vavg_wu, 32, UW, DO_VAVG) 37839e9b0a7SSong Gao DO_3OP(vavg_du, 64, UD, DO_VAVG) 37939e9b0a7SSong Gao DO_3OP(vavgr_bu, 8, UB, DO_VAVGR) 38039e9b0a7SSong Gao DO_3OP(vavgr_hu, 16, UH, DO_VAVGR) 38139e9b0a7SSong Gao DO_3OP(vavgr_wu, 32, UW, DO_VAVGR) 38239e9b0a7SSong Gao DO_3OP(vavgr_du, 64, UD, DO_VAVGR) 38349725659SSong Gao 38449725659SSong Gao #define DO_VABSD(a, b) ((a > b) ? (a -b) : (b-a)) 38549725659SSong Gao 38649725659SSong Gao DO_3OP(vabsd_b, 8, B, DO_VABSD) 38749725659SSong Gao DO_3OP(vabsd_h, 16, H, DO_VABSD) 38849725659SSong Gao DO_3OP(vabsd_w, 32, W, DO_VABSD) 38949725659SSong Gao DO_3OP(vabsd_d, 64, D, DO_VABSD) 39049725659SSong Gao DO_3OP(vabsd_bu, 8, UB, DO_VABSD) 39149725659SSong Gao DO_3OP(vabsd_hu, 16, UH, DO_VABSD) 39249725659SSong Gao DO_3OP(vabsd_wu, 32, UW, DO_VABSD) 39349725659SSong Gao DO_3OP(vabsd_du, 64, UD, DO_VABSD) 394af448cb3SSong Gao 395af448cb3SSong Gao #define DO_VABS(a) ((a < 0) ? (-a) : (a)) 396af448cb3SSong Gao 39727f5485dSSong Gao #define DO_VADDA(NAME, BIT, E) \ 39827f5485dSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 399af448cb3SSong Gao { \ 400af448cb3SSong Gao int i; \ 401af448cb3SSong Gao VReg *Vd = (VReg *)vd; \ 402af448cb3SSong Gao VReg *Vj = (VReg *)vj; \ 403af448cb3SSong Gao VReg *Vk = (VReg *)vk; \ 40427f5485dSSong Gao int oprsz = simd_oprsz(desc); \ 40527f5485dSSong Gao \ 40627f5485dSSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 40727f5485dSSong Gao Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \ 408af448cb3SSong Gao } \ 409af448cb3SSong Gao } 410af448cb3SSong Gao 41127f5485dSSong Gao DO_VADDA(vadda_b, 8, B) 41227f5485dSSong Gao DO_VADDA(vadda_h, 16, H) 41327f5485dSSong Gao DO_VADDA(vadda_w, 32, W) 41427f5485dSSong Gao DO_VADDA(vadda_d, 64, D) 4159ab29520SSong Gao 4169ab29520SSong Gao #define DO_MIN(a, b) (a < b ? a : b) 4179ab29520SSong Gao #define DO_MAX(a, b) (a > b ? a : b) 4189ab29520SSong Gao 4199ab29520SSong Gao #define VMINMAXI(NAME, BIT, E, DO_OP) \ 420c09360faSSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 4219ab29520SSong Gao { \ 4229ab29520SSong Gao int i; \ 4239ab29520SSong Gao VReg *Vd = (VReg *)vd; \ 4249ab29520SSong Gao VReg *Vj = (VReg *)vj; \ 4259ab29520SSong Gao typedef __typeof(Vd->E(0)) TD; \ 426c09360faSSong Gao int oprsz = simd_oprsz(desc); \ 4279ab29520SSong Gao \ 428c09360faSSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 4299ab29520SSong Gao Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ 4309ab29520SSong Gao } \ 4319ab29520SSong Gao } 4329ab29520SSong Gao 4339ab29520SSong Gao VMINMAXI(vmini_b, 8, B, DO_MIN) 4349ab29520SSong Gao VMINMAXI(vmini_h, 16, H, DO_MIN) 4359ab29520SSong Gao VMINMAXI(vmini_w, 32, W, DO_MIN) 4369ab29520SSong Gao VMINMAXI(vmini_d, 64, D, DO_MIN) 4379ab29520SSong Gao VMINMAXI(vmaxi_b, 8, B, DO_MAX) 4389ab29520SSong Gao VMINMAXI(vmaxi_h, 16, H, DO_MAX) 4399ab29520SSong Gao VMINMAXI(vmaxi_w, 32, W, DO_MAX) 4409ab29520SSong Gao VMINMAXI(vmaxi_d, 64, D, DO_MAX) 4419ab29520SSong Gao VMINMAXI(vmini_bu, 8, UB, DO_MIN) 4429ab29520SSong Gao VMINMAXI(vmini_hu, 16, UH, DO_MIN) 4439ab29520SSong Gao VMINMAXI(vmini_wu, 32, UW, DO_MIN) 4449ab29520SSong Gao VMINMAXI(vmini_du, 64, UD, DO_MIN) 4459ab29520SSong Gao VMINMAXI(vmaxi_bu, 8, UB, DO_MAX) 4469ab29520SSong Gao VMINMAXI(vmaxi_hu, 16, UH, DO_MAX) 4479ab29520SSong Gao VMINMAXI(vmaxi_wu, 32, UW, DO_MAX) 4489ab29520SSong Gao VMINMAXI(vmaxi_du, 64, UD, DO_MAX) 449cd1c49adSSong Gao 450cd1c49adSSong Gao #define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \ 451342dc1cfSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 452cd1c49adSSong Gao { \ 453cd1c49adSSong Gao int i; \ 454cd1c49adSSong Gao VReg *Vd = (VReg *)vd; \ 455cd1c49adSSong Gao VReg *Vj = (VReg *)vj; \ 456cd1c49adSSong Gao VReg *Vk = (VReg *)vk; \ 457cd1c49adSSong Gao typedef __typeof(Vd->E1(0)) T; \ 458342dc1cfSSong Gao int oprsz = simd_oprsz(desc); \ 459cd1c49adSSong Gao \ 460342dc1cfSSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 461cd1c49adSSong Gao Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \ 462cd1c49adSSong Gao } \ 463cd1c49adSSong Gao } 464cd1c49adSSong Gao 465342dc1cfSSong Gao void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc) 466cd1c49adSSong Gao { 467342dc1cfSSong Gao int i; 468342dc1cfSSong Gao uint64_t l, h; 469cd1c49adSSong Gao VReg *Vd = (VReg *)vd; 470cd1c49adSSong Gao VReg *Vj = (VReg *)vj; 471cd1c49adSSong Gao VReg *Vk = (VReg *)vk; 472342dc1cfSSong Gao int oprsz = simd_oprsz(desc); 473cd1c49adSSong Gao 474342dc1cfSSong Gao for (i = 0; i < oprsz / 8; i++) { 475342dc1cfSSong Gao muls64(&l, &h, Vj->D(i), Vk->D(i)); 476342dc1cfSSong Gao Vd->D(i) = h; 477342dc1cfSSong Gao } 478cd1c49adSSong Gao } 479cd1c49adSSong Gao 480cd1c49adSSong Gao DO_VMUH(vmuh_b, 8, H, B, DO_MUH) 481cd1c49adSSong Gao DO_VMUH(vmuh_h, 16, W, H, DO_MUH) 482cd1c49adSSong Gao DO_VMUH(vmuh_w, 32, D, W, DO_MUH) 483cd1c49adSSong Gao 484342dc1cfSSong Gao void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc) 485cd1c49adSSong Gao { 486342dc1cfSSong Gao int i; 487342dc1cfSSong Gao uint64_t l, h; 488cd1c49adSSong Gao VReg *Vd = (VReg *)vd; 489cd1c49adSSong Gao VReg *Vj = (VReg *)vj; 490cd1c49adSSong Gao VReg *Vk = (VReg *)vk; 491342dc1cfSSong Gao int oprsz = simd_oprsz(desc); 492cd1c49adSSong Gao 493342dc1cfSSong Gao for (i = 0; i < oprsz / 8; i++) { 494342dc1cfSSong Gao mulu64(&l, &h, Vj->D(i), Vk->D(i)); 495342dc1cfSSong Gao Vd->D(i) = h; 496342dc1cfSSong Gao } 497cd1c49adSSong Gao } 498cd1c49adSSong Gao 499cd1c49adSSong Gao DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH) 500cd1c49adSSong Gao DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH) 501cd1c49adSSong Gao DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH) 502cd1c49adSSong Gao 503cd1c49adSSong Gao #define DO_MUL(a, b) (a * b) 504cd1c49adSSong Gao 505cd1c49adSSong Gao DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL) 506cd1c49adSSong Gao DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL) 507cd1c49adSSong Gao DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL) 508cd1c49adSSong Gao 509cd1c49adSSong Gao DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL) 510cd1c49adSSong Gao DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL) 511cd1c49adSSong Gao DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL) 512cd1c49adSSong Gao 513cd1c49adSSong Gao DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL) 514cd1c49adSSong Gao DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL) 515cd1c49adSSong Gao DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL) 516cd1c49adSSong Gao 517cd1c49adSSong Gao DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL) 518cd1c49adSSong Gao DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL) 519cd1c49adSSong Gao DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL) 520cd1c49adSSong Gao 521cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) 522cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) 523cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) 524cd1c49adSSong Gao 525cd1c49adSSong Gao DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) 526cd1c49adSSong Gao DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) 527cd1c49adSSong Gao DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) 528d3aec65bSSong Gao 529d3aec65bSSong Gao #define DO_MADD(a, b, c) (a + b * c) 530d3aec65bSSong Gao #define DO_MSUB(a, b, c) (a - b * c) 531d3aec65bSSong Gao 532d3aec65bSSong Gao #define VMADDSUB(NAME, BIT, E, DO_OP) \ 5333f450c17SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 534d3aec65bSSong Gao { \ 535d3aec65bSSong Gao int i; \ 536d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 537d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 538d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 5393f450c17SSong Gao int oprsz = simd_oprsz(desc); \ 5403f450c17SSong Gao \ 5413f450c17SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 542d3aec65bSSong Gao Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \ 543d3aec65bSSong Gao } \ 544d3aec65bSSong Gao } 545d3aec65bSSong Gao 546d3aec65bSSong Gao VMADDSUB(vmadd_b, 8, B, DO_MADD) 547d3aec65bSSong Gao VMADDSUB(vmadd_h, 16, H, DO_MADD) 548d3aec65bSSong Gao VMADDSUB(vmadd_w, 32, W, DO_MADD) 549d3aec65bSSong Gao VMADDSUB(vmadd_d, 64, D, DO_MADD) 550d3aec65bSSong Gao VMADDSUB(vmsub_b, 8, B, DO_MSUB) 551d3aec65bSSong Gao VMADDSUB(vmsub_h, 16, H, DO_MSUB) 552d3aec65bSSong Gao VMADDSUB(vmsub_w, 32, W, DO_MSUB) 553d3aec65bSSong Gao VMADDSUB(vmsub_d, 64, D, DO_MSUB) 554d3aec65bSSong Gao 555d3aec65bSSong Gao #define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \ 5563f450c17SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 557d3aec65bSSong Gao { \ 558d3aec65bSSong Gao int i; \ 559d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 560d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 561d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 562d3aec65bSSong Gao typedef __typeof(Vd->E1(0)) TD; \ 5633f450c17SSong Gao int oprsz = simd_oprsz(desc); \ 564d3aec65bSSong Gao \ 5653f450c17SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 566d3aec65bSSong Gao Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \ 567d3aec65bSSong Gao } \ 568d3aec65bSSong Gao } 569d3aec65bSSong Gao 570d3aec65bSSong Gao VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL) 571d3aec65bSSong Gao VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL) 572d3aec65bSSong Gao VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL) 573d3aec65bSSong Gao VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL) 574d3aec65bSSong Gao VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL) 575d3aec65bSSong Gao VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL) 576d3aec65bSSong Gao 577d3aec65bSSong Gao #define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \ 5783f450c17SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 579d3aec65bSSong Gao { \ 580d3aec65bSSong Gao int i; \ 581d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 582d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 583d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 584d3aec65bSSong Gao typedef __typeof(Vd->E1(0)) TD; \ 5853f450c17SSong Gao int oprsz = simd_oprsz(desc); \ 586d3aec65bSSong Gao \ 5873f450c17SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 588d3aec65bSSong Gao Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \ 589d3aec65bSSong Gao (TD)Vk->E2(2 * i + 1)); \ 590d3aec65bSSong Gao } \ 591d3aec65bSSong Gao } 592d3aec65bSSong Gao 593d3aec65bSSong Gao VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL) 594d3aec65bSSong Gao VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL) 595d3aec65bSSong Gao VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL) 596d3aec65bSSong Gao VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL) 597d3aec65bSSong Gao VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL) 598d3aec65bSSong Gao VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL) 599d3aec65bSSong Gao 600d3aec65bSSong Gao #define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ 6013f450c17SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 602d3aec65bSSong Gao { \ 603d3aec65bSSong Gao int i; \ 604d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 605d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 606d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 607d3aec65bSSong Gao typedef __typeof(Vd->ES1(0)) TS1; \ 608d3aec65bSSong Gao typedef __typeof(Vd->EU1(0)) TU1; \ 6093f450c17SSong Gao int oprsz = simd_oprsz(desc); \ 610d3aec65bSSong Gao \ 6113f450c17SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 612d3aec65bSSong Gao Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \ 613d3aec65bSSong Gao (TS1)Vk->ES2(2 * i)); \ 614d3aec65bSSong Gao } \ 615d3aec65bSSong Gao } 616d3aec65bSSong Gao 617d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) 618d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) 619d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) 620d3aec65bSSong Gao 621d3aec65bSSong Gao #define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ 6223f450c17SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 623d3aec65bSSong Gao { \ 624d3aec65bSSong Gao int i; \ 625d3aec65bSSong Gao VReg *Vd = (VReg *)vd; \ 626d3aec65bSSong Gao VReg *Vj = (VReg *)vj; \ 627d3aec65bSSong Gao VReg *Vk = (VReg *)vk; \ 628d3aec65bSSong Gao typedef __typeof(Vd->ES1(0)) TS1; \ 629d3aec65bSSong Gao typedef __typeof(Vd->EU1(0)) TU1; \ 6303f450c17SSong Gao int oprsz = simd_oprsz(desc); \ 631d3aec65bSSong Gao \ 6323f450c17SSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 633d3aec65bSSong Gao Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \ 634d3aec65bSSong Gao (TS1)Vk->ES2(2 * i + 1)); \ 635d3aec65bSSong Gao } \ 636d3aec65bSSong Gao } 637d3aec65bSSong Gao 638d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) 639d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) 640d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) 6414cc4c0f7SSong Gao 6424cc4c0f7SSong Gao #define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M) 6434cc4c0f7SSong Gao #define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M) 6444cc4c0f7SSong Gao #define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\ 6454cc4c0f7SSong Gao unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 6464cc4c0f7SSong Gao #define DO_REM(N, M) (unlikely(M == 0) ? 0 :\ 6474cc4c0f7SSong Gao unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 6484cc4c0f7SSong Gao 6494cc4c0f7SSong Gao #define VDIV(NAME, BIT, E, DO_OP) \ 65004711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 6514cc4c0f7SSong Gao { \ 6524cc4c0f7SSong Gao int i; \ 65304711da1SSong Gao VReg *Vd = (VReg *)vd; \ 65404711da1SSong Gao VReg *Vj = (VReg *)vj; \ 65504711da1SSong Gao VReg *Vk = (VReg *)vk; \ 656abb693deSSong Gao int oprsz = simd_oprsz(desc); \ 657abb693deSSong Gao \ 658abb693deSSong Gao for (i = 0; i < oprsz / (BIT / 8); i++) { \ 6594cc4c0f7SSong Gao Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ 6604cc4c0f7SSong Gao } \ 6614cc4c0f7SSong Gao } 6624cc4c0f7SSong Gao 6634cc4c0f7SSong Gao VDIV(vdiv_b, 8, B, DO_DIV) 6644cc4c0f7SSong Gao VDIV(vdiv_h, 16, H, DO_DIV) 6654cc4c0f7SSong Gao VDIV(vdiv_w, 32, W, DO_DIV) 6664cc4c0f7SSong Gao VDIV(vdiv_d, 64, D, DO_DIV) 6674cc4c0f7SSong Gao VDIV(vdiv_bu, 8, UB, DO_DIVU) 6684cc4c0f7SSong Gao VDIV(vdiv_hu, 16, UH, DO_DIVU) 6694cc4c0f7SSong Gao VDIV(vdiv_wu, 32, UW, DO_DIVU) 6704cc4c0f7SSong Gao VDIV(vdiv_du, 64, UD, DO_DIVU) 6714cc4c0f7SSong Gao VDIV(vmod_b, 8, B, DO_REM) 6724cc4c0f7SSong Gao VDIV(vmod_h, 16, H, DO_REM) 6734cc4c0f7SSong Gao VDIV(vmod_w, 32, W, DO_REM) 6744cc4c0f7SSong Gao VDIV(vmod_d, 64, D, DO_REM) 6754cc4c0f7SSong Gao VDIV(vmod_bu, 8, UB, DO_REMU) 6764cc4c0f7SSong Gao VDIV(vmod_hu, 16, UH, DO_REMU) 6774cc4c0f7SSong Gao VDIV(vmod_wu, 32, UW, DO_REMU) 6784cc4c0f7SSong Gao VDIV(vmod_du, 64, UD, DO_REMU) 679cbe44190SSong Gao 680cbe44190SSong Gao #define VSAT_S(NAME, BIT, E) \ 681cbe44190SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \ 682cbe44190SSong Gao { \ 683cbe44190SSong Gao int i; \ 684cbe44190SSong Gao VReg *Vd = (VReg *)vd; \ 685cbe44190SSong Gao VReg *Vj = (VReg *)vj; \ 686cbe44190SSong Gao typedef __typeof(Vd->E(0)) TD; \ 687cbe44190SSong Gao \ 688cbe44190SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 689cbe44190SSong Gao Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \ 690cbe44190SSong Gao Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \ 691cbe44190SSong Gao } \ 692cbe44190SSong Gao } 693cbe44190SSong Gao 694cbe44190SSong Gao VSAT_S(vsat_b, 8, B) 695cbe44190SSong Gao VSAT_S(vsat_h, 16, H) 696cbe44190SSong Gao VSAT_S(vsat_w, 32, W) 697cbe44190SSong Gao VSAT_S(vsat_d, 64, D) 698cbe44190SSong Gao 699cbe44190SSong Gao #define VSAT_U(NAME, BIT, E) \ 700cbe44190SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \ 701cbe44190SSong Gao { \ 702cbe44190SSong Gao int i; \ 703cbe44190SSong Gao VReg *Vd = (VReg *)vd; \ 704cbe44190SSong Gao VReg *Vj = (VReg *)vj; \ 705cbe44190SSong Gao typedef __typeof(Vd->E(0)) TD; \ 706cbe44190SSong Gao \ 707cbe44190SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 708cbe44190SSong Gao Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \ 709cbe44190SSong Gao } \ 710cbe44190SSong Gao } 711cbe44190SSong Gao 712cbe44190SSong Gao VSAT_U(vsat_bu, 8, UB) 713cbe44190SSong Gao VSAT_U(vsat_hu, 16, UH) 714cbe44190SSong Gao VSAT_U(vsat_wu, 32, UW) 715cbe44190SSong Gao VSAT_U(vsat_du, 64, UD) 7163734ad93SSong Gao 7173734ad93SSong Gao #define VEXTH(NAME, BIT, E1, E2) \ 718ff27e335SSong Gao void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ 7193734ad93SSong Gao { \ 7203734ad93SSong Gao int i; \ 721ff27e335SSong Gao VReg *Vd = (VReg *)vd; \ 722ff27e335SSong Gao VReg *Vj = (VReg *)vj; \ 7233734ad93SSong Gao \ 7243734ad93SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 7253734ad93SSong Gao Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \ 7263734ad93SSong Gao } \ 7273734ad93SSong Gao } 7283734ad93SSong Gao 729ff27e335SSong Gao void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc) 7303734ad93SSong Gao { 731ff27e335SSong Gao VReg *Vd = (VReg *)vd; 732ff27e335SSong Gao VReg *Vj = (VReg *)vj; 7333734ad93SSong Gao 7343734ad93SSong Gao Vd->Q(0) = int128_makes64(Vj->D(1)); 7353734ad93SSong Gao } 7363734ad93SSong Gao 737ff27e335SSong Gao void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc) 7383734ad93SSong Gao { 739ff27e335SSong Gao VReg *Vd = (VReg *)vd; 740ff27e335SSong Gao VReg *Vj = (VReg *)vj; 7413734ad93SSong Gao 7423734ad93SSong Gao Vd->Q(0) = int128_make64((uint64_t)Vj->D(1)); 7433734ad93SSong Gao } 7443734ad93SSong Gao 7453734ad93SSong Gao VEXTH(vexth_h_b, 16, H, B) 7463734ad93SSong Gao VEXTH(vexth_w_h, 32, W, H) 7473734ad93SSong Gao VEXTH(vexth_d_w, 64, D, W) 7483734ad93SSong Gao VEXTH(vexth_hu_bu, 16, UH, UB) 7493734ad93SSong Gao VEXTH(vexth_wu_hu, 32, UW, UH) 7503734ad93SSong Gao VEXTH(vexth_du_wu, 64, UD, UW) 751f0e395dfSSong Gao 752f0e395dfSSong Gao #define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b) 753f0e395dfSSong Gao 754f0e395dfSSong Gao DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV) 755f0e395dfSSong Gao DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV) 756f0e395dfSSong Gao DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV) 757f0e395dfSSong Gao DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV) 758789f4a4cSSong Gao 759789f4a4cSSong Gao static uint64_t do_vmskltz_b(int64_t val) 760789f4a4cSSong Gao { 761789f4a4cSSong Gao uint64_t m = 0x8080808080808080ULL; 762789f4a4cSSong Gao uint64_t c = val & m; 763789f4a4cSSong Gao c |= c << 7; 764789f4a4cSSong Gao c |= c << 14; 765789f4a4cSSong Gao c |= c << 28; 766789f4a4cSSong Gao return c >> 56; 767789f4a4cSSong Gao } 768789f4a4cSSong Gao 769ff27e335SSong Gao void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc) 770789f4a4cSSong Gao { 771789f4a4cSSong Gao uint16_t temp = 0; 772ff27e335SSong Gao VReg *Vd = (VReg *)vd; 773ff27e335SSong Gao VReg *Vj = (VReg *)vj; 774789f4a4cSSong Gao 775789f4a4cSSong Gao temp = do_vmskltz_b(Vj->D(0)); 776789f4a4cSSong Gao temp |= (do_vmskltz_b(Vj->D(1)) << 8); 777789f4a4cSSong Gao Vd->D(0) = temp; 778789f4a4cSSong Gao Vd->D(1) = 0; 779789f4a4cSSong Gao } 780789f4a4cSSong Gao 781789f4a4cSSong Gao static uint64_t do_vmskltz_h(int64_t val) 782789f4a4cSSong Gao { 783789f4a4cSSong Gao uint64_t m = 0x8000800080008000ULL; 784789f4a4cSSong Gao uint64_t c = val & m; 785789f4a4cSSong Gao c |= c << 15; 786789f4a4cSSong Gao c |= c << 30; 787789f4a4cSSong Gao return c >> 60; 788789f4a4cSSong Gao } 789789f4a4cSSong Gao 790ff27e335SSong Gao void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc) 791789f4a4cSSong Gao { 792789f4a4cSSong Gao uint16_t temp = 0; 793ff27e335SSong Gao VReg *Vd = (VReg *)vd; 794ff27e335SSong Gao VReg *Vj = (VReg *)vj; 795789f4a4cSSong Gao 796789f4a4cSSong Gao temp = do_vmskltz_h(Vj->D(0)); 797789f4a4cSSong Gao temp |= (do_vmskltz_h(Vj->D(1)) << 4); 798789f4a4cSSong Gao Vd->D(0) = temp; 799789f4a4cSSong Gao Vd->D(1) = 0; 800789f4a4cSSong Gao } 801789f4a4cSSong Gao 802789f4a4cSSong Gao static uint64_t do_vmskltz_w(int64_t val) 803789f4a4cSSong Gao { 804789f4a4cSSong Gao uint64_t m = 0x8000000080000000ULL; 805789f4a4cSSong Gao uint64_t c = val & m; 806789f4a4cSSong Gao c |= c << 31; 807789f4a4cSSong Gao return c >> 62; 808789f4a4cSSong Gao } 809789f4a4cSSong Gao 810ff27e335SSong Gao void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc) 811789f4a4cSSong Gao { 812789f4a4cSSong Gao uint16_t temp = 0; 813ff27e335SSong Gao VReg *Vd = (VReg *)vd; 814ff27e335SSong Gao VReg *Vj = (VReg *)vj; 815789f4a4cSSong Gao 816789f4a4cSSong Gao temp = do_vmskltz_w(Vj->D(0)); 817789f4a4cSSong Gao temp |= (do_vmskltz_w(Vj->D(1)) << 2); 818789f4a4cSSong Gao Vd->D(0) = temp; 819789f4a4cSSong Gao Vd->D(1) = 0; 820789f4a4cSSong Gao } 821789f4a4cSSong Gao 822789f4a4cSSong Gao static uint64_t do_vmskltz_d(int64_t val) 823789f4a4cSSong Gao { 824789f4a4cSSong Gao return (uint64_t)val >> 63; 825789f4a4cSSong Gao } 826ff27e335SSong Gao void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc) 827789f4a4cSSong Gao { 828789f4a4cSSong Gao uint16_t temp = 0; 829ff27e335SSong Gao VReg *Vd = (VReg *)vd; 830ff27e335SSong Gao VReg *Vj = (VReg *)vj; 831789f4a4cSSong Gao 832789f4a4cSSong Gao temp = do_vmskltz_d(Vj->D(0)); 833789f4a4cSSong Gao temp |= (do_vmskltz_d(Vj->D(1)) << 1); 834789f4a4cSSong Gao Vd->D(0) = temp; 835789f4a4cSSong Gao Vd->D(1) = 0; 836789f4a4cSSong Gao } 837789f4a4cSSong Gao 838ff27e335SSong Gao void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc) 839789f4a4cSSong Gao { 840789f4a4cSSong Gao uint16_t temp = 0; 841ff27e335SSong Gao VReg *Vd = (VReg *)vd; 842ff27e335SSong Gao VReg *Vj = (VReg *)vj; 843789f4a4cSSong Gao 844789f4a4cSSong Gao temp = do_vmskltz_b(Vj->D(0)); 845789f4a4cSSong Gao temp |= (do_vmskltz_b(Vj->D(1)) << 8); 846789f4a4cSSong Gao Vd->D(0) = (uint16_t)(~temp); 847789f4a4cSSong Gao Vd->D(1) = 0; 848789f4a4cSSong Gao } 849789f4a4cSSong Gao 850789f4a4cSSong Gao static uint64_t do_vmskez_b(uint64_t a) 851789f4a4cSSong Gao { 852789f4a4cSSong Gao uint64_t m = 0x7f7f7f7f7f7f7f7fULL; 853789f4a4cSSong Gao uint64_t c = ~(((a & m) + m) | a | m); 854789f4a4cSSong Gao c |= c << 7; 855789f4a4cSSong Gao c |= c << 14; 856789f4a4cSSong Gao c |= c << 28; 857789f4a4cSSong Gao return c >> 56; 858789f4a4cSSong Gao } 859789f4a4cSSong Gao 860ff27e335SSong Gao void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc) 861789f4a4cSSong Gao { 862789f4a4cSSong Gao uint16_t temp = 0; 863ff27e335SSong Gao VReg *Vd = (VReg *)vd; 864ff27e335SSong Gao VReg *Vj = (VReg *)vj; 865789f4a4cSSong Gao 866789f4a4cSSong Gao temp = do_vmskez_b(Vj->D(0)); 867789f4a4cSSong Gao temp |= (do_vmskez_b(Vj->D(1)) << 8); 868789f4a4cSSong Gao Vd->D(0) = (uint16_t)(~temp); 869789f4a4cSSong Gao Vd->D(1) = 0; 870789f4a4cSSong Gao } 871f205a539SSong Gao 872f205a539SSong Gao void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v) 873f205a539SSong Gao { 874f205a539SSong Gao int i; 875f205a539SSong Gao VReg *Vd = (VReg *)vd; 876f205a539SSong Gao VReg *Vj = (VReg *)vj; 877f205a539SSong Gao 878f205a539SSong Gao for (i = 0; i < LSX_LEN/8; i++) { 879f205a539SSong Gao Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm); 880f205a539SSong Gao } 881f205a539SSong Gao } 8829b21a7a5SSong Gao 8839b21a7a5SSong Gao #define VSLLWIL(NAME, BIT, E1, E2) \ 884329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 8859b21a7a5SSong Gao { \ 8869b21a7a5SSong Gao int i; \ 8879b21a7a5SSong Gao VReg temp; \ 888329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 889329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 8909b21a7a5SSong Gao typedef __typeof(temp.E1(0)) TD; \ 8919b21a7a5SSong Gao \ 8929b21a7a5SSong Gao temp.D(0) = 0; \ 8939b21a7a5SSong Gao temp.D(1) = 0; \ 8949b21a7a5SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 8959b21a7a5SSong Gao temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \ 8969b21a7a5SSong Gao } \ 8979b21a7a5SSong Gao *Vd = temp; \ 8989b21a7a5SSong Gao } 8999b21a7a5SSong Gao 900ff27e335SSong Gao void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc) 9019b21a7a5SSong Gao { 902ff27e335SSong Gao VReg *Vd = (VReg *)vd; 903ff27e335SSong Gao VReg *Vj = (VReg *)vj; 9049b21a7a5SSong Gao 9059b21a7a5SSong Gao Vd->Q(0) = int128_makes64(Vj->D(0)); 9069b21a7a5SSong Gao } 9079b21a7a5SSong Gao 908ff27e335SSong Gao void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc) 9099b21a7a5SSong Gao { 910ff27e335SSong Gao VReg *Vd = (VReg *)vd; 911ff27e335SSong Gao VReg *Vj = (VReg *)vj; 9129b21a7a5SSong Gao 9139b21a7a5SSong Gao Vd->Q(0) = int128_make64(Vj->D(0)); 9149b21a7a5SSong Gao } 9159b21a7a5SSong Gao 9169b21a7a5SSong Gao VSLLWIL(vsllwil_h_b, 16, H, B) 9179b21a7a5SSong Gao VSLLWIL(vsllwil_w_h, 32, W, H) 9189b21a7a5SSong Gao VSLLWIL(vsllwil_d_w, 64, D, W) 9199b21a7a5SSong Gao VSLLWIL(vsllwil_hu_bu, 16, UH, UB) 9209b21a7a5SSong Gao VSLLWIL(vsllwil_wu_hu, 32, UW, UH) 9219b21a7a5SSong Gao VSLLWIL(vsllwil_du_wu, 64, UD, UW) 922ecb93716SSong Gao 923ecb93716SSong Gao #define do_vsrlr(E, T) \ 924ecb93716SSong Gao static T do_vsrlr_ ##E(T s1, int sh) \ 925ecb93716SSong Gao { \ 926ecb93716SSong Gao if (sh == 0) { \ 927ecb93716SSong Gao return s1; \ 928ecb93716SSong Gao } else { \ 929ecb93716SSong Gao return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ 930ecb93716SSong Gao } \ 931ecb93716SSong Gao } 932ecb93716SSong Gao 933ecb93716SSong Gao do_vsrlr(B, uint8_t) 934ecb93716SSong Gao do_vsrlr(H, uint16_t) 935ecb93716SSong Gao do_vsrlr(W, uint32_t) 936ecb93716SSong Gao do_vsrlr(D, uint64_t) 937ecb93716SSong Gao 938ecb93716SSong Gao #define VSRLR(NAME, BIT, T, E) \ 93904711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 940ecb93716SSong Gao { \ 941ecb93716SSong Gao int i; \ 94204711da1SSong Gao VReg *Vd = (VReg *)vd; \ 94304711da1SSong Gao VReg *Vj = (VReg *)vj; \ 94404711da1SSong Gao VReg *Vk = (VReg *)vk; \ 945ecb93716SSong Gao \ 946ecb93716SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 947ecb93716SSong Gao Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ 948ecb93716SSong Gao } \ 949ecb93716SSong Gao } 950ecb93716SSong Gao 951ecb93716SSong Gao VSRLR(vsrlr_b, 8, uint8_t, B) 952ecb93716SSong Gao VSRLR(vsrlr_h, 16, uint16_t, H) 953ecb93716SSong Gao VSRLR(vsrlr_w, 32, uint32_t, W) 954ecb93716SSong Gao VSRLR(vsrlr_d, 64, uint64_t, D) 955ecb93716SSong Gao 956ecb93716SSong Gao #define VSRLRI(NAME, BIT, E) \ 957329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 958ecb93716SSong Gao { \ 959ecb93716SSong Gao int i; \ 960329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 961329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 962ecb93716SSong Gao \ 963ecb93716SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 964ecb93716SSong Gao Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ 965ecb93716SSong Gao } \ 966ecb93716SSong Gao } 967ecb93716SSong Gao 968ecb93716SSong Gao VSRLRI(vsrlri_b, 8, B) 969ecb93716SSong Gao VSRLRI(vsrlri_h, 16, H) 970ecb93716SSong Gao VSRLRI(vsrlri_w, 32, W) 971ecb93716SSong Gao VSRLRI(vsrlri_d, 64, D) 972ecb93716SSong Gao 973ecb93716SSong Gao #define do_vsrar(E, T) \ 974ecb93716SSong Gao static T do_vsrar_ ##E(T s1, int sh) \ 975ecb93716SSong Gao { \ 976ecb93716SSong Gao if (sh == 0) { \ 977ecb93716SSong Gao return s1; \ 978ecb93716SSong Gao } else { \ 979ecb93716SSong Gao return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ 980ecb93716SSong Gao } \ 981ecb93716SSong Gao } 982ecb93716SSong Gao 983ecb93716SSong Gao do_vsrar(B, int8_t) 984ecb93716SSong Gao do_vsrar(H, int16_t) 985ecb93716SSong Gao do_vsrar(W, int32_t) 986ecb93716SSong Gao do_vsrar(D, int64_t) 987ecb93716SSong Gao 988ecb93716SSong Gao #define VSRAR(NAME, BIT, T, E) \ 98904711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 990ecb93716SSong Gao { \ 991ecb93716SSong Gao int i; \ 99204711da1SSong Gao VReg *Vd = (VReg *)vd; \ 99304711da1SSong Gao VReg *Vj = (VReg *)vj; \ 99404711da1SSong Gao VReg *Vk = (VReg *)vk; \ 995ecb93716SSong Gao \ 996ecb93716SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 997ecb93716SSong Gao Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ 998ecb93716SSong Gao } \ 999ecb93716SSong Gao } 1000ecb93716SSong Gao 1001ecb93716SSong Gao VSRAR(vsrar_b, 8, uint8_t, B) 1002ecb93716SSong Gao VSRAR(vsrar_h, 16, uint16_t, H) 1003ecb93716SSong Gao VSRAR(vsrar_w, 32, uint32_t, W) 1004ecb93716SSong Gao VSRAR(vsrar_d, 64, uint64_t, D) 1005ecb93716SSong Gao 1006ecb93716SSong Gao #define VSRARI(NAME, BIT, E) \ 1007329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1008ecb93716SSong Gao { \ 1009ecb93716SSong Gao int i; \ 1010329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1011329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1012ecb93716SSong Gao \ 1013ecb93716SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1014ecb93716SSong Gao Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \ 1015ecb93716SSong Gao } \ 1016ecb93716SSong Gao } 1017ecb93716SSong Gao 1018ecb93716SSong Gao VSRARI(vsrari_b, 8, B) 1019ecb93716SSong Gao VSRARI(vsrari_h, 16, H) 1020ecb93716SSong Gao VSRARI(vsrari_w, 32, W) 1021ecb93716SSong Gao VSRARI(vsrari_d, 64, D) 1022d79fb8ddSSong Gao 1023d79fb8ddSSong Gao #define R_SHIFT(a, b) (a >> b) 1024d79fb8ddSSong Gao 1025d79fb8ddSSong Gao #define VSRLN(NAME, BIT, T, E1, E2) \ 102604711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1027d79fb8ddSSong Gao { \ 1028d79fb8ddSSong Gao int i; \ 102904711da1SSong Gao VReg *Vd = (VReg *)vd; \ 103004711da1SSong Gao VReg *Vj = (VReg *)vj; \ 103104711da1SSong Gao VReg *Vk = (VReg *)vk; \ 1032d79fb8ddSSong Gao \ 1033d79fb8ddSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1034d79fb8ddSSong Gao Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \ 1035d79fb8ddSSong Gao } \ 1036d79fb8ddSSong Gao Vd->D(1) = 0; \ 1037d79fb8ddSSong Gao } 1038d79fb8ddSSong Gao 1039d79fb8ddSSong Gao VSRLN(vsrln_b_h, 16, uint16_t, B, H) 1040d79fb8ddSSong Gao VSRLN(vsrln_h_w, 32, uint32_t, H, W) 1041d79fb8ddSSong Gao VSRLN(vsrln_w_d, 64, uint64_t, W, D) 1042d79fb8ddSSong Gao 1043d79fb8ddSSong Gao #define VSRAN(NAME, BIT, T, E1, E2) \ 104404711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1045d79fb8ddSSong Gao { \ 1046d79fb8ddSSong Gao int i; \ 104704711da1SSong Gao VReg *Vd = (VReg *)vd; \ 104804711da1SSong Gao VReg *Vj = (VReg *)vj; \ 104904711da1SSong Gao VReg *Vk = (VReg *)vk; \ 1050d79fb8ddSSong Gao \ 1051d79fb8ddSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1052d79fb8ddSSong Gao Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \ 1053d79fb8ddSSong Gao } \ 1054d79fb8ddSSong Gao Vd->D(1) = 0; \ 1055d79fb8ddSSong Gao } 1056d79fb8ddSSong Gao 1057d79fb8ddSSong Gao VSRAN(vsran_b_h, 16, uint16_t, B, H) 1058d79fb8ddSSong Gao VSRAN(vsran_h_w, 32, uint32_t, H, W) 1059d79fb8ddSSong Gao VSRAN(vsran_w_d, 64, uint64_t, W, D) 1060d79fb8ddSSong Gao 1061d79fb8ddSSong Gao #define VSRLNI(NAME, BIT, T, E1, E2) \ 1062329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1063d79fb8ddSSong Gao { \ 1064d79fb8ddSSong Gao int i, max; \ 1065d79fb8ddSSong Gao VReg temp; \ 1066329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1067329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1068d79fb8ddSSong Gao \ 1069d79fb8ddSSong Gao temp.D(0) = 0; \ 1070d79fb8ddSSong Gao temp.D(1) = 0; \ 1071d79fb8ddSSong Gao max = LSX_LEN/BIT; \ 1072d79fb8ddSSong Gao for (i = 0; i < max; i++) { \ 1073d79fb8ddSSong Gao temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \ 1074d79fb8ddSSong Gao temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \ 1075d79fb8ddSSong Gao } \ 1076d79fb8ddSSong Gao *Vd = temp; \ 1077d79fb8ddSSong Gao } 1078d79fb8ddSSong Gao 1079329517d5SSong Gao void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 1080d79fb8ddSSong Gao { 1081d79fb8ddSSong Gao VReg temp; 1082329517d5SSong Gao VReg *Vd = (VReg *)vd; 1083329517d5SSong Gao VReg *Vj = (VReg *)vj; 1084d79fb8ddSSong Gao 1085d79fb8ddSSong Gao temp.D(0) = 0; 1086d79fb8ddSSong Gao temp.D(1) = 0; 1087d79fb8ddSSong Gao temp.D(0) = int128_getlo(int128_urshift(Vj->Q(0), imm % 128)); 1088d79fb8ddSSong Gao temp.D(1) = int128_getlo(int128_urshift(Vd->Q(0), imm % 128)); 1089d79fb8ddSSong Gao *Vd = temp; 1090d79fb8ddSSong Gao } 1091d79fb8ddSSong Gao 1092d79fb8ddSSong Gao VSRLNI(vsrlni_b_h, 16, uint16_t, B, H) 1093d79fb8ddSSong Gao VSRLNI(vsrlni_h_w, 32, uint32_t, H, W) 1094d79fb8ddSSong Gao VSRLNI(vsrlni_w_d, 64, uint64_t, W, D) 1095d79fb8ddSSong Gao 1096d79fb8ddSSong Gao #define VSRANI(NAME, BIT, E1, E2) \ 1097329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1098d79fb8ddSSong Gao { \ 1099d79fb8ddSSong Gao int i, max; \ 1100d79fb8ddSSong Gao VReg temp; \ 1101329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1102329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1103d79fb8ddSSong Gao \ 1104d79fb8ddSSong Gao temp.D(0) = 0; \ 1105d79fb8ddSSong Gao temp.D(1) = 0; \ 1106d79fb8ddSSong Gao max = LSX_LEN/BIT; \ 1107d79fb8ddSSong Gao for (i = 0; i < max; i++) { \ 1108d79fb8ddSSong Gao temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \ 1109d79fb8ddSSong Gao temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \ 1110d79fb8ddSSong Gao } \ 1111d79fb8ddSSong Gao *Vd = temp; \ 1112d79fb8ddSSong Gao } 1113d79fb8ddSSong Gao 1114329517d5SSong Gao void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 1115d79fb8ddSSong Gao { 1116d79fb8ddSSong Gao VReg temp; 1117329517d5SSong Gao VReg *Vd = (VReg *)vd; 1118329517d5SSong Gao VReg *Vj = (VReg *)vj; 1119d79fb8ddSSong Gao 1120d79fb8ddSSong Gao temp.D(0) = 0; 1121d79fb8ddSSong Gao temp.D(1) = 0; 1122d79fb8ddSSong Gao temp.D(0) = int128_getlo(int128_rshift(Vj->Q(0), imm % 128)); 1123d79fb8ddSSong Gao temp.D(1) = int128_getlo(int128_rshift(Vd->Q(0), imm % 128)); 1124d79fb8ddSSong Gao *Vd = temp; 1125d79fb8ddSSong Gao } 1126d79fb8ddSSong Gao 1127d79fb8ddSSong Gao VSRANI(vsrani_b_h, 16, B, H) 1128d79fb8ddSSong Gao VSRANI(vsrani_h_w, 32, H, W) 1129d79fb8ddSSong Gao VSRANI(vsrani_w_d, 64, W, D) 1130a5200a17SSong Gao 1131a5200a17SSong Gao #define VSRLRN(NAME, BIT, T, E1, E2) \ 113204711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1133a5200a17SSong Gao { \ 1134a5200a17SSong Gao int i; \ 113504711da1SSong Gao VReg *Vd = (VReg *)vd; \ 113604711da1SSong Gao VReg *Vj = (VReg *)vj; \ 113704711da1SSong Gao VReg *Vk = (VReg *)vk; \ 1138a5200a17SSong Gao \ 1139a5200a17SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1140a5200a17SSong Gao Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ 1141a5200a17SSong Gao } \ 1142a5200a17SSong Gao Vd->D(1) = 0; \ 1143a5200a17SSong Gao } 1144a5200a17SSong Gao 1145a5200a17SSong Gao VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H) 1146a5200a17SSong Gao VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W) 1147a5200a17SSong Gao VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D) 1148a5200a17SSong Gao 1149a5200a17SSong Gao #define VSRARN(NAME, BIT, T, E1, E2) \ 115004711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1151a5200a17SSong Gao { \ 1152a5200a17SSong Gao int i; \ 115304711da1SSong Gao VReg *Vd = (VReg *)vd; \ 115404711da1SSong Gao VReg *Vj = (VReg *)vj; \ 115504711da1SSong Gao VReg *Vk = (VReg *)vk; \ 1156a5200a17SSong Gao \ 1157a5200a17SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1158a5200a17SSong Gao Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \ 1159a5200a17SSong Gao } \ 1160a5200a17SSong Gao Vd->D(1) = 0; \ 1161a5200a17SSong Gao } 1162a5200a17SSong Gao 1163a5200a17SSong Gao VSRARN(vsrarn_b_h, 16, uint8_t, B, H) 1164a5200a17SSong Gao VSRARN(vsrarn_h_w, 32, uint16_t, H, W) 1165a5200a17SSong Gao VSRARN(vsrarn_w_d, 64, uint32_t, W, D) 1166a5200a17SSong Gao 1167a5200a17SSong Gao #define VSRLRNI(NAME, BIT, E1, E2) \ 1168329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1169a5200a17SSong Gao { \ 1170a5200a17SSong Gao int i, max; \ 1171a5200a17SSong Gao VReg temp; \ 1172329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1173329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1174a5200a17SSong Gao \ 1175a5200a17SSong Gao temp.D(0) = 0; \ 1176a5200a17SSong Gao temp.D(1) = 0; \ 1177a5200a17SSong Gao max = LSX_LEN/BIT; \ 1178a5200a17SSong Gao for (i = 0; i < max; i++) { \ 1179a5200a17SSong Gao temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \ 1180a5200a17SSong Gao temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \ 1181a5200a17SSong Gao } \ 1182a5200a17SSong Gao *Vd = temp; \ 1183a5200a17SSong Gao } 1184a5200a17SSong Gao 1185329517d5SSong Gao void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 1186a5200a17SSong Gao { 1187a5200a17SSong Gao VReg temp; 1188329517d5SSong Gao VReg *Vd = (VReg *)vd; 1189329517d5SSong Gao VReg *Vj = (VReg *)vj; 1190a5200a17SSong Gao Int128 r1, r2; 1191a5200a17SSong Gao 1192a5200a17SSong Gao if (imm == 0) { 1193a5200a17SSong Gao temp.D(0) = int128_getlo(Vj->Q(0)); 1194a5200a17SSong Gao temp.D(1) = int128_getlo(Vd->Q(0)); 1195a5200a17SSong Gao } else { 1196a5200a17SSong Gao r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); 1197a5200a17SSong Gao r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); 1198a5200a17SSong Gao 1199a5200a17SSong Gao temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1)); 1200a5200a17SSong Gao temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2)); 1201a5200a17SSong Gao } 1202a5200a17SSong Gao *Vd = temp; 1203a5200a17SSong Gao } 1204a5200a17SSong Gao 1205a5200a17SSong Gao VSRLRNI(vsrlrni_b_h, 16, B, H) 1206a5200a17SSong Gao VSRLRNI(vsrlrni_h_w, 32, H, W) 1207a5200a17SSong Gao VSRLRNI(vsrlrni_w_d, 64, W, D) 1208a5200a17SSong Gao 1209a5200a17SSong Gao #define VSRARNI(NAME, BIT, E1, E2) \ 1210329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1211a5200a17SSong Gao { \ 1212a5200a17SSong Gao int i, max; \ 1213a5200a17SSong Gao VReg temp; \ 1214329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1215329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1216a5200a17SSong Gao \ 1217a5200a17SSong Gao temp.D(0) = 0; \ 1218a5200a17SSong Gao temp.D(1) = 0; \ 1219a5200a17SSong Gao max = LSX_LEN/BIT; \ 1220a5200a17SSong Gao for (i = 0; i < max; i++) { \ 1221a5200a17SSong Gao temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \ 1222a5200a17SSong Gao temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \ 1223a5200a17SSong Gao } \ 1224a5200a17SSong Gao *Vd = temp; \ 1225a5200a17SSong Gao } 1226a5200a17SSong Gao 1227329517d5SSong Gao void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 1228a5200a17SSong Gao { 1229a5200a17SSong Gao VReg temp; 1230329517d5SSong Gao VReg *Vd = (VReg *)vd; 1231329517d5SSong Gao VReg *Vj = (VReg *)vj; 1232a5200a17SSong Gao Int128 r1, r2; 1233a5200a17SSong Gao 1234a5200a17SSong Gao if (imm == 0) { 1235a5200a17SSong Gao temp.D(0) = int128_getlo(Vj->Q(0)); 1236a5200a17SSong Gao temp.D(1) = int128_getlo(Vd->Q(0)); 1237a5200a17SSong Gao } else { 1238a5200a17SSong Gao r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); 1239a5200a17SSong Gao r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); 1240a5200a17SSong Gao 1241a5200a17SSong Gao temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1)); 1242a5200a17SSong Gao temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2)); 1243a5200a17SSong Gao } 1244a5200a17SSong Gao *Vd = temp; 1245a5200a17SSong Gao } 1246a5200a17SSong Gao 1247a5200a17SSong Gao VSRARNI(vsrarni_b_h, 16, B, H) 1248a5200a17SSong Gao VSRARNI(vsrarni_h_w, 32, H, W) 1249a5200a17SSong Gao VSRARNI(vsrarni_w_d, 64, W, D) 125083b3815dSSong Gao 125183b3815dSSong Gao #define SSRLNS(NAME, T1, T2, T3) \ 125283b3815dSSong Gao static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \ 125383b3815dSSong Gao { \ 125483b3815dSSong Gao T1 shft_res; \ 125583b3815dSSong Gao if (sa == 0) { \ 125683b3815dSSong Gao shft_res = e2; \ 125783b3815dSSong Gao } else { \ 125883b3815dSSong Gao shft_res = (((T1)e2) >> sa); \ 125983b3815dSSong Gao } \ 126083b3815dSSong Gao T3 mask; \ 126183b3815dSSong Gao mask = (1ull << sh) -1; \ 126283b3815dSSong Gao if (shft_res > mask) { \ 126383b3815dSSong Gao return mask; \ 126483b3815dSSong Gao } else { \ 126583b3815dSSong Gao return shft_res; \ 126683b3815dSSong Gao } \ 126783b3815dSSong Gao } 126883b3815dSSong Gao 126983b3815dSSong Gao SSRLNS(B, uint16_t, int16_t, uint8_t) 127083b3815dSSong Gao SSRLNS(H, uint32_t, int32_t, uint16_t) 127183b3815dSSong Gao SSRLNS(W, uint64_t, int64_t, uint32_t) 127283b3815dSSong Gao 127383b3815dSSong Gao #define VSSRLN(NAME, BIT, T, E1, E2) \ 127404711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 127583b3815dSSong Gao { \ 127683b3815dSSong Gao int i; \ 127704711da1SSong Gao VReg *Vd = (VReg *)vd; \ 127804711da1SSong Gao VReg *Vj = (VReg *)vj; \ 127904711da1SSong Gao VReg *Vk = (VReg *)vk; \ 128083b3815dSSong Gao \ 128183b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 128283b3815dSSong Gao Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \ 128383b3815dSSong Gao } \ 128483b3815dSSong Gao Vd->D(1) = 0; \ 128583b3815dSSong Gao } 128683b3815dSSong Gao 128783b3815dSSong Gao VSSRLN(vssrln_b_h, 16, uint16_t, B, H) 128883b3815dSSong Gao VSSRLN(vssrln_h_w, 32, uint32_t, H, W) 128983b3815dSSong Gao VSSRLN(vssrln_w_d, 64, uint64_t, W, D) 129083b3815dSSong Gao 129183b3815dSSong Gao #define SSRANS(E, T1, T2) \ 129283b3815dSSong Gao static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \ 129383b3815dSSong Gao { \ 129483b3815dSSong Gao T1 shft_res; \ 129583b3815dSSong Gao if (sa == 0) { \ 129683b3815dSSong Gao shft_res = e2; \ 129783b3815dSSong Gao } else { \ 129883b3815dSSong Gao shft_res = e2 >> sa; \ 129983b3815dSSong Gao } \ 130083b3815dSSong Gao T2 mask; \ 130183b3815dSSong Gao mask = (1ll << sh) -1; \ 130283b3815dSSong Gao if (shft_res > mask) { \ 130383b3815dSSong Gao return mask; \ 130483b3815dSSong Gao } else if (shft_res < -(mask +1)) { \ 130583b3815dSSong Gao return ~mask; \ 130683b3815dSSong Gao } else { \ 130783b3815dSSong Gao return shft_res; \ 130883b3815dSSong Gao } \ 130983b3815dSSong Gao } 131083b3815dSSong Gao 131183b3815dSSong Gao SSRANS(B, int16_t, int8_t) 131283b3815dSSong Gao SSRANS(H, int32_t, int16_t) 131383b3815dSSong Gao SSRANS(W, int64_t, int32_t) 131483b3815dSSong Gao 131583b3815dSSong Gao #define VSSRAN(NAME, BIT, T, E1, E2) \ 131604711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 131783b3815dSSong Gao { \ 131883b3815dSSong Gao int i; \ 131904711da1SSong Gao VReg *Vd = (VReg *)vd; \ 132004711da1SSong Gao VReg *Vj = (VReg *)vj; \ 132104711da1SSong Gao VReg *Vk = (VReg *)vk; \ 132283b3815dSSong Gao \ 132383b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 132483b3815dSSong Gao Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ 132583b3815dSSong Gao } \ 132683b3815dSSong Gao Vd->D(1) = 0; \ 132783b3815dSSong Gao } 132883b3815dSSong Gao 132983b3815dSSong Gao VSSRAN(vssran_b_h, 16, uint16_t, B, H) 133083b3815dSSong Gao VSSRAN(vssran_h_w, 32, uint32_t, H, W) 133183b3815dSSong Gao VSSRAN(vssran_w_d, 64, uint64_t, W, D) 133283b3815dSSong Gao 133383b3815dSSong Gao #define SSRLNU(E, T1, T2, T3) \ 133483b3815dSSong Gao static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \ 133583b3815dSSong Gao { \ 133683b3815dSSong Gao T1 shft_res; \ 133783b3815dSSong Gao if (sa == 0) { \ 133883b3815dSSong Gao shft_res = e2; \ 133983b3815dSSong Gao } else { \ 134083b3815dSSong Gao shft_res = (((T1)e2) >> sa); \ 134183b3815dSSong Gao } \ 134283b3815dSSong Gao T2 mask; \ 134383b3815dSSong Gao mask = (1ull << sh) -1; \ 134483b3815dSSong Gao if (shft_res > mask) { \ 134583b3815dSSong Gao return mask; \ 134683b3815dSSong Gao } else { \ 134783b3815dSSong Gao return shft_res; \ 134883b3815dSSong Gao } \ 134983b3815dSSong Gao } 135083b3815dSSong Gao 135183b3815dSSong Gao SSRLNU(B, uint16_t, uint8_t, int16_t) 135283b3815dSSong Gao SSRLNU(H, uint32_t, uint16_t, int32_t) 135383b3815dSSong Gao SSRLNU(W, uint64_t, uint32_t, int64_t) 135483b3815dSSong Gao 135583b3815dSSong Gao #define VSSRLNU(NAME, BIT, T, E1, E2) \ 135604711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 135783b3815dSSong Gao { \ 135883b3815dSSong Gao int i; \ 135904711da1SSong Gao VReg *Vd = (VReg *)vd; \ 136004711da1SSong Gao VReg *Vj = (VReg *)vj; \ 136104711da1SSong Gao VReg *Vk = (VReg *)vk; \ 136283b3815dSSong Gao \ 136383b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 136483b3815dSSong Gao Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ 136583b3815dSSong Gao } \ 136683b3815dSSong Gao Vd->D(1) = 0; \ 136783b3815dSSong Gao } 136883b3815dSSong Gao 136983b3815dSSong Gao VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H) 137083b3815dSSong Gao VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W) 137183b3815dSSong Gao VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D) 137283b3815dSSong Gao 137383b3815dSSong Gao #define SSRANU(E, T1, T2, T3) \ 137483b3815dSSong Gao static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \ 137583b3815dSSong Gao { \ 137683b3815dSSong Gao T1 shft_res; \ 137783b3815dSSong Gao if (sa == 0) { \ 137883b3815dSSong Gao shft_res = e2; \ 137983b3815dSSong Gao } else { \ 138083b3815dSSong Gao shft_res = e2 >> sa; \ 138183b3815dSSong Gao } \ 138283b3815dSSong Gao if (e2 < 0) { \ 138383b3815dSSong Gao shft_res = 0; \ 138483b3815dSSong Gao } \ 138583b3815dSSong Gao T2 mask; \ 138683b3815dSSong Gao mask = (1ull << sh) -1; \ 138783b3815dSSong Gao if (shft_res > mask) { \ 138883b3815dSSong Gao return mask; \ 138983b3815dSSong Gao } else { \ 139083b3815dSSong Gao return shft_res; \ 139183b3815dSSong Gao } \ 139283b3815dSSong Gao } 139383b3815dSSong Gao 139483b3815dSSong Gao SSRANU(B, uint16_t, uint8_t, int16_t) 139583b3815dSSong Gao SSRANU(H, uint32_t, uint16_t, int32_t) 139683b3815dSSong Gao SSRANU(W, uint64_t, uint32_t, int64_t) 139783b3815dSSong Gao 139883b3815dSSong Gao #define VSSRANU(NAME, BIT, T, E1, E2) \ 139904711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 140083b3815dSSong Gao { \ 140183b3815dSSong Gao int i; \ 140204711da1SSong Gao VReg *Vd = (VReg *)vd; \ 140304711da1SSong Gao VReg *Vj = (VReg *)vj; \ 140404711da1SSong Gao VReg *Vk = (VReg *)vk; \ 140583b3815dSSong Gao \ 140683b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 140783b3815dSSong Gao Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ 140883b3815dSSong Gao } \ 140983b3815dSSong Gao Vd->D(1) = 0; \ 141083b3815dSSong Gao } 141183b3815dSSong Gao 141283b3815dSSong Gao VSSRANU(vssran_bu_h, 16, uint16_t, B, H) 141383b3815dSSong Gao VSSRANU(vssran_hu_w, 32, uint32_t, H, W) 141483b3815dSSong Gao VSSRANU(vssran_wu_d, 64, uint64_t, W, D) 141583b3815dSSong Gao 141683b3815dSSong Gao #define VSSRLNI(NAME, BIT, E1, E2) \ 1417329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 141883b3815dSSong Gao { \ 141983b3815dSSong Gao int i; \ 142083b3815dSSong Gao VReg temp; \ 1421329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1422329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 142383b3815dSSong Gao \ 142483b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 142583b3815dSSong Gao temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ 142683b3815dSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\ 142783b3815dSSong Gao } \ 142883b3815dSSong Gao *Vd = temp; \ 142983b3815dSSong Gao } 143083b3815dSSong Gao 1431329517d5SSong Gao void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 143283b3815dSSong Gao { 143383b3815dSSong Gao Int128 shft_res1, shft_res2, mask; 1434329517d5SSong Gao VReg *Vd = (VReg *)vd; 1435329517d5SSong Gao VReg *Vj = (VReg *)vj; 143683b3815dSSong Gao 143783b3815dSSong Gao if (imm == 0) { 143883b3815dSSong Gao shft_res1 = Vj->Q(0); 143983b3815dSSong Gao shft_res2 = Vd->Q(0); 144083b3815dSSong Gao } else { 144183b3815dSSong Gao shft_res1 = int128_urshift(Vj->Q(0), imm); 144283b3815dSSong Gao shft_res2 = int128_urshift(Vd->Q(0), imm); 144383b3815dSSong Gao } 144483b3815dSSong Gao mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); 144583b3815dSSong Gao 144683b3815dSSong Gao if (int128_ult(mask, shft_res1)) { 144783b3815dSSong Gao Vd->D(0) = int128_getlo(mask); 144883b3815dSSong Gao }else { 144983b3815dSSong Gao Vd->D(0) = int128_getlo(shft_res1); 145083b3815dSSong Gao } 145183b3815dSSong Gao 145283b3815dSSong Gao if (int128_ult(mask, shft_res2)) { 145383b3815dSSong Gao Vd->D(1) = int128_getlo(mask); 145483b3815dSSong Gao }else { 145583b3815dSSong Gao Vd->D(1) = int128_getlo(shft_res2); 145683b3815dSSong Gao } 145783b3815dSSong Gao } 145883b3815dSSong Gao 145983b3815dSSong Gao VSSRLNI(vssrlni_b_h, 16, B, H) 146083b3815dSSong Gao VSSRLNI(vssrlni_h_w, 32, H, W) 146183b3815dSSong Gao VSSRLNI(vssrlni_w_d, 64, W, D) 146283b3815dSSong Gao 146383b3815dSSong Gao #define VSSRANI(NAME, BIT, E1, E2) \ 1464329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 146583b3815dSSong Gao { \ 146683b3815dSSong Gao int i; \ 146783b3815dSSong Gao VReg temp; \ 1468329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1469329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 147083b3815dSSong Gao \ 147183b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 147283b3815dSSong Gao temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ 147383b3815dSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \ 147483b3815dSSong Gao } \ 147583b3815dSSong Gao *Vd = temp; \ 147683b3815dSSong Gao } 147783b3815dSSong Gao 1478329517d5SSong Gao void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 147983b3815dSSong Gao { 148083b3815dSSong Gao Int128 shft_res1, shft_res2, mask, min; 1481329517d5SSong Gao VReg *Vd = (VReg *)vd; 1482329517d5SSong Gao VReg *Vj = (VReg *)vj; 148383b3815dSSong Gao 148483b3815dSSong Gao if (imm == 0) { 148583b3815dSSong Gao shft_res1 = Vj->Q(0); 148683b3815dSSong Gao shft_res2 = Vd->Q(0); 148783b3815dSSong Gao } else { 148883b3815dSSong Gao shft_res1 = int128_rshift(Vj->Q(0), imm); 148983b3815dSSong Gao shft_res2 = int128_rshift(Vd->Q(0), imm); 149083b3815dSSong Gao } 149183b3815dSSong Gao mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); 149283b3815dSSong Gao min = int128_lshift(int128_one(), 63); 149383b3815dSSong Gao 149483b3815dSSong Gao if (int128_gt(shft_res1, mask)) { 149583b3815dSSong Gao Vd->D(0) = int128_getlo(mask); 149683b3815dSSong Gao } else if (int128_lt(shft_res1, int128_neg(min))) { 149783b3815dSSong Gao Vd->D(0) = int128_getlo(min); 149883b3815dSSong Gao } else { 149983b3815dSSong Gao Vd->D(0) = int128_getlo(shft_res1); 150083b3815dSSong Gao } 150183b3815dSSong Gao 150283b3815dSSong Gao if (int128_gt(shft_res2, mask)) { 150383b3815dSSong Gao Vd->D(1) = int128_getlo(mask); 150483b3815dSSong Gao } else if (int128_lt(shft_res2, int128_neg(min))) { 150583b3815dSSong Gao Vd->D(1) = int128_getlo(min); 150683b3815dSSong Gao } else { 150783b3815dSSong Gao Vd->D(1) = int128_getlo(shft_res2); 150883b3815dSSong Gao } 150983b3815dSSong Gao } 151083b3815dSSong Gao 151183b3815dSSong Gao VSSRANI(vssrani_b_h, 16, B, H) 151283b3815dSSong Gao VSSRANI(vssrani_h_w, 32, H, W) 151383b3815dSSong Gao VSSRANI(vssrani_w_d, 64, W, D) 151483b3815dSSong Gao 151583b3815dSSong Gao #define VSSRLNUI(NAME, BIT, E1, E2) \ 1516329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 151783b3815dSSong Gao { \ 151883b3815dSSong Gao int i; \ 151983b3815dSSong Gao VReg temp; \ 1520329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1521329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 152283b3815dSSong Gao \ 152383b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 152483b3815dSSong Gao temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \ 152583b3815dSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \ 152683b3815dSSong Gao } \ 152783b3815dSSong Gao *Vd = temp; \ 152883b3815dSSong Gao } 152983b3815dSSong Gao 1530329517d5SSong Gao void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 153183b3815dSSong Gao { 153283b3815dSSong Gao Int128 shft_res1, shft_res2, mask; 1533329517d5SSong Gao VReg *Vd = (VReg *)vd; 1534329517d5SSong Gao VReg *Vj = (VReg *)vj; 153583b3815dSSong Gao 153683b3815dSSong Gao if (imm == 0) { 153783b3815dSSong Gao shft_res1 = Vj->Q(0); 153883b3815dSSong Gao shft_res2 = Vd->Q(0); 153983b3815dSSong Gao } else { 154083b3815dSSong Gao shft_res1 = int128_urshift(Vj->Q(0), imm); 154183b3815dSSong Gao shft_res2 = int128_urshift(Vd->Q(0), imm); 154283b3815dSSong Gao } 154383b3815dSSong Gao mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); 154483b3815dSSong Gao 154583b3815dSSong Gao if (int128_ult(mask, shft_res1)) { 154683b3815dSSong Gao Vd->D(0) = int128_getlo(mask); 154783b3815dSSong Gao }else { 154883b3815dSSong Gao Vd->D(0) = int128_getlo(shft_res1); 154983b3815dSSong Gao } 155083b3815dSSong Gao 155183b3815dSSong Gao if (int128_ult(mask, shft_res2)) { 155283b3815dSSong Gao Vd->D(1) = int128_getlo(mask); 155383b3815dSSong Gao }else { 155483b3815dSSong Gao Vd->D(1) = int128_getlo(shft_res2); 155583b3815dSSong Gao } 155683b3815dSSong Gao } 155783b3815dSSong Gao 155883b3815dSSong Gao VSSRLNUI(vssrlni_bu_h, 16, B, H) 155983b3815dSSong Gao VSSRLNUI(vssrlni_hu_w, 32, H, W) 156083b3815dSSong Gao VSSRLNUI(vssrlni_wu_d, 64, W, D) 156183b3815dSSong Gao 156283b3815dSSong Gao #define VSSRANUI(NAME, BIT, E1, E2) \ 1563329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 156483b3815dSSong Gao { \ 156583b3815dSSong Gao int i; \ 156683b3815dSSong Gao VReg temp; \ 1567329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1568329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 156983b3815dSSong Gao \ 157083b3815dSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 157183b3815dSSong Gao temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \ 157283b3815dSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \ 157383b3815dSSong Gao } \ 157483b3815dSSong Gao *Vd = temp; \ 157583b3815dSSong Gao } 157683b3815dSSong Gao 1577329517d5SSong Gao void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 157883b3815dSSong Gao { 157983b3815dSSong Gao Int128 shft_res1, shft_res2, mask; 1580329517d5SSong Gao VReg *Vd = (VReg *)vd; 1581329517d5SSong Gao VReg *Vj = (VReg *)vj; 158283b3815dSSong Gao 158383b3815dSSong Gao if (imm == 0) { 158483b3815dSSong Gao shft_res1 = Vj->Q(0); 158583b3815dSSong Gao shft_res2 = Vd->Q(0); 158683b3815dSSong Gao } else { 158783b3815dSSong Gao shft_res1 = int128_rshift(Vj->Q(0), imm); 158883b3815dSSong Gao shft_res2 = int128_rshift(Vd->Q(0), imm); 158983b3815dSSong Gao } 159083b3815dSSong Gao 159183b3815dSSong Gao if (int128_lt(Vj->Q(0), int128_zero())) { 159283b3815dSSong Gao shft_res1 = int128_zero(); 159383b3815dSSong Gao } 159483b3815dSSong Gao 159583b3815dSSong Gao if (int128_lt(Vd->Q(0), int128_zero())) { 159683b3815dSSong Gao shft_res2 = int128_zero(); 159783b3815dSSong Gao } 159883b3815dSSong Gao 159983b3815dSSong Gao mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); 160083b3815dSSong Gao 160183b3815dSSong Gao if (int128_ult(mask, shft_res1)) { 160283b3815dSSong Gao Vd->D(0) = int128_getlo(mask); 160383b3815dSSong Gao }else { 160483b3815dSSong Gao Vd->D(0) = int128_getlo(shft_res1); 160583b3815dSSong Gao } 160683b3815dSSong Gao 160783b3815dSSong Gao if (int128_ult(mask, shft_res2)) { 160883b3815dSSong Gao Vd->D(1) = int128_getlo(mask); 160983b3815dSSong Gao }else { 161083b3815dSSong Gao Vd->D(1) = int128_getlo(shft_res2); 161183b3815dSSong Gao } 161283b3815dSSong Gao } 161383b3815dSSong Gao 161483b3815dSSong Gao VSSRANUI(vssrani_bu_h, 16, B, H) 161583b3815dSSong Gao VSSRANUI(vssrani_hu_w, 32, H, W) 161683b3815dSSong Gao VSSRANUI(vssrani_wu_d, 64, W, D) 1617162cd32cSSong Gao 1618162cd32cSSong Gao #define SSRLRNS(E1, E2, T1, T2, T3) \ 1619162cd32cSSong Gao static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \ 1620162cd32cSSong Gao { \ 1621162cd32cSSong Gao T1 shft_res; \ 1622162cd32cSSong Gao \ 1623162cd32cSSong Gao shft_res = do_vsrlr_ ## E2(e2, sa); \ 1624162cd32cSSong Gao T1 mask; \ 1625162cd32cSSong Gao mask = (1ull << sh) -1; \ 1626162cd32cSSong Gao if (shft_res > mask) { \ 1627162cd32cSSong Gao return mask; \ 1628162cd32cSSong Gao } else { \ 1629162cd32cSSong Gao return shft_res; \ 1630162cd32cSSong Gao } \ 1631162cd32cSSong Gao } 1632162cd32cSSong Gao 1633162cd32cSSong Gao SSRLRNS(B, H, uint16_t, int16_t, uint8_t) 1634162cd32cSSong Gao SSRLRNS(H, W, uint32_t, int32_t, uint16_t) 1635162cd32cSSong Gao SSRLRNS(W, D, uint64_t, int64_t, uint32_t) 1636162cd32cSSong Gao 1637162cd32cSSong Gao #define VSSRLRN(NAME, BIT, T, E1, E2) \ 163804711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1639162cd32cSSong Gao { \ 1640162cd32cSSong Gao int i; \ 164104711da1SSong Gao VReg *Vd = (VReg *)vd; \ 164204711da1SSong Gao VReg *Vj = (VReg *)vj; \ 164304711da1SSong Gao VReg *Vk = (VReg *)vk; \ 1644162cd32cSSong Gao \ 1645162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1646162cd32cSSong Gao Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ 1647162cd32cSSong Gao } \ 1648162cd32cSSong Gao Vd->D(1) = 0; \ 1649162cd32cSSong Gao } 1650162cd32cSSong Gao 1651162cd32cSSong Gao VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H) 1652162cd32cSSong Gao VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W) 1653162cd32cSSong Gao VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D) 1654162cd32cSSong Gao 1655162cd32cSSong Gao #define SSRARNS(E1, E2, T1, T2) \ 1656162cd32cSSong Gao static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \ 1657162cd32cSSong Gao { \ 1658162cd32cSSong Gao T1 shft_res; \ 1659162cd32cSSong Gao \ 1660162cd32cSSong Gao shft_res = do_vsrar_ ## E2(e2, sa); \ 1661162cd32cSSong Gao T2 mask; \ 1662162cd32cSSong Gao mask = (1ll << sh) -1; \ 1663162cd32cSSong Gao if (shft_res > mask) { \ 1664162cd32cSSong Gao return mask; \ 1665162cd32cSSong Gao } else if (shft_res < -(mask +1)) { \ 1666162cd32cSSong Gao return ~mask; \ 1667162cd32cSSong Gao } else { \ 1668162cd32cSSong Gao return shft_res; \ 1669162cd32cSSong Gao } \ 1670162cd32cSSong Gao } 1671162cd32cSSong Gao 1672162cd32cSSong Gao SSRARNS(B, H, int16_t, int8_t) 1673162cd32cSSong Gao SSRARNS(H, W, int32_t, int16_t) 1674162cd32cSSong Gao SSRARNS(W, D, int64_t, int32_t) 1675162cd32cSSong Gao 1676162cd32cSSong Gao #define VSSRARN(NAME, BIT, T, E1, E2) \ 167704711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1678162cd32cSSong Gao { \ 1679162cd32cSSong Gao int i; \ 168004711da1SSong Gao VReg *Vd = (VReg *)vd; \ 168104711da1SSong Gao VReg *Vj = (VReg *)vj; \ 168204711da1SSong Gao VReg *Vk = (VReg *)vk; \ 1683162cd32cSSong Gao \ 1684162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1685162cd32cSSong Gao Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \ 1686162cd32cSSong Gao } \ 1687162cd32cSSong Gao Vd->D(1) = 0; \ 1688162cd32cSSong Gao } 1689162cd32cSSong Gao 1690162cd32cSSong Gao VSSRARN(vssrarn_b_h, 16, uint16_t, B, H) 1691162cd32cSSong Gao VSSRARN(vssrarn_h_w, 32, uint32_t, H, W) 1692162cd32cSSong Gao VSSRARN(vssrarn_w_d, 64, uint64_t, W, D) 1693162cd32cSSong Gao 1694162cd32cSSong Gao #define SSRLRNU(E1, E2, T1, T2, T3) \ 1695162cd32cSSong Gao static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \ 1696162cd32cSSong Gao { \ 1697162cd32cSSong Gao T1 shft_res; \ 1698162cd32cSSong Gao \ 1699162cd32cSSong Gao shft_res = do_vsrlr_ ## E2(e2, sa); \ 1700162cd32cSSong Gao \ 1701162cd32cSSong Gao T2 mask; \ 1702162cd32cSSong Gao mask = (1ull << sh) -1; \ 1703162cd32cSSong Gao if (shft_res > mask) { \ 1704162cd32cSSong Gao return mask; \ 1705162cd32cSSong Gao } else { \ 1706162cd32cSSong Gao return shft_res; \ 1707162cd32cSSong Gao } \ 1708162cd32cSSong Gao } 1709162cd32cSSong Gao 1710162cd32cSSong Gao SSRLRNU(B, H, uint16_t, uint8_t, int16_t) 1711162cd32cSSong Gao SSRLRNU(H, W, uint32_t, uint16_t, int32_t) 1712162cd32cSSong Gao SSRLRNU(W, D, uint64_t, uint32_t, int64_t) 1713162cd32cSSong Gao 1714162cd32cSSong Gao #define VSSRLRNU(NAME, BIT, T, E1, E2) \ 171504711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1716162cd32cSSong Gao { \ 1717162cd32cSSong Gao int i; \ 171804711da1SSong Gao VReg *Vd = (VReg *)vd; \ 171904711da1SSong Gao VReg *Vj = (VReg *)vj; \ 172004711da1SSong Gao VReg *Vk = (VReg *)vk; \ 1721162cd32cSSong Gao \ 1722162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1723162cd32cSSong Gao Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ 1724162cd32cSSong Gao } \ 1725162cd32cSSong Gao Vd->D(1) = 0; \ 1726162cd32cSSong Gao } 1727162cd32cSSong Gao 1728162cd32cSSong Gao VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H) 1729162cd32cSSong Gao VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W) 1730162cd32cSSong Gao VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D) 1731162cd32cSSong Gao 1732162cd32cSSong Gao #define SSRARNU(E1, E2, T1, T2, T3) \ 1733162cd32cSSong Gao static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \ 1734162cd32cSSong Gao { \ 1735162cd32cSSong Gao T1 shft_res; \ 1736162cd32cSSong Gao \ 1737162cd32cSSong Gao if (e2 < 0) { \ 1738162cd32cSSong Gao shft_res = 0; \ 1739162cd32cSSong Gao } else { \ 1740162cd32cSSong Gao shft_res = do_vsrar_ ## E2(e2, sa); \ 1741162cd32cSSong Gao } \ 1742162cd32cSSong Gao T2 mask; \ 1743162cd32cSSong Gao mask = (1ull << sh) -1; \ 1744162cd32cSSong Gao if (shft_res > mask) { \ 1745162cd32cSSong Gao return mask; \ 1746162cd32cSSong Gao } else { \ 1747162cd32cSSong Gao return shft_res; \ 1748162cd32cSSong Gao } \ 1749162cd32cSSong Gao } 1750162cd32cSSong Gao 1751162cd32cSSong Gao SSRARNU(B, H, uint16_t, uint8_t, int16_t) 1752162cd32cSSong Gao SSRARNU(H, W, uint32_t, uint16_t, int32_t) 1753162cd32cSSong Gao SSRARNU(W, D, uint64_t, uint32_t, int64_t) 1754162cd32cSSong Gao 1755162cd32cSSong Gao #define VSSRARNU(NAME, BIT, T, E1, E2) \ 175604711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 1757162cd32cSSong Gao { \ 1758162cd32cSSong Gao int i; \ 175904711da1SSong Gao VReg *Vd = (VReg *)vd; \ 176004711da1SSong Gao VReg *Vj = (VReg *)vj; \ 176104711da1SSong Gao VReg *Vk = (VReg *)vk; \ 1762162cd32cSSong Gao \ 1763162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1764162cd32cSSong Gao Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \ 1765162cd32cSSong Gao } \ 1766162cd32cSSong Gao Vd->D(1) = 0; \ 1767162cd32cSSong Gao } 1768162cd32cSSong Gao 1769162cd32cSSong Gao VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H) 1770162cd32cSSong Gao VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W) 1771162cd32cSSong Gao VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D) 1772162cd32cSSong Gao 1773162cd32cSSong Gao #define VSSRLRNI(NAME, BIT, E1, E2) \ 1774329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1775162cd32cSSong Gao { \ 1776162cd32cSSong Gao int i; \ 1777162cd32cSSong Gao VReg temp; \ 1778329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1779329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1780162cd32cSSong Gao \ 1781162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1782162cd32cSSong Gao temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ 1783162cd32cSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\ 1784162cd32cSSong Gao } \ 1785162cd32cSSong Gao *Vd = temp; \ 1786162cd32cSSong Gao } 1787162cd32cSSong Gao 1788162cd32cSSong Gao #define VSSRLRNI_Q(NAME, sh) \ 1789329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1790162cd32cSSong Gao { \ 1791162cd32cSSong Gao Int128 shft_res1, shft_res2, mask, r1, r2; \ 1792329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1793329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1794162cd32cSSong Gao \ 1795162cd32cSSong Gao if (imm == 0) { \ 1796162cd32cSSong Gao shft_res1 = Vj->Q(0); \ 1797162cd32cSSong Gao shft_res2 = Vd->Q(0); \ 1798162cd32cSSong Gao } else { \ 1799162cd32cSSong Gao r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \ 1800162cd32cSSong Gao r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \ 1801162cd32cSSong Gao \ 1802162cd32cSSong Gao shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1)); \ 1803162cd32cSSong Gao shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2)); \ 1804162cd32cSSong Gao } \ 1805162cd32cSSong Gao \ 1806162cd32cSSong Gao mask = int128_sub(int128_lshift(int128_one(), sh), int128_one()); \ 1807162cd32cSSong Gao \ 1808162cd32cSSong Gao if (int128_ult(mask, shft_res1)) { \ 1809162cd32cSSong Gao Vd->D(0) = int128_getlo(mask); \ 1810162cd32cSSong Gao }else { \ 1811162cd32cSSong Gao Vd->D(0) = int128_getlo(shft_res1); \ 1812162cd32cSSong Gao } \ 1813162cd32cSSong Gao \ 1814162cd32cSSong Gao if (int128_ult(mask, shft_res2)) { \ 1815162cd32cSSong Gao Vd->D(1) = int128_getlo(mask); \ 1816162cd32cSSong Gao }else { \ 1817162cd32cSSong Gao Vd->D(1) = int128_getlo(shft_res2); \ 1818162cd32cSSong Gao } \ 1819162cd32cSSong Gao } 1820162cd32cSSong Gao 1821162cd32cSSong Gao VSSRLRNI(vssrlrni_b_h, 16, B, H) 1822162cd32cSSong Gao VSSRLRNI(vssrlrni_h_w, 32, H, W) 1823162cd32cSSong Gao VSSRLRNI(vssrlrni_w_d, 64, W, D) 1824162cd32cSSong Gao VSSRLRNI_Q(vssrlrni_d_q, 63) 1825162cd32cSSong Gao 1826162cd32cSSong Gao #define VSSRARNI(NAME, BIT, E1, E2) \ 1827329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1828162cd32cSSong Gao { \ 1829162cd32cSSong Gao int i; \ 1830162cd32cSSong Gao VReg temp; \ 1831329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1832329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1833162cd32cSSong Gao \ 1834162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1835162cd32cSSong Gao temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \ 1836162cd32cSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT/2 -1); \ 1837162cd32cSSong Gao } \ 1838162cd32cSSong Gao *Vd = temp; \ 1839162cd32cSSong Gao } 1840162cd32cSSong Gao 1841329517d5SSong Gao void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 1842162cd32cSSong Gao { 1843162cd32cSSong Gao Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; 1844329517d5SSong Gao VReg *Vd = (VReg *)vd; 1845329517d5SSong Gao VReg *Vj = (VReg *)vj; 1846162cd32cSSong Gao 1847162cd32cSSong Gao if (imm == 0) { 1848162cd32cSSong Gao shft_res1 = Vj->Q(0); 1849162cd32cSSong Gao shft_res2 = Vd->Q(0); 1850162cd32cSSong Gao } else { 1851162cd32cSSong Gao r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); 1852162cd32cSSong Gao r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); 1853162cd32cSSong Gao 1854162cd32cSSong Gao shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1); 1855162cd32cSSong Gao shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2); 1856162cd32cSSong Gao } 1857162cd32cSSong Gao 1858162cd32cSSong Gao mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one()); 1859162cd32cSSong Gao mask2 = int128_lshift(int128_one(), 63); 1860162cd32cSSong Gao 1861162cd32cSSong Gao if (int128_gt(shft_res1, mask1)) { 1862162cd32cSSong Gao Vd->D(0) = int128_getlo(mask1); 1863162cd32cSSong Gao } else if (int128_lt(shft_res1, int128_neg(mask2))) { 1864162cd32cSSong Gao Vd->D(0) = int128_getlo(mask2); 1865162cd32cSSong Gao } else { 1866162cd32cSSong Gao Vd->D(0) = int128_getlo(shft_res1); 1867162cd32cSSong Gao } 1868162cd32cSSong Gao 1869162cd32cSSong Gao if (int128_gt(shft_res2, mask1)) { 1870162cd32cSSong Gao Vd->D(1) = int128_getlo(mask1); 1871162cd32cSSong Gao } else if (int128_lt(shft_res2, int128_neg(mask2))) { 1872162cd32cSSong Gao Vd->D(1) = int128_getlo(mask2); 1873162cd32cSSong Gao } else { 1874162cd32cSSong Gao Vd->D(1) = int128_getlo(shft_res2); 1875162cd32cSSong Gao } 1876162cd32cSSong Gao } 1877162cd32cSSong Gao 1878162cd32cSSong Gao VSSRARNI(vssrarni_b_h, 16, B, H) 1879162cd32cSSong Gao VSSRARNI(vssrarni_h_w, 32, H, W) 1880162cd32cSSong Gao VSSRARNI(vssrarni_w_d, 64, W, D) 1881162cd32cSSong Gao 1882162cd32cSSong Gao #define VSSRLRNUI(NAME, BIT, E1, E2) \ 1883329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1884162cd32cSSong Gao { \ 1885162cd32cSSong Gao int i; \ 1886162cd32cSSong Gao VReg temp; \ 1887329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1888329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1889162cd32cSSong Gao \ 1890162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1891162cd32cSSong Gao temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \ 1892162cd32cSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \ 1893162cd32cSSong Gao } \ 1894162cd32cSSong Gao *Vd = temp; \ 1895162cd32cSSong Gao } 1896162cd32cSSong Gao 1897162cd32cSSong Gao VSSRLRNUI(vssrlrni_bu_h, 16, B, H) 1898162cd32cSSong Gao VSSRLRNUI(vssrlrni_hu_w, 32, H, W) 1899162cd32cSSong Gao VSSRLRNUI(vssrlrni_wu_d, 64, W, D) 1900162cd32cSSong Gao VSSRLRNI_Q(vssrlrni_du_q, 64) 1901162cd32cSSong Gao 1902162cd32cSSong Gao #define VSSRARNUI(NAME, BIT, E1, E2) \ 1903329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 1904162cd32cSSong Gao { \ 1905162cd32cSSong Gao int i; \ 1906162cd32cSSong Gao VReg temp; \ 1907329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 1908329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 1909162cd32cSSong Gao \ 1910162cd32cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 1911162cd32cSSong Gao temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \ 1912162cd32cSSong Gao temp.E1(i + LSX_LEN/BIT) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT/2); \ 1913162cd32cSSong Gao } \ 1914162cd32cSSong Gao *Vd = temp; \ 1915162cd32cSSong Gao } 1916162cd32cSSong Gao 1917329517d5SSong Gao void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) 1918162cd32cSSong Gao { 1919162cd32cSSong Gao Int128 shft_res1, shft_res2, mask1, mask2, r1, r2; 1920329517d5SSong Gao VReg *Vd = (VReg *)vd; 1921329517d5SSong Gao VReg *Vj = (VReg *)vj; 1922162cd32cSSong Gao 1923162cd32cSSong Gao if (imm == 0) { 1924162cd32cSSong Gao shft_res1 = Vj->Q(0); 1925162cd32cSSong Gao shft_res2 = Vd->Q(0); 1926162cd32cSSong Gao } else { 1927162cd32cSSong Gao r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one()); 1928162cd32cSSong Gao r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one()); 1929162cd32cSSong Gao 1930162cd32cSSong Gao shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1); 1931162cd32cSSong Gao shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2); 1932162cd32cSSong Gao } 1933162cd32cSSong Gao 1934162cd32cSSong Gao if (int128_lt(Vj->Q(0), int128_zero())) { 1935162cd32cSSong Gao shft_res1 = int128_zero(); 1936162cd32cSSong Gao } 1937162cd32cSSong Gao if (int128_lt(Vd->Q(0), int128_zero())) { 1938162cd32cSSong Gao shft_res2 = int128_zero(); 1939162cd32cSSong Gao } 1940162cd32cSSong Gao 1941162cd32cSSong Gao mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one()); 1942162cd32cSSong Gao mask2 = int128_lshift(int128_one(), 64); 1943162cd32cSSong Gao 1944162cd32cSSong Gao if (int128_gt(shft_res1, mask1)) { 1945162cd32cSSong Gao Vd->D(0) = int128_getlo(mask1); 1946162cd32cSSong Gao } else if (int128_lt(shft_res1, int128_neg(mask2))) { 1947162cd32cSSong Gao Vd->D(0) = int128_getlo(mask2); 1948162cd32cSSong Gao } else { 1949162cd32cSSong Gao Vd->D(0) = int128_getlo(shft_res1); 1950162cd32cSSong Gao } 1951162cd32cSSong Gao 1952162cd32cSSong Gao if (int128_gt(shft_res2, mask1)) { 1953162cd32cSSong Gao Vd->D(1) = int128_getlo(mask1); 1954162cd32cSSong Gao } else if (int128_lt(shft_res2, int128_neg(mask2))) { 1955162cd32cSSong Gao Vd->D(1) = int128_getlo(mask2); 1956162cd32cSSong Gao } else { 1957162cd32cSSong Gao Vd->D(1) = int128_getlo(shft_res2); 1958162cd32cSSong Gao } 1959162cd32cSSong Gao } 1960162cd32cSSong Gao 1961162cd32cSSong Gao VSSRARNUI(vssrarni_bu_h, 16, B, H) 1962162cd32cSSong Gao VSSRARNUI(vssrarni_hu_w, 32, H, W) 1963162cd32cSSong Gao VSSRARNUI(vssrarni_wu_d, 64, W, D) 19642e105e12SSong Gao 19652e105e12SSong Gao #define DO_2OP(NAME, BIT, E, DO_OP) \ 1966ff27e335SSong Gao void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ 19672e105e12SSong Gao { \ 19682e105e12SSong Gao int i; \ 1969ff27e335SSong Gao VReg *Vd = (VReg *)vd; \ 1970ff27e335SSong Gao VReg *Vj = (VReg *)vj; \ 19712e105e12SSong Gao \ 19722e105e12SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) \ 19732e105e12SSong Gao { \ 19742e105e12SSong Gao Vd->E(i) = DO_OP(Vj->E(i)); \ 19752e105e12SSong Gao } \ 19762e105e12SSong Gao } 19772e105e12SSong Gao 19782e105e12SSong Gao #define DO_CLO_B(N) (clz32(~N & 0xff) - 24) 19792e105e12SSong Gao #define DO_CLO_H(N) (clz32(~N & 0xffff) - 16) 19802e105e12SSong Gao #define DO_CLO_W(N) (clz32(~N)) 19812e105e12SSong Gao #define DO_CLO_D(N) (clz64(~N)) 19822e105e12SSong Gao #define DO_CLZ_B(N) (clz32(N) - 24) 19832e105e12SSong Gao #define DO_CLZ_H(N) (clz32(N) - 16) 19842e105e12SSong Gao #define DO_CLZ_W(N) (clz32(N)) 19852e105e12SSong Gao #define DO_CLZ_D(N) (clz64(N)) 19862e105e12SSong Gao 19872e105e12SSong Gao DO_2OP(vclo_b, 8, UB, DO_CLO_B) 19882e105e12SSong Gao DO_2OP(vclo_h, 16, UH, DO_CLO_H) 19892e105e12SSong Gao DO_2OP(vclo_w, 32, UW, DO_CLO_W) 19902e105e12SSong Gao DO_2OP(vclo_d, 64, UD, DO_CLO_D) 19912e105e12SSong Gao DO_2OP(vclz_b, 8, UB, DO_CLZ_B) 19922e105e12SSong Gao DO_2OP(vclz_h, 16, UH, DO_CLZ_H) 19932e105e12SSong Gao DO_2OP(vclz_w, 32, UW, DO_CLZ_W) 19942e105e12SSong Gao DO_2OP(vclz_d, 64, UD, DO_CLZ_D) 1995bb22ee57SSong Gao 1996bb22ee57SSong Gao #define VPCNT(NAME, BIT, E, FN) \ 1997ff27e335SSong Gao void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ 1998bb22ee57SSong Gao { \ 1999bb22ee57SSong Gao int i; \ 2000ff27e335SSong Gao VReg *Vd = (VReg *)vd; \ 2001ff27e335SSong Gao VReg *Vj = (VReg *)vj; \ 2002bb22ee57SSong Gao \ 2003bb22ee57SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) \ 2004bb22ee57SSong Gao { \ 2005bb22ee57SSong Gao Vd->E(i) = FN(Vj->E(i)); \ 2006bb22ee57SSong Gao } \ 2007bb22ee57SSong Gao } 2008bb22ee57SSong Gao 2009bb22ee57SSong Gao VPCNT(vpcnt_b, 8, UB, ctpop8) 2010bb22ee57SSong Gao VPCNT(vpcnt_h, 16, UH, ctpop16) 2011bb22ee57SSong Gao VPCNT(vpcnt_w, 32, UW, ctpop32) 2012bb22ee57SSong Gao VPCNT(vpcnt_d, 64, UD, ctpop64) 20130b1e6705SSong Gao 20140b1e6705SSong Gao #define DO_BITCLR(a, bit) (a & ~(1ull << bit)) 20150b1e6705SSong Gao #define DO_BITSET(a, bit) (a | 1ull << bit) 20160b1e6705SSong Gao #define DO_BITREV(a, bit) (a ^ (1ull << bit)) 20170b1e6705SSong Gao 20180b1e6705SSong Gao #define DO_BIT(NAME, BIT, E, DO_OP) \ 20190b1e6705SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \ 20200b1e6705SSong Gao { \ 20210b1e6705SSong Gao int i; \ 20220b1e6705SSong Gao VReg *Vd = (VReg *)vd; \ 20230b1e6705SSong Gao VReg *Vj = (VReg *)vj; \ 20240b1e6705SSong Gao VReg *Vk = (VReg *)vk; \ 20250b1e6705SSong Gao \ 20260b1e6705SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 20270b1e6705SSong Gao Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \ 20280b1e6705SSong Gao } \ 20290b1e6705SSong Gao } 20300b1e6705SSong Gao 20310b1e6705SSong Gao DO_BIT(vbitclr_b, 8, UB, DO_BITCLR) 20320b1e6705SSong Gao DO_BIT(vbitclr_h, 16, UH, DO_BITCLR) 20330b1e6705SSong Gao DO_BIT(vbitclr_w, 32, UW, DO_BITCLR) 20340b1e6705SSong Gao DO_BIT(vbitclr_d, 64, UD, DO_BITCLR) 20350b1e6705SSong Gao DO_BIT(vbitset_b, 8, UB, DO_BITSET) 20360b1e6705SSong Gao DO_BIT(vbitset_h, 16, UH, DO_BITSET) 20370b1e6705SSong Gao DO_BIT(vbitset_w, 32, UW, DO_BITSET) 20380b1e6705SSong Gao DO_BIT(vbitset_d, 64, UD, DO_BITSET) 20390b1e6705SSong Gao DO_BIT(vbitrev_b, 8, UB, DO_BITREV) 20400b1e6705SSong Gao DO_BIT(vbitrev_h, 16, UH, DO_BITREV) 20410b1e6705SSong Gao DO_BIT(vbitrev_w, 32, UW, DO_BITREV) 20420b1e6705SSong Gao DO_BIT(vbitrev_d, 64, UD, DO_BITREV) 20430b1e6705SSong Gao 20440b1e6705SSong Gao #define DO_BITI(NAME, BIT, E, DO_OP) \ 20450b1e6705SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ 20460b1e6705SSong Gao { \ 20470b1e6705SSong Gao int i; \ 20480b1e6705SSong Gao VReg *Vd = (VReg *)vd; \ 20490b1e6705SSong Gao VReg *Vj = (VReg *)vj; \ 20500b1e6705SSong Gao \ 20510b1e6705SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 20520b1e6705SSong Gao Vd->E(i) = DO_OP(Vj->E(i), imm); \ 20530b1e6705SSong Gao } \ 20540b1e6705SSong Gao } 20550b1e6705SSong Gao 20560b1e6705SSong Gao DO_BITI(vbitclri_b, 8, UB, DO_BITCLR) 20570b1e6705SSong Gao DO_BITI(vbitclri_h, 16, UH, DO_BITCLR) 20580b1e6705SSong Gao DO_BITI(vbitclri_w, 32, UW, DO_BITCLR) 20590b1e6705SSong Gao DO_BITI(vbitclri_d, 64, UD, DO_BITCLR) 20600b1e6705SSong Gao DO_BITI(vbitseti_b, 8, UB, DO_BITSET) 20610b1e6705SSong Gao DO_BITI(vbitseti_h, 16, UH, DO_BITSET) 20620b1e6705SSong Gao DO_BITI(vbitseti_w, 32, UW, DO_BITSET) 20630b1e6705SSong Gao DO_BITI(vbitseti_d, 64, UD, DO_BITSET) 20640b1e6705SSong Gao DO_BITI(vbitrevi_b, 8, UB, DO_BITREV) 20650b1e6705SSong Gao DO_BITI(vbitrevi_h, 16, UH, DO_BITREV) 20660b1e6705SSong Gao DO_BITI(vbitrevi_w, 32, UW, DO_BITREV) 20670b1e6705SSong Gao DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) 2068ac95a0b9SSong Gao 2069ac95a0b9SSong Gao #define VFRSTP(NAME, BIT, MASK, E) \ 207004711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 2071ac95a0b9SSong Gao { \ 2072ac95a0b9SSong Gao int i, m; \ 207304711da1SSong Gao VReg *Vd = (VReg *)vd; \ 207404711da1SSong Gao VReg *Vj = (VReg *)vj; \ 207504711da1SSong Gao VReg *Vk = (VReg *)vk; \ 2076ac95a0b9SSong Gao \ 2077ac95a0b9SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2078ac95a0b9SSong Gao if (Vj->E(i) < 0) { \ 2079ac95a0b9SSong Gao break; \ 2080ac95a0b9SSong Gao } \ 2081ac95a0b9SSong Gao } \ 2082ac95a0b9SSong Gao m = Vk->E(0) & MASK; \ 2083ac95a0b9SSong Gao Vd->E(m) = i; \ 2084ac95a0b9SSong Gao } 2085ac95a0b9SSong Gao 2086ac95a0b9SSong Gao VFRSTP(vfrstp_b, 8, 0xf, B) 2087ac95a0b9SSong Gao VFRSTP(vfrstp_h, 16, 0x7, H) 2088ac95a0b9SSong Gao 2089ac95a0b9SSong Gao #define VFRSTPI(NAME, BIT, E) \ 2090329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 2091ac95a0b9SSong Gao { \ 2092ac95a0b9SSong Gao int i, m; \ 2093329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 2094329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 2095ac95a0b9SSong Gao \ 2096ac95a0b9SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2097ac95a0b9SSong Gao if (Vj->E(i) < 0) { \ 2098ac95a0b9SSong Gao break; \ 2099ac95a0b9SSong Gao } \ 2100ac95a0b9SSong Gao } \ 2101ac95a0b9SSong Gao m = imm % (LSX_LEN/BIT); \ 2102ac95a0b9SSong Gao Vd->E(m) = i; \ 2103ac95a0b9SSong Gao } 2104ac95a0b9SSong Gao 2105ac95a0b9SSong Gao VFRSTPI(vfrstpi_b, 8, B) 2106ac95a0b9SSong Gao VFRSTPI(vfrstpi_h, 16, H) 2107aca67472SSong Gao 2108aca67472SSong Gao static void vec_update_fcsr0_mask(CPULoongArchState *env, 2109aca67472SSong Gao uintptr_t pc, int mask) 2110aca67472SSong Gao { 2111aca67472SSong Gao int flags = get_float_exception_flags(&env->fp_status); 2112aca67472SSong Gao 2113aca67472SSong Gao set_float_exception_flags(0, &env->fp_status); 2114aca67472SSong Gao 2115aca67472SSong Gao flags &= ~mask; 2116aca67472SSong Gao 2117aca67472SSong Gao if (flags) { 2118aca67472SSong Gao flags = ieee_ex_to_loongarch(flags); 2119aca67472SSong Gao UPDATE_FP_CAUSE(env->fcsr0, flags); 2120aca67472SSong Gao } 2121aca67472SSong Gao 2122aca67472SSong Gao if (GET_FP_ENABLES(env->fcsr0) & flags) { 2123aca67472SSong Gao do_raise_exception(env, EXCCODE_FPE, pc); 2124aca67472SSong Gao } else { 2125aca67472SSong Gao UPDATE_FP_FLAGS(env->fcsr0, flags); 2126aca67472SSong Gao } 2127aca67472SSong Gao } 2128aca67472SSong Gao 2129aca67472SSong Gao static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc) 2130aca67472SSong Gao { 2131aca67472SSong Gao vec_update_fcsr0_mask(env, pc, 0); 2132aca67472SSong Gao } 2133aca67472SSong Gao 2134aca67472SSong Gao static inline void vec_clear_cause(CPULoongArchState *env) 2135aca67472SSong Gao { 2136aca67472SSong Gao SET_FP_CAUSE(env->fcsr0, 0); 2137aca67472SSong Gao } 2138aca67472SSong Gao 2139aca67472SSong Gao #define DO_3OP_F(NAME, BIT, E, FN) \ 21403b286753SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, \ 21413b286753SSong Gao CPULoongArchState *env, uint32_t desc) \ 2142aca67472SSong Gao { \ 2143aca67472SSong Gao int i; \ 21443b286753SSong Gao VReg *Vd = (VReg *)vd; \ 21453b286753SSong Gao VReg *Vj = (VReg *)vj; \ 21463b286753SSong Gao VReg *Vk = (VReg *)vk; \ 2147aca67472SSong Gao \ 2148aca67472SSong Gao vec_clear_cause(env); \ 2149aca67472SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2150aca67472SSong Gao Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ 2151aca67472SSong Gao vec_update_fcsr0(env, GETPC()); \ 2152aca67472SSong Gao } \ 2153aca67472SSong Gao } 2154aca67472SSong Gao 2155aca67472SSong Gao DO_3OP_F(vfadd_s, 32, UW, float32_add) 2156aca67472SSong Gao DO_3OP_F(vfadd_d, 64, UD, float64_add) 2157aca67472SSong Gao DO_3OP_F(vfsub_s, 32, UW, float32_sub) 2158aca67472SSong Gao DO_3OP_F(vfsub_d, 64, UD, float64_sub) 2159aca67472SSong Gao DO_3OP_F(vfmul_s, 32, UW, float32_mul) 2160aca67472SSong Gao DO_3OP_F(vfmul_d, 64, UD, float64_mul) 2161aca67472SSong Gao DO_3OP_F(vfdiv_s, 32, UW, float32_div) 2162aca67472SSong Gao DO_3OP_F(vfdiv_d, 64, UD, float64_div) 2163aca67472SSong Gao DO_3OP_F(vfmax_s, 32, UW, float32_maxnum) 2164aca67472SSong Gao DO_3OP_F(vfmax_d, 64, UD, float64_maxnum) 2165aca67472SSong Gao DO_3OP_F(vfmin_s, 32, UW, float32_minnum) 2166aca67472SSong Gao DO_3OP_F(vfmin_d, 64, UD, float64_minnum) 2167aca67472SSong Gao DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag) 2168aca67472SSong Gao DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag) 2169aca67472SSong Gao DO_3OP_F(vfmina_s, 32, UW, float32_minnummag) 2170aca67472SSong Gao DO_3OP_F(vfmina_d, 64, UD, float64_minnummag) 2171aca67472SSong Gao 2172aca67472SSong Gao #define DO_4OP_F(NAME, BIT, E, FN, flags) \ 2173e2600dadSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \ 2174e2600dadSSong Gao CPULoongArchState *env, uint32_t desc) \ 2175aca67472SSong Gao { \ 2176aca67472SSong Gao int i; \ 2177e2600dadSSong Gao VReg *Vd = (VReg *)vd; \ 2178e2600dadSSong Gao VReg *Vj = (VReg *)vj; \ 2179e2600dadSSong Gao VReg *Vk = (VReg *)vk; \ 2180e2600dadSSong Gao VReg *Va = (VReg *)va; \ 2181aca67472SSong Gao \ 2182aca67472SSong Gao vec_clear_cause(env); \ 2183aca67472SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2184aca67472SSong Gao Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \ 2185aca67472SSong Gao vec_update_fcsr0(env, GETPC()); \ 2186aca67472SSong Gao } \ 2187aca67472SSong Gao } 2188aca67472SSong Gao 2189aca67472SSong Gao DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0) 2190aca67472SSong Gao DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0) 2191aca67472SSong Gao DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c) 2192aca67472SSong Gao DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c) 2193aca67472SSong Gao DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result) 2194aca67472SSong Gao DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result) 2195aca67472SSong Gao DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd, 2196aca67472SSong Gao float_muladd_negate_c | float_muladd_negate_result) 2197aca67472SSong Gao DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd, 2198aca67472SSong Gao float_muladd_negate_c | float_muladd_negate_result) 2199aca67472SSong Gao 2200aca67472SSong Gao #define DO_2OP_F(NAME, BIT, E, FN) \ 2201226bf881SSong Gao void HELPER(NAME)(void *vd, void *vj, \ 2202226bf881SSong Gao CPULoongArchState *env, uint32_t desc) \ 2203aca67472SSong Gao { \ 2204aca67472SSong Gao int i; \ 2205226bf881SSong Gao VReg *Vd = (VReg *)vd; \ 2206226bf881SSong Gao VReg *Vj = (VReg *)vj; \ 2207aca67472SSong Gao \ 2208aca67472SSong Gao vec_clear_cause(env); \ 2209aca67472SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2210aca67472SSong Gao Vd->E(i) = FN(env, Vj->E(i)); \ 2211aca67472SSong Gao } \ 2212aca67472SSong Gao } 2213aca67472SSong Gao 2214aca67472SSong Gao #define FLOGB(BIT, T) \ 2215aca67472SSong Gao static T do_flogb_## BIT(CPULoongArchState *env, T fj) \ 2216aca67472SSong Gao { \ 2217aca67472SSong Gao T fp, fd; \ 2218aca67472SSong Gao float_status *status = &env->fp_status; \ 2219aca67472SSong Gao FloatRoundMode old_mode = get_float_rounding_mode(status); \ 2220aca67472SSong Gao \ 2221aca67472SSong Gao set_float_rounding_mode(float_round_down, status); \ 2222aca67472SSong Gao fp = float ## BIT ##_log2(fj, status); \ 2223aca67472SSong Gao fd = float ## BIT ##_round_to_int(fp, status); \ 2224aca67472SSong Gao set_float_rounding_mode(old_mode, status); \ 2225aca67472SSong Gao vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact); \ 2226aca67472SSong Gao return fd; \ 2227aca67472SSong Gao } 2228aca67472SSong Gao 2229aca67472SSong Gao FLOGB(32, uint32_t) 2230aca67472SSong Gao FLOGB(64, uint64_t) 2231aca67472SSong Gao 2232aca67472SSong Gao #define FCLASS(NAME, BIT, E, FN) \ 2233226bf881SSong Gao void HELPER(NAME)(void *vd, void *vj, \ 2234226bf881SSong Gao CPULoongArchState *env, uint32_t desc) \ 2235aca67472SSong Gao { \ 2236aca67472SSong Gao int i; \ 2237226bf881SSong Gao VReg *Vd = (VReg *)vd; \ 2238226bf881SSong Gao VReg *Vj = (VReg *)vj; \ 2239aca67472SSong Gao \ 2240aca67472SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2241aca67472SSong Gao Vd->E(i) = FN(env, Vj->E(i)); \ 2242aca67472SSong Gao } \ 2243aca67472SSong Gao } 2244aca67472SSong Gao 2245aca67472SSong Gao FCLASS(vfclass_s, 32, UW, helper_fclass_s) 2246aca67472SSong Gao FCLASS(vfclass_d, 64, UD, helper_fclass_d) 2247aca67472SSong Gao 2248aca67472SSong Gao #define FSQRT(BIT, T) \ 2249aca67472SSong Gao static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \ 2250aca67472SSong Gao { \ 2251aca67472SSong Gao T fd; \ 2252aca67472SSong Gao fd = float ## BIT ##_sqrt(fj, &env->fp_status); \ 2253aca67472SSong Gao vec_update_fcsr0(env, GETPC()); \ 2254aca67472SSong Gao return fd; \ 2255aca67472SSong Gao } 2256aca67472SSong Gao 2257aca67472SSong Gao FSQRT(32, uint32_t) 2258aca67472SSong Gao FSQRT(64, uint64_t) 2259aca67472SSong Gao 2260aca67472SSong Gao #define FRECIP(BIT, T) \ 2261aca67472SSong Gao static T do_frecip_## BIT(CPULoongArchState *env, T fj) \ 2262aca67472SSong Gao { \ 2263aca67472SSong Gao T fd; \ 2264aca67472SSong Gao fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \ 2265aca67472SSong Gao vec_update_fcsr0(env, GETPC()); \ 2266aca67472SSong Gao return fd; \ 2267aca67472SSong Gao } 2268aca67472SSong Gao 2269aca67472SSong Gao FRECIP(32, uint32_t) 2270aca67472SSong Gao FRECIP(64, uint64_t) 2271aca67472SSong Gao 2272aca67472SSong Gao #define FRSQRT(BIT, T) \ 2273aca67472SSong Gao static T do_frsqrt_## BIT(CPULoongArchState *env, T fj) \ 2274aca67472SSong Gao { \ 2275aca67472SSong Gao T fd, fp; \ 2276aca67472SSong Gao fp = float ## BIT ##_sqrt(fj, &env->fp_status); \ 2277aca67472SSong Gao fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \ 2278aca67472SSong Gao vec_update_fcsr0(env, GETPC()); \ 2279aca67472SSong Gao return fd; \ 2280aca67472SSong Gao } 2281aca67472SSong Gao 2282aca67472SSong Gao FRSQRT(32, uint32_t) 2283aca67472SSong Gao FRSQRT(64, uint64_t) 2284aca67472SSong Gao 2285aca67472SSong Gao DO_2OP_F(vflogb_s, 32, UW, do_flogb_32) 2286aca67472SSong Gao DO_2OP_F(vflogb_d, 64, UD, do_flogb_64) 2287aca67472SSong Gao DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32) 2288aca67472SSong Gao DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64) 2289aca67472SSong Gao DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32) 2290aca67472SSong Gao DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64) 2291aca67472SSong Gao DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32) 2292aca67472SSong Gao DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64) 2293399665d2SSong Gao 2294399665d2SSong Gao static uint32_t float16_cvt_float32(uint16_t h, float_status *status) 2295399665d2SSong Gao { 2296399665d2SSong Gao return float16_to_float32(h, true, status); 2297399665d2SSong Gao } 2298399665d2SSong Gao static uint64_t float32_cvt_float64(uint32_t s, float_status *status) 2299399665d2SSong Gao { 2300399665d2SSong Gao return float32_to_float64(s, status); 2301399665d2SSong Gao } 2302399665d2SSong Gao 2303399665d2SSong Gao static uint16_t float32_cvt_float16(uint32_t s, float_status *status) 2304399665d2SSong Gao { 2305399665d2SSong Gao return float32_to_float16(s, true, status); 2306399665d2SSong Gao } 2307399665d2SSong Gao static uint32_t float64_cvt_float32(uint64_t d, float_status *status) 2308399665d2SSong Gao { 2309399665d2SSong Gao return float64_to_float32(d, status); 2310399665d2SSong Gao } 2311399665d2SSong Gao 2312226bf881SSong Gao void HELPER(vfcvtl_s_h)(void *vd, void *vj, 2313226bf881SSong Gao CPULoongArchState *env, uint32_t desc) 2314399665d2SSong Gao { 2315399665d2SSong Gao int i; 2316399665d2SSong Gao VReg temp; 2317226bf881SSong Gao VReg *Vd = (VReg *)vd; 2318226bf881SSong Gao VReg *Vj = (VReg *)vj; 2319399665d2SSong Gao 2320399665d2SSong Gao vec_clear_cause(env); 2321399665d2SSong Gao for (i = 0; i < LSX_LEN/32; i++) { 2322399665d2SSong Gao temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status); 2323399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2324399665d2SSong Gao } 2325399665d2SSong Gao *Vd = temp; 2326399665d2SSong Gao } 2327399665d2SSong Gao 2328226bf881SSong Gao void HELPER(vfcvtl_d_s)(void *vd, void *vj, 2329226bf881SSong Gao CPULoongArchState *env, uint32_t desc) 2330399665d2SSong Gao { 2331399665d2SSong Gao int i; 2332399665d2SSong Gao VReg temp; 2333226bf881SSong Gao VReg *Vd = (VReg *)vd; 2334226bf881SSong Gao VReg *Vj = (VReg *)vj; 2335399665d2SSong Gao 2336399665d2SSong Gao vec_clear_cause(env); 2337399665d2SSong Gao for (i = 0; i < LSX_LEN/64; i++) { 2338399665d2SSong Gao temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status); 2339399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2340399665d2SSong Gao } 2341399665d2SSong Gao *Vd = temp; 2342399665d2SSong Gao } 2343399665d2SSong Gao 2344226bf881SSong Gao void HELPER(vfcvth_s_h)(void *vd, void *vj, 2345226bf881SSong Gao CPULoongArchState *env, uint32_t desc) 2346399665d2SSong Gao { 2347399665d2SSong Gao int i; 2348399665d2SSong Gao VReg temp; 2349226bf881SSong Gao VReg *Vd = (VReg *)vd; 2350226bf881SSong Gao VReg *Vj = (VReg *)vj; 2351399665d2SSong Gao 2352399665d2SSong Gao vec_clear_cause(env); 2353399665d2SSong Gao for (i = 0; i < LSX_LEN/32; i++) { 2354399665d2SSong Gao temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status); 2355399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2356399665d2SSong Gao } 2357399665d2SSong Gao *Vd = temp; 2358399665d2SSong Gao } 2359399665d2SSong Gao 2360226bf881SSong Gao void HELPER(vfcvth_d_s)(void *vd, void *vj, 2361226bf881SSong Gao CPULoongArchState *env, uint32_t desc) 2362399665d2SSong Gao { 2363399665d2SSong Gao int i; 2364399665d2SSong Gao VReg temp; 2365226bf881SSong Gao VReg *Vd = (VReg *)vd; 2366226bf881SSong Gao VReg *Vj = (VReg *)vj; 2367399665d2SSong Gao 2368399665d2SSong Gao vec_clear_cause(env); 2369399665d2SSong Gao for (i = 0; i < LSX_LEN/64; i++) { 2370399665d2SSong Gao temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status); 2371399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2372399665d2SSong Gao } 2373399665d2SSong Gao *Vd = temp; 2374399665d2SSong Gao } 2375399665d2SSong Gao 23763b286753SSong Gao void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk, 23773b286753SSong Gao CPULoongArchState *env, uint32_t desc) 2378399665d2SSong Gao { 2379399665d2SSong Gao int i; 2380399665d2SSong Gao VReg temp; 23813b286753SSong Gao VReg *Vd = (VReg *)vd; 23823b286753SSong Gao VReg *Vj = (VReg *)vj; 23833b286753SSong Gao VReg *Vk = (VReg *)vk; 2384399665d2SSong Gao 2385399665d2SSong Gao vec_clear_cause(env); 2386399665d2SSong Gao for(i = 0; i < LSX_LEN/32; i++) { 2387399665d2SSong Gao temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status); 2388399665d2SSong Gao temp.UH(i) = float32_cvt_float16(Vk->UW(i), &env->fp_status); 2389399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2390399665d2SSong Gao } 2391399665d2SSong Gao *Vd = temp; 2392399665d2SSong Gao } 2393399665d2SSong Gao 23943b286753SSong Gao void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk, 23953b286753SSong Gao CPULoongArchState *env, uint32_t desc) 2396399665d2SSong Gao { 2397399665d2SSong Gao int i; 2398399665d2SSong Gao VReg temp; 23993b286753SSong Gao VReg *Vd = (VReg *)vd; 24003b286753SSong Gao VReg *Vj = (VReg *)vj; 24013b286753SSong Gao VReg *Vk = (VReg *)vk; 2402399665d2SSong Gao 2403399665d2SSong Gao vec_clear_cause(env); 2404399665d2SSong Gao for(i = 0; i < LSX_LEN/64; i++) { 2405399665d2SSong Gao temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status); 2406399665d2SSong Gao temp.UW(i) = float64_cvt_float32(Vk->UD(i), &env->fp_status); 2407399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2408399665d2SSong Gao } 2409399665d2SSong Gao *Vd = temp; 2410399665d2SSong Gao } 2411399665d2SSong Gao 2412226bf881SSong Gao void HELPER(vfrint_s)(void *vd, void *vj, 2413226bf881SSong Gao CPULoongArchState *env, uint32_t desc) 2414399665d2SSong Gao { 2415399665d2SSong Gao int i; 2416226bf881SSong Gao VReg *Vd = (VReg *)vd; 2417226bf881SSong Gao VReg *Vj = (VReg *)vj; 2418399665d2SSong Gao 2419399665d2SSong Gao vec_clear_cause(env); 2420399665d2SSong Gao for (i = 0; i < 4; i++) { 2421399665d2SSong Gao Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status); 2422399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2423399665d2SSong Gao } 2424399665d2SSong Gao } 2425399665d2SSong Gao 2426226bf881SSong Gao void HELPER(vfrint_d)(void *vd, void *vj, 2427226bf881SSong Gao CPULoongArchState *env, uint32_t desc) 2428399665d2SSong Gao { 2429399665d2SSong Gao int i; 2430226bf881SSong Gao VReg *Vd = (VReg *)vd; 2431226bf881SSong Gao VReg *Vj = (VReg *)vj; 2432399665d2SSong Gao 2433399665d2SSong Gao vec_clear_cause(env); 2434399665d2SSong Gao for (i = 0; i < 2; i++) { 2435399665d2SSong Gao Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status); 2436399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2437399665d2SSong Gao } 2438399665d2SSong Gao } 2439399665d2SSong Gao 2440399665d2SSong Gao #define FCVT_2OP(NAME, BIT, E, MODE) \ 2441226bf881SSong Gao void HELPER(NAME)(void *vd, void *vj, \ 2442226bf881SSong Gao CPULoongArchState *env, uint32_t desc) \ 2443399665d2SSong Gao { \ 2444399665d2SSong Gao int i; \ 2445226bf881SSong Gao VReg *Vd = (VReg *)vd; \ 2446226bf881SSong Gao VReg *Vj = (VReg *)vj; \ 2447399665d2SSong Gao \ 2448399665d2SSong Gao vec_clear_cause(env); \ 2449399665d2SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2450399665d2SSong Gao FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ 2451399665d2SSong Gao set_float_rounding_mode(MODE, &env->fp_status); \ 2452399665d2SSong Gao Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \ 2453399665d2SSong Gao set_float_rounding_mode(old_mode, &env->fp_status); \ 2454399665d2SSong Gao vec_update_fcsr0(env, GETPC()); \ 2455399665d2SSong Gao } \ 2456399665d2SSong Gao } 2457399665d2SSong Gao 2458399665d2SSong Gao FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even) 2459399665d2SSong Gao FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even) 2460399665d2SSong Gao FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero) 2461399665d2SSong Gao FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero) 2462399665d2SSong Gao FCVT_2OP(vfrintrp_s, 32, UW, float_round_up) 2463399665d2SSong Gao FCVT_2OP(vfrintrp_d, 64, UD, float_round_up) 2464399665d2SSong Gao FCVT_2OP(vfrintrm_s, 32, UW, float_round_down) 2465399665d2SSong Gao FCVT_2OP(vfrintrm_d, 64, UD, float_round_down) 2466399665d2SSong Gao 2467399665d2SSong Gao #define FTINT(NAME, FMT1, FMT2, T1, T2, MODE) \ 2468399665d2SSong Gao static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj) \ 2469399665d2SSong Gao { \ 2470399665d2SSong Gao T2 fd; \ 2471399665d2SSong Gao FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ 2472399665d2SSong Gao \ 2473399665d2SSong Gao set_float_rounding_mode(MODE, &env->fp_status); \ 2474399665d2SSong Gao fd = do_## FMT1 ##_to_## FMT2(env, fj); \ 2475399665d2SSong Gao set_float_rounding_mode(old_mode, &env->fp_status); \ 2476399665d2SSong Gao return fd; \ 2477399665d2SSong Gao } 2478399665d2SSong Gao 2479399665d2SSong Gao #define DO_FTINT(FMT1, FMT2, T1, T2) \ 2480399665d2SSong Gao static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj) \ 2481399665d2SSong Gao { \ 2482399665d2SSong Gao T2 fd; \ 2483399665d2SSong Gao \ 2484399665d2SSong Gao fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ 2485399665d2SSong Gao if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \ 2486399665d2SSong Gao if (FMT1 ##_is_any_nan(fj)) { \ 2487399665d2SSong Gao fd = 0; \ 2488399665d2SSong Gao } \ 2489399665d2SSong Gao } \ 2490399665d2SSong Gao vec_update_fcsr0(env, GETPC()); \ 2491399665d2SSong Gao return fd; \ 2492399665d2SSong Gao } 2493399665d2SSong Gao 2494399665d2SSong Gao DO_FTINT(float32, int32, uint32_t, uint32_t) 2495399665d2SSong Gao DO_FTINT(float64, int64, uint64_t, uint64_t) 2496399665d2SSong Gao DO_FTINT(float32, uint32, uint32_t, uint32_t) 2497399665d2SSong Gao DO_FTINT(float64, uint64, uint64_t, uint64_t) 2498399665d2SSong Gao DO_FTINT(float64, int32, uint64_t, uint32_t) 2499399665d2SSong Gao DO_FTINT(float32, int64, uint32_t, uint64_t) 2500399665d2SSong Gao 2501399665d2SSong Gao FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even) 2502399665d2SSong Gao FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even) 2503399665d2SSong Gao FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up) 2504399665d2SSong Gao FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up) 2505399665d2SSong Gao FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero) 2506399665d2SSong Gao FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero) 2507399665d2SSong Gao FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down) 2508399665d2SSong Gao FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down) 2509399665d2SSong Gao 2510399665d2SSong Gao DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s) 2511399665d2SSong Gao DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d) 2512399665d2SSong Gao DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s) 2513399665d2SSong Gao DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d) 2514399665d2SSong Gao DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s) 2515399665d2SSong Gao DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d) 2516399665d2SSong Gao DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s) 2517399665d2SSong Gao DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d) 2518399665d2SSong Gao DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32) 2519399665d2SSong Gao DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64) 2520399665d2SSong Gao 2521399665d2SSong Gao FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero) 2522399665d2SSong Gao FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero) 2523399665d2SSong Gao 2524399665d2SSong Gao DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s) 2525399665d2SSong Gao DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d) 2526399665d2SSong Gao DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32) 2527399665d2SSong Gao DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64) 2528399665d2SSong Gao 2529399665d2SSong Gao FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down) 2530399665d2SSong Gao FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up) 2531399665d2SSong Gao FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero) 2532399665d2SSong Gao FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even) 2533399665d2SSong Gao 2534399665d2SSong Gao #define FTINT_W_D(NAME, FN) \ 25353b286753SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, \ 25363b286753SSong Gao CPULoongArchState *env, uint32_t desc) \ 2537399665d2SSong Gao { \ 2538399665d2SSong Gao int i; \ 2539399665d2SSong Gao VReg temp; \ 25403b286753SSong Gao VReg *Vd = (VReg *)vd; \ 25413b286753SSong Gao VReg *Vj = (VReg *)vj; \ 25423b286753SSong Gao VReg *Vk = (VReg *)vk; \ 2543399665d2SSong Gao \ 2544399665d2SSong Gao vec_clear_cause(env); \ 2545399665d2SSong Gao for (i = 0; i < 2; i++) { \ 2546399665d2SSong Gao temp.W(i + 2) = FN(env, Vj->UD(i)); \ 2547399665d2SSong Gao temp.W(i) = FN(env, Vk->UD(i)); \ 2548399665d2SSong Gao } \ 2549399665d2SSong Gao *Vd = temp; \ 2550399665d2SSong Gao } 2551399665d2SSong Gao 2552399665d2SSong Gao FTINT_W_D(vftint_w_d, do_float64_to_int32) 2553399665d2SSong Gao FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d) 2554399665d2SSong Gao FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d) 2555399665d2SSong Gao FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d) 2556399665d2SSong Gao FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d) 2557399665d2SSong Gao 2558399665d2SSong Gao FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down) 2559399665d2SSong Gao FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up) 2560399665d2SSong Gao FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) 2561399665d2SSong Gao FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) 2562399665d2SSong Gao FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down) 2563399665d2SSong Gao FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up) 2564399665d2SSong Gao FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) 2565399665d2SSong Gao FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) 2566399665d2SSong Gao 2567399665d2SSong Gao #define FTINTL_L_S(NAME, FN) \ 2568226bf881SSong Gao void HELPER(NAME)(void *vd, void *vj, \ 2569226bf881SSong Gao CPULoongArchState *env, uint32_t desc) \ 2570399665d2SSong Gao { \ 2571399665d2SSong Gao int i; \ 2572399665d2SSong Gao VReg temp; \ 2573226bf881SSong Gao VReg *Vd = (VReg *)vd; \ 2574226bf881SSong Gao VReg *Vj = (VReg *)vj; \ 2575399665d2SSong Gao \ 2576399665d2SSong Gao vec_clear_cause(env); \ 2577399665d2SSong Gao for (i = 0; i < 2; i++) { \ 2578399665d2SSong Gao temp.D(i) = FN(env, Vj->UW(i)); \ 2579399665d2SSong Gao } \ 2580399665d2SSong Gao *Vd = temp; \ 2581399665d2SSong Gao } 2582399665d2SSong Gao 2583399665d2SSong Gao FTINTL_L_S(vftintl_l_s, do_float32_to_int64) 2584399665d2SSong Gao FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s) 2585399665d2SSong Gao FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s) 2586399665d2SSong Gao FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s) 2587399665d2SSong Gao FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s) 2588399665d2SSong Gao 2589399665d2SSong Gao #define FTINTH_L_S(NAME, FN) \ 2590226bf881SSong Gao void HELPER(NAME)(void *vd, void *vj, \ 2591226bf881SSong Gao CPULoongArchState *env, uint32_t desc) \ 2592399665d2SSong Gao { \ 2593399665d2SSong Gao int i; \ 2594399665d2SSong Gao VReg temp; \ 2595226bf881SSong Gao VReg *Vd = (VReg *)vd; \ 2596226bf881SSong Gao VReg *Vj = (VReg *)vj; \ 2597399665d2SSong Gao \ 2598399665d2SSong Gao vec_clear_cause(env); \ 2599399665d2SSong Gao for (i = 0; i < 2; i++) { \ 2600399665d2SSong Gao temp.D(i) = FN(env, Vj->UW(i + 2)); \ 2601399665d2SSong Gao } \ 2602399665d2SSong Gao *Vd = temp; \ 2603399665d2SSong Gao } 2604399665d2SSong Gao 2605399665d2SSong Gao FTINTH_L_S(vftinth_l_s, do_float32_to_int64) 2606399665d2SSong Gao FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s) 2607399665d2SSong Gao FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s) 2608399665d2SSong Gao FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s) 2609399665d2SSong Gao FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s) 2610399665d2SSong Gao 2611399665d2SSong Gao #define FFINT(NAME, FMT1, FMT2, T1, T2) \ 2612399665d2SSong Gao static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \ 2613399665d2SSong Gao { \ 2614399665d2SSong Gao T2 fd; \ 2615399665d2SSong Gao \ 2616399665d2SSong Gao fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ 2617399665d2SSong Gao vec_update_fcsr0(env, GETPC()); \ 2618399665d2SSong Gao return fd; \ 2619399665d2SSong Gao } 2620399665d2SSong Gao 2621399665d2SSong Gao FFINT(s_w, int32, float32, int32_t, uint32_t) 2622399665d2SSong Gao FFINT(d_l, int64, float64, int64_t, uint64_t) 2623399665d2SSong Gao FFINT(s_wu, uint32, float32, uint32_t, uint32_t) 2624399665d2SSong Gao FFINT(d_lu, uint64, float64, uint64_t, uint64_t) 2625399665d2SSong Gao 2626399665d2SSong Gao DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w) 2627399665d2SSong Gao DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l) 2628399665d2SSong Gao DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu) 2629399665d2SSong Gao DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu) 2630399665d2SSong Gao 2631226bf881SSong Gao void HELPER(vffintl_d_w)(void *vd, void *vj, 2632226bf881SSong Gao CPULoongArchState *env, uint32_t desc) 2633399665d2SSong Gao { 2634399665d2SSong Gao int i; 2635399665d2SSong Gao VReg temp; 2636226bf881SSong Gao VReg *Vd = (VReg *)vd; 2637226bf881SSong Gao VReg *Vj = (VReg *)vj; 2638399665d2SSong Gao 2639399665d2SSong Gao vec_clear_cause(env); 2640399665d2SSong Gao for (i = 0; i < 2; i++) { 2641399665d2SSong Gao temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status); 2642399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2643399665d2SSong Gao } 2644399665d2SSong Gao *Vd = temp; 2645399665d2SSong Gao } 2646399665d2SSong Gao 2647226bf881SSong Gao void HELPER(vffinth_d_w)(void *vd, void *vj, 2648226bf881SSong Gao CPULoongArchState *env, uint32_t desc) 2649399665d2SSong Gao { 2650399665d2SSong Gao int i; 2651399665d2SSong Gao VReg temp; 2652226bf881SSong Gao VReg *Vd = (VReg *)vd; 2653226bf881SSong Gao VReg *Vj = (VReg *)vj; 2654399665d2SSong Gao 2655399665d2SSong Gao vec_clear_cause(env); 2656399665d2SSong Gao for (i = 0; i < 2; i++) { 2657399665d2SSong Gao temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status); 2658399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2659399665d2SSong Gao } 2660399665d2SSong Gao *Vd = temp; 2661399665d2SSong Gao } 2662399665d2SSong Gao 26633b286753SSong Gao void HELPER(vffint_s_l)(void *vd, void *vj, void *vk, 26643b286753SSong Gao CPULoongArchState *env, uint32_t desc) 2665399665d2SSong Gao { 2666399665d2SSong Gao int i; 2667399665d2SSong Gao VReg temp; 26683b286753SSong Gao VReg *Vd = (VReg *)vd; 26693b286753SSong Gao VReg *Vj = (VReg *)vj; 26703b286753SSong Gao VReg *Vk = (VReg *)vk; 2671399665d2SSong Gao 2672399665d2SSong Gao vec_clear_cause(env); 2673399665d2SSong Gao for (i = 0; i < 2; i++) { 2674399665d2SSong Gao temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status); 2675399665d2SSong Gao temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status); 2676399665d2SSong Gao vec_update_fcsr0(env, GETPC()); 2677399665d2SSong Gao } 2678399665d2SSong Gao *Vd = temp; 2679399665d2SSong Gao } 2680f435e1e5SSong Gao 2681f435e1e5SSong Gao #define VSEQ(a, b) (a == b ? -1 : 0) 2682f435e1e5SSong Gao #define VSLE(a, b) (a <= b ? -1 : 0) 2683f435e1e5SSong Gao #define VSLT(a, b) (a < b ? -1 : 0) 2684f435e1e5SSong Gao 2685f435e1e5SSong Gao #define VCMPI(NAME, BIT, E, DO_OP) \ 2686f435e1e5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \ 2687f435e1e5SSong Gao { \ 2688f435e1e5SSong Gao int i; \ 2689f435e1e5SSong Gao VReg *Vd = (VReg *)vd; \ 2690f435e1e5SSong Gao VReg *Vj = (VReg *)vj; \ 2691f435e1e5SSong Gao typedef __typeof(Vd->E(0)) TD; \ 2692f435e1e5SSong Gao \ 2693f435e1e5SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2694f435e1e5SSong Gao Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ 2695f435e1e5SSong Gao } \ 2696f435e1e5SSong Gao } 2697f435e1e5SSong Gao 2698f435e1e5SSong Gao VCMPI(vseqi_b, 8, B, VSEQ) 2699f435e1e5SSong Gao VCMPI(vseqi_h, 16, H, VSEQ) 2700f435e1e5SSong Gao VCMPI(vseqi_w, 32, W, VSEQ) 2701f435e1e5SSong Gao VCMPI(vseqi_d, 64, D, VSEQ) 2702f435e1e5SSong Gao VCMPI(vslei_b, 8, B, VSLE) 2703f435e1e5SSong Gao VCMPI(vslei_h, 16, H, VSLE) 2704f435e1e5SSong Gao VCMPI(vslei_w, 32, W, VSLE) 2705f435e1e5SSong Gao VCMPI(vslei_d, 64, D, VSLE) 2706f435e1e5SSong Gao VCMPI(vslei_bu, 8, UB, VSLE) 2707f435e1e5SSong Gao VCMPI(vslei_hu, 16, UH, VSLE) 2708f435e1e5SSong Gao VCMPI(vslei_wu, 32, UW, VSLE) 2709f435e1e5SSong Gao VCMPI(vslei_du, 64, UD, VSLE) 2710f435e1e5SSong Gao VCMPI(vslti_b, 8, B, VSLT) 2711f435e1e5SSong Gao VCMPI(vslti_h, 16, H, VSLT) 2712f435e1e5SSong Gao VCMPI(vslti_w, 32, W, VSLT) 2713f435e1e5SSong Gao VCMPI(vslti_d, 64, D, VSLT) 2714f435e1e5SSong Gao VCMPI(vslti_bu, 8, UB, VSLT) 2715f435e1e5SSong Gao VCMPI(vslti_hu, 16, UH, VSLT) 2716f435e1e5SSong Gao VCMPI(vslti_wu, 32, UW, VSLT) 2717f435e1e5SSong Gao VCMPI(vslti_du, 64, UD, VSLT) 2718386c4e86SSong Gao 2719386c4e86SSong Gao static uint64_t vfcmp_common(CPULoongArchState *env, 2720386c4e86SSong Gao FloatRelation cmp, uint32_t flags) 2721386c4e86SSong Gao { 2722386c4e86SSong Gao uint64_t ret = 0; 2723386c4e86SSong Gao 2724386c4e86SSong Gao switch (cmp) { 2725386c4e86SSong Gao case float_relation_less: 2726386c4e86SSong Gao ret = (flags & FCMP_LT); 2727386c4e86SSong Gao break; 2728386c4e86SSong Gao case float_relation_equal: 2729386c4e86SSong Gao ret = (flags & FCMP_EQ); 2730386c4e86SSong Gao break; 2731386c4e86SSong Gao case float_relation_greater: 2732386c4e86SSong Gao ret = (flags & FCMP_GT); 2733386c4e86SSong Gao break; 2734386c4e86SSong Gao case float_relation_unordered: 2735386c4e86SSong Gao ret = (flags & FCMP_UN); 2736386c4e86SSong Gao break; 2737386c4e86SSong Gao default: 2738386c4e86SSong Gao g_assert_not_reached(); 2739386c4e86SSong Gao } 2740386c4e86SSong Gao 2741386c4e86SSong Gao if (ret) { 2742386c4e86SSong Gao ret = -1; 2743386c4e86SSong Gao } 2744386c4e86SSong Gao 2745386c4e86SSong Gao return ret; 2746386c4e86SSong Gao } 2747386c4e86SSong Gao 2748386c4e86SSong Gao #define VFCMP(NAME, BIT, E, FN) \ 2749386c4e86SSong Gao void HELPER(NAME)(CPULoongArchState *env, \ 2750386c4e86SSong Gao uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \ 2751386c4e86SSong Gao { \ 2752386c4e86SSong Gao int i; \ 2753386c4e86SSong Gao VReg t; \ 2754386c4e86SSong Gao VReg *Vd = &(env->fpr[vd].vreg); \ 2755386c4e86SSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2756386c4e86SSong Gao VReg *Vk = &(env->fpr[vk].vreg); \ 2757386c4e86SSong Gao \ 2758386c4e86SSong Gao vec_clear_cause(env); \ 2759386c4e86SSong Gao for (i = 0; i < LSX_LEN/BIT ; i++) { \ 2760386c4e86SSong Gao FloatRelation cmp; \ 2761386c4e86SSong Gao cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ 2762386c4e86SSong Gao t.E(i) = vfcmp_common(env, cmp, flags); \ 2763386c4e86SSong Gao vec_update_fcsr0(env, GETPC()); \ 2764386c4e86SSong Gao } \ 2765386c4e86SSong Gao *Vd = t; \ 2766386c4e86SSong Gao } 2767386c4e86SSong Gao 2768386c4e86SSong Gao VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet) 2769386c4e86SSong Gao VFCMP(vfcmp_s_s, 32, UW, float32_compare) 2770386c4e86SSong Gao VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet) 2771386c4e86SSong Gao VFCMP(vfcmp_s_d, 64, UD, float64_compare) 2772d0dfa19aSSong Gao 2773d0dfa19aSSong Gao void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v) 2774d0dfa19aSSong Gao { 2775d0dfa19aSSong Gao int i; 2776d0dfa19aSSong Gao VReg *Vd = (VReg *)vd; 2777d0dfa19aSSong Gao VReg *Vj = (VReg *)vj; 2778d0dfa19aSSong Gao 2779d0dfa19aSSong Gao for (i = 0; i < 16; i++) { 2780d0dfa19aSSong Gao Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm); 2781d0dfa19aSSong Gao } 2782d0dfa19aSSong Gao } 2783d0dfa19aSSong Gao 2784d0dfa19aSSong Gao /* Copy from target/arm/tcg/sve_helper.c */ 2785d0dfa19aSSong Gao static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz) 2786d0dfa19aSSong Gao { 2787d0dfa19aSSong Gao uint64_t bits = 8 << esz; 2788d0dfa19aSSong Gao uint64_t ones = dup_const(esz, 1); 2789d0dfa19aSSong Gao uint64_t signs = ones << (bits - 1); 2790d0dfa19aSSong Gao uint64_t cmp0, cmp1; 2791d0dfa19aSSong Gao 2792d0dfa19aSSong Gao cmp1 = dup_const(esz, n); 2793d0dfa19aSSong Gao cmp0 = cmp1 ^ m0; 2794d0dfa19aSSong Gao cmp1 = cmp1 ^ m1; 2795d0dfa19aSSong Gao cmp0 = (cmp0 - ones) & ~cmp0; 2796d0dfa19aSSong Gao cmp1 = (cmp1 - ones) & ~cmp1; 2797d0dfa19aSSong Gao return (cmp0 | cmp1) & signs; 2798d0dfa19aSSong Gao } 2799d0dfa19aSSong Gao 2800d0dfa19aSSong Gao #define SETANYEQZ(NAME, MO) \ 2801d0dfa19aSSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \ 2802d0dfa19aSSong Gao { \ 2803d0dfa19aSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2804d0dfa19aSSong Gao \ 2805d0dfa19aSSong Gao env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \ 2806d0dfa19aSSong Gao } 2807d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_b, MO_8) 2808d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_h, MO_16) 2809d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_w, MO_32) 2810d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_d, MO_64) 2811d0dfa19aSSong Gao 2812d0dfa19aSSong Gao #define SETALLNEZ(NAME, MO) \ 2813d0dfa19aSSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \ 2814d0dfa19aSSong Gao { \ 2815d0dfa19aSSong Gao VReg *Vj = &(env->fpr[vj].vreg); \ 2816d0dfa19aSSong Gao \ 2817d0dfa19aSSong Gao env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \ 2818d0dfa19aSSong Gao } 2819d0dfa19aSSong Gao SETALLNEZ(vsetallnez_b, MO_8) 2820d0dfa19aSSong Gao SETALLNEZ(vsetallnez_h, MO_16) 2821d0dfa19aSSong Gao SETALLNEZ(vsetallnez_w, MO_32) 2822d0dfa19aSSong Gao SETALLNEZ(vsetallnez_d, MO_64) 2823d5e5563cSSong Gao 2824d5e5563cSSong Gao #define VPACKEV(NAME, BIT, E) \ 282504711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 2826d5e5563cSSong Gao { \ 2827d5e5563cSSong Gao int i; \ 2828d5e5563cSSong Gao VReg temp; \ 282904711da1SSong Gao VReg *Vd = (VReg *)vd; \ 283004711da1SSong Gao VReg *Vj = (VReg *)vj; \ 283104711da1SSong Gao VReg *Vk = (VReg *)vk; \ 2832d5e5563cSSong Gao \ 2833d5e5563cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2834d5e5563cSSong Gao temp.E(2 * i + 1) = Vj->E(2 * i); \ 2835d5e5563cSSong Gao temp.E(2 *i) = Vk->E(2 * i); \ 2836d5e5563cSSong Gao } \ 2837d5e5563cSSong Gao *Vd = temp; \ 2838d5e5563cSSong Gao } 2839d5e5563cSSong Gao 2840d5e5563cSSong Gao VPACKEV(vpackev_b, 16, B) 2841d5e5563cSSong Gao VPACKEV(vpackev_h, 32, H) 2842d5e5563cSSong Gao VPACKEV(vpackev_w, 64, W) 2843d5e5563cSSong Gao VPACKEV(vpackev_d, 128, D) 2844d5e5563cSSong Gao 2845d5e5563cSSong Gao #define VPACKOD(NAME, BIT, E) \ 284604711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 2847d5e5563cSSong Gao { \ 2848d5e5563cSSong Gao int i; \ 2849d5e5563cSSong Gao VReg temp; \ 285004711da1SSong Gao VReg *Vd = (VReg *)vd; \ 285104711da1SSong Gao VReg *Vj = (VReg *)vj; \ 285204711da1SSong Gao VReg *Vk = (VReg *)vk; \ 2853d5e5563cSSong Gao \ 2854d5e5563cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2855d5e5563cSSong Gao temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ 2856d5e5563cSSong Gao temp.E(2 * i) = Vk->E(2 * i + 1); \ 2857d5e5563cSSong Gao } \ 2858d5e5563cSSong Gao *Vd = temp; \ 2859d5e5563cSSong Gao } 2860d5e5563cSSong Gao 2861d5e5563cSSong Gao VPACKOD(vpackod_b, 16, B) 2862d5e5563cSSong Gao VPACKOD(vpackod_h, 32, H) 2863d5e5563cSSong Gao VPACKOD(vpackod_w, 64, W) 2864d5e5563cSSong Gao VPACKOD(vpackod_d, 128, D) 2865d5e5563cSSong Gao 2866d5e5563cSSong Gao #define VPICKEV(NAME, BIT, E) \ 286704711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 2868d5e5563cSSong Gao { \ 2869d5e5563cSSong Gao int i; \ 2870d5e5563cSSong Gao VReg temp; \ 287104711da1SSong Gao VReg *Vd = (VReg *)vd; \ 287204711da1SSong Gao VReg *Vj = (VReg *)vj; \ 287304711da1SSong Gao VReg *Vk = (VReg *)vk; \ 2874d5e5563cSSong Gao \ 2875d5e5563cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2876d5e5563cSSong Gao temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \ 2877d5e5563cSSong Gao temp.E(i) = Vk->E(2 * i); \ 2878d5e5563cSSong Gao } \ 2879d5e5563cSSong Gao *Vd = temp; \ 2880d5e5563cSSong Gao } 2881d5e5563cSSong Gao 2882d5e5563cSSong Gao VPICKEV(vpickev_b, 16, B) 2883d5e5563cSSong Gao VPICKEV(vpickev_h, 32, H) 2884d5e5563cSSong Gao VPICKEV(vpickev_w, 64, W) 2885d5e5563cSSong Gao VPICKEV(vpickev_d, 128, D) 2886d5e5563cSSong Gao 2887d5e5563cSSong Gao #define VPICKOD(NAME, BIT, E) \ 288804711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 2889d5e5563cSSong Gao { \ 2890d5e5563cSSong Gao int i; \ 2891d5e5563cSSong Gao VReg temp; \ 289204711da1SSong Gao VReg *Vd = (VReg *)vd; \ 289304711da1SSong Gao VReg *Vj = (VReg *)vj; \ 289404711da1SSong Gao VReg *Vk = (VReg *)vk; \ 2895d5e5563cSSong Gao \ 2896d5e5563cSSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2897d5e5563cSSong Gao temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \ 2898d5e5563cSSong Gao temp.E(i) = Vk->E(2 * i + 1); \ 2899d5e5563cSSong Gao } \ 2900d5e5563cSSong Gao *Vd = temp; \ 2901d5e5563cSSong Gao } 2902d5e5563cSSong Gao 2903d5e5563cSSong Gao VPICKOD(vpickod_b, 16, B) 2904d5e5563cSSong Gao VPICKOD(vpickod_h, 32, H) 2905d5e5563cSSong Gao VPICKOD(vpickod_w, 64, W) 2906d5e5563cSSong Gao VPICKOD(vpickod_d, 128, D) 2907e93dd431SSong Gao 2908e93dd431SSong Gao #define VILVL(NAME, BIT, E) \ 290904711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 2910e93dd431SSong Gao { \ 2911e93dd431SSong Gao int i; \ 2912e93dd431SSong Gao VReg temp; \ 291304711da1SSong Gao VReg *Vd = (VReg *)vd; \ 291404711da1SSong Gao VReg *Vj = (VReg *)vj; \ 291504711da1SSong Gao VReg *Vk = (VReg *)vk; \ 2916e93dd431SSong Gao \ 2917e93dd431SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2918e93dd431SSong Gao temp.E(2 * i + 1) = Vj->E(i); \ 2919e93dd431SSong Gao temp.E(2 * i) = Vk->E(i); \ 2920e93dd431SSong Gao } \ 2921e93dd431SSong Gao *Vd = temp; \ 2922e93dd431SSong Gao } 2923e93dd431SSong Gao 2924e93dd431SSong Gao VILVL(vilvl_b, 16, B) 2925e93dd431SSong Gao VILVL(vilvl_h, 32, H) 2926e93dd431SSong Gao VILVL(vilvl_w, 64, W) 2927e93dd431SSong Gao VILVL(vilvl_d, 128, D) 2928e93dd431SSong Gao 2929e93dd431SSong Gao #define VILVH(NAME, BIT, E) \ 293004711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 2931e93dd431SSong Gao { \ 2932e93dd431SSong Gao int i; \ 2933e93dd431SSong Gao VReg temp; \ 293404711da1SSong Gao VReg *Vd = (VReg *)vd; \ 293504711da1SSong Gao VReg *Vj = (VReg *)vj; \ 293604711da1SSong Gao VReg *Vk = (VReg *)vk; \ 2937e93dd431SSong Gao \ 2938e93dd431SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2939e93dd431SSong Gao temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \ 2940e93dd431SSong Gao temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \ 2941e93dd431SSong Gao } \ 2942e93dd431SSong Gao *Vd = temp; \ 2943e93dd431SSong Gao } 2944e93dd431SSong Gao 2945e93dd431SSong Gao VILVH(vilvh_b, 16, B) 2946e93dd431SSong Gao VILVH(vilvh_h, 32, H) 2947e93dd431SSong Gao VILVH(vilvh_w, 64, W) 2948e93dd431SSong Gao VILVH(vilvh_d, 128, D) 2949e93dd431SSong Gao 2950eb48ab22SSong Gao void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc) 2951e93dd431SSong Gao { 2952e93dd431SSong Gao int i, m; 2953e93dd431SSong Gao VReg temp; 2954eb48ab22SSong Gao VReg *Vd = (VReg *)vd; 2955eb48ab22SSong Gao VReg *Vj = (VReg *)vj; 2956eb48ab22SSong Gao VReg *Vk = (VReg *)vk; 2957eb48ab22SSong Gao VReg *Va = (VReg *)va; 2958e93dd431SSong Gao 2959e93dd431SSong Gao m = LSX_LEN/8; 2960e93dd431SSong Gao for (i = 0; i < m ; i++) { 2961e93dd431SSong Gao uint64_t k = (uint8_t)Va->B(i) % (2 * m); 2962e93dd431SSong Gao temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m); 2963e93dd431SSong Gao } 2964e93dd431SSong Gao *Vd = temp; 2965e93dd431SSong Gao } 2966e93dd431SSong Gao 2967e93dd431SSong Gao #define VSHUF(NAME, BIT, E) \ 296804711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ 2969e93dd431SSong Gao { \ 2970e93dd431SSong Gao int i, m; \ 2971e93dd431SSong Gao VReg temp; \ 297204711da1SSong Gao VReg *Vd = (VReg *)vd; \ 297304711da1SSong Gao VReg *Vj = (VReg *)vj; \ 297404711da1SSong Gao VReg *Vk = (VReg *)vk; \ 2975e93dd431SSong Gao \ 2976e93dd431SSong Gao m = LSX_LEN/BIT; \ 2977e93dd431SSong Gao for (i = 0; i < m; i++) { \ 2978e93dd431SSong Gao uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \ 2979e93dd431SSong Gao temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \ 2980e93dd431SSong Gao } \ 2981e93dd431SSong Gao *Vd = temp; \ 2982e93dd431SSong Gao } 2983e93dd431SSong Gao 2984e93dd431SSong Gao VSHUF(vshuf_h, 16, H) 2985e93dd431SSong Gao VSHUF(vshuf_w, 32, W) 2986e93dd431SSong Gao VSHUF(vshuf_d, 64, D) 2987e93dd431SSong Gao 2988e93dd431SSong Gao #define VSHUF4I(NAME, BIT, E) \ 2989329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 2990e93dd431SSong Gao { \ 2991e93dd431SSong Gao int i; \ 2992e93dd431SSong Gao VReg temp; \ 2993329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 2994329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 2995e93dd431SSong Gao \ 2996e93dd431SSong Gao for (i = 0; i < LSX_LEN/BIT; i++) { \ 2997e93dd431SSong Gao temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \ 2998e93dd431SSong Gao (2 * ((i) & 0x03))) & 0x03)); \ 2999e93dd431SSong Gao } \ 3000e93dd431SSong Gao *Vd = temp; \ 3001e93dd431SSong Gao } 3002e93dd431SSong Gao 3003e93dd431SSong Gao VSHUF4I(vshuf4i_b, 8, B) 3004e93dd431SSong Gao VSHUF4I(vshuf4i_h, 16, H) 3005e93dd431SSong Gao VSHUF4I(vshuf4i_w, 32, W) 3006e93dd431SSong Gao 3007329517d5SSong Gao void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) 3008e93dd431SSong Gao { 3009329517d5SSong Gao VReg *Vd = (VReg *)vd; 3010329517d5SSong Gao VReg *Vj = (VReg *)vj; 3011e93dd431SSong Gao 3012e93dd431SSong Gao VReg temp; 3013e93dd431SSong Gao temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1); 3014e93dd431SSong Gao temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1); 3015e93dd431SSong Gao *Vd = temp; 3016e93dd431SSong Gao } 3017e93dd431SSong Gao 3018329517d5SSong Gao void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc) 3019e93dd431SSong Gao { 3020e93dd431SSong Gao VReg temp; 3021329517d5SSong Gao VReg *Vd = (VReg *)vd; 3022329517d5SSong Gao VReg *Vj = (VReg *)vj; 3023e93dd431SSong Gao 3024e93dd431SSong Gao temp.W(0) = Vj->W(imm & 0x3); 3025e93dd431SSong Gao temp.W(1) = Vj->W((imm >> 2) & 0x3); 3026e93dd431SSong Gao temp.W(2) = Vd->W((imm >> 4) & 0x3); 3027e93dd431SSong Gao temp.W(3) = Vd->W((imm >> 6) & 0x3); 3028e93dd431SSong Gao *Vd = temp; 3029e93dd431SSong Gao } 3030e93dd431SSong Gao 3031e93dd431SSong Gao #define VEXTRINS(NAME, BIT, E, MASK) \ 3032329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ 3033e93dd431SSong Gao { \ 3034e93dd431SSong Gao int ins, extr; \ 3035329517d5SSong Gao VReg *Vd = (VReg *)vd; \ 3036329517d5SSong Gao VReg *Vj = (VReg *)vj; \ 3037e93dd431SSong Gao \ 3038e93dd431SSong Gao ins = (imm >> 4) & MASK; \ 3039e93dd431SSong Gao extr = imm & MASK; \ 3040e93dd431SSong Gao Vd->E(ins) = Vj->E(extr); \ 3041e93dd431SSong Gao } 3042e93dd431SSong Gao 3043e93dd431SSong Gao VEXTRINS(vextrins_b, 8, B, 0xf) 3044e93dd431SSong Gao VEXTRINS(vextrins_h, 16, H, 0x7) 3045e93dd431SSong Gao VEXTRINS(vextrins_w, 32, W, 0x3) 3046e93dd431SSong Gao VEXTRINS(vextrins_d, 64, D, 0x1) 3047