xref: /qemu/target/loongarch/tcg/vec_helper.c (revision 790acb2a432ead067d6c1a0fc8430d29aa58e4ab)
1a0c9400aSSong Gao /* SPDX-License-Identifier: GPL-2.0-or-later */
2a0c9400aSSong Gao /*
31dc33f26SSong Gao  * QEMU LoongArch vector helper functions.
4a0c9400aSSong Gao  *
5a0c9400aSSong Gao  * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
6a0c9400aSSong Gao  */
7c037fbc9SSong Gao 
8c037fbc9SSong Gao #include "qemu/osdep.h"
9c037fbc9SSong Gao #include "cpu.h"
10c037fbc9SSong Gao #include "exec/exec-all.h"
11c037fbc9SSong Gao #include "exec/helper-proto.h"
12aca67472SSong Gao #include "fpu/softfloat.h"
13aca67472SSong Gao #include "internals.h"
14d0dfa19aSSong Gao #include "tcg/tcg.h"
15008a3b16SSong Gao #include "vec.h"
1664cf6b99SSong Gao #include "tcg/tcg-gvec-desc.h"
17c037fbc9SSong Gao 
/*
 * Element-wise add / subtract primitives handed to the helper-generating
 * macros below.  Every argument is parenthesised so that an argument
 * containing a lower-precedence operator still expands correctly.
 * Arguments are evaluated exactly once.
 */
#define DO_ADD(a, b)  ((a) + (b))
#define DO_SUB(a, b)  ((a) - (b))
20c037fbc9SSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc).  For every destination element i
 * (accessor E1) the helper stores DO_OP(x, y), where x is element
 * 2 * i + 1 of vj and y is element 2 * i of vk (accessor E2), each cast
 * to the element type of E1 first.  The element count is
 * simd_oprsz(desc) / (BIT / 8).
 */
#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    typedef __typeof(Vd->E1(0)) TD; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        TD odd_j = (TD)Vj->E2(2 * idx + 1); \
        TD even_k = (TD)Vk->E2(2 * idx); \
 \
        Vd->E1(idx) = DO_OP(odd_j, even_k); \
    } \
}
35c037fbc9SSong Gao 
36c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD)
37c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD)
38c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)
39c037fbc9SSong Gao 
4004711da1SSong Gao void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
41c037fbc9SSong Gao {
4264cf6b99SSong Gao     int i;
4304711da1SSong Gao     VReg *Vd = (VReg *)vd;
4404711da1SSong Gao     VReg *Vj = (VReg *)vj;
4504711da1SSong Gao     VReg *Vk = (VReg *)vk;
4664cf6b99SSong Gao     int oprsz = simd_oprsz(desc);
47c037fbc9SSong Gao 
4864cf6b99SSong Gao     for (i = 0; i < oprsz / 16 ; i++) {
4964cf6b99SSong Gao         Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)),
5064cf6b99SSong Gao                               int128_makes64(Vk->D(2 * i)));
5164cf6b99SSong Gao     }
52c037fbc9SSong Gao }
53c037fbc9SSong Gao 
54c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
55c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB)
56c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)
57c037fbc9SSong Gao 
5804711da1SSong Gao void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
59c037fbc9SSong Gao {
6064cf6b99SSong Gao     int i;
6104711da1SSong Gao     VReg *Vd = (VReg *)vd;
6204711da1SSong Gao     VReg *Vj = (VReg *)vj;
6304711da1SSong Gao     VReg *Vk = (VReg *)vk;
6464cf6b99SSong Gao     int oprsz = simd_oprsz(desc);
65c037fbc9SSong Gao 
6664cf6b99SSong Gao     for (i = 0; i < oprsz / 16; i++) {
6764cf6b99SSong Gao         Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
6864cf6b99SSong Gao                               int128_makes64(Vk->D(2 * i)));
6964cf6b99SSong Gao     }
70c037fbc9SSong Gao }
71c037fbc9SSong Gao 
72c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
73c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD)
74c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)
75c037fbc9SSong Gao 
7604711da1SSong Gao void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
77c037fbc9SSong Gao {
7864cf6b99SSong Gao     int i;
7904711da1SSong Gao     VReg *Vd = (VReg *)vd;
8004711da1SSong Gao     VReg *Vj = (VReg *)vj;
8104711da1SSong Gao     VReg *Vk = (VReg *)vk;
8264cf6b99SSong Gao     int oprsz = simd_oprsz(desc);
83c037fbc9SSong Gao 
8464cf6b99SSong Gao     for (i = 0; i < oprsz / 16; i ++) {
8564cf6b99SSong Gao         Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
8664cf6b99SSong Gao                               int128_make64(Vk->UD(2 * i)));
8764cf6b99SSong Gao     }
88c037fbc9SSong Gao }
89c037fbc9SSong Gao 
90c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
91c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB)
92c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)
93c037fbc9SSong Gao 
9404711da1SSong Gao void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
95c037fbc9SSong Gao {
9664cf6b99SSong Gao     int i;
9704711da1SSong Gao     VReg *Vd = (VReg *)vd;
9804711da1SSong Gao     VReg *Vj = (VReg *)vj;
9904711da1SSong Gao     VReg *Vk = (VReg *)vk;
10064cf6b99SSong Gao     int oprsz = simd_oprsz(desc);
101c037fbc9SSong Gao 
10264cf6b99SSong Gao     for (i = 0; i < oprsz / 16; i++) {
10364cf6b99SSong Gao         Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
10464cf6b99SSong Gao                               int128_make64(Vk->UD(2 * i)));
10564cf6b99SSong Gao     }
106c037fbc9SSong Gao }
1072d5f950cSSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc).  Destination element i (accessor E1)
 * receives DO_OP applied to element 2 * i of vj and element 2 * i of vk
 * (accessor E2), both cast to the element type of E1.  The element count
 * is simd_oprsz(desc) / (BIT / 8).
 */
#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    typedef __typeof(Vd->E1(0)) TD; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        TD even_j = (TD)Vj->E2(2 * idx); \
        TD even_k = (TD)Vk->E2(2 * idx); \
 \
        Vd->E1(idx) = DO_OP(even_j, even_k); \
    } \
}
1222d5f950cSSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc).  Destination element i (accessor E1)
 * receives DO_OP applied to element 2 * i + 1 of vj and element 2 * i + 1
 * of vk (accessor E2), both cast to the element type of E1.  The element
 * count is simd_oprsz(desc) / (BIT / 8).
 */
#define DO_ODD(NAME, BIT, E1, E2, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    typedef __typeof(Vd->E1(0)) TD; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        TD odd_j = (TD)Vj->E2(2 * idx + 1); \
        TD odd_k = (TD)Vk->E2(2 * idx + 1); \
 \
        Vd->E1(idx) = DO_OP(odd_j, odd_k); \
    } \
}
1372d5f950cSSong Gao 
13885995f07SSong Gao void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
1392d5f950cSSong Gao {
14085995f07SSong Gao     int i;
1412d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1422d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1432d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
14485995f07SSong Gao     int oprsz = simd_oprsz(desc);
1452d5f950cSSong Gao 
14685995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
14785995f07SSong Gao         Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)),
14885995f07SSong Gao                               int128_makes64(Vk->D(2 * i)));
14985995f07SSong Gao     }
1502d5f950cSSong Gao }
1512d5f950cSSong Gao 
1522d5f950cSSong Gao DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD)
1532d5f950cSSong Gao DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD)
1542d5f950cSSong Gao DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD)
1552d5f950cSSong Gao 
15685995f07SSong Gao void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
1572d5f950cSSong Gao {
15885995f07SSong Gao     int i;
1592d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1602d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1612d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
16285995f07SSong Gao     int oprsz = simd_oprsz(desc);
1632d5f950cSSong Gao 
16485995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
16585995f07SSong Gao         Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)),
16685995f07SSong Gao                               int128_makes64(Vk->D(2 * i +1)));
16785995f07SSong Gao     }
1682d5f950cSSong Gao }
1692d5f950cSSong Gao 
1702d5f950cSSong Gao DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD)
1712d5f950cSSong Gao DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD)
1722d5f950cSSong Gao DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD)
1732d5f950cSSong Gao 
17485995f07SSong Gao void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
1752d5f950cSSong Gao {
17685995f07SSong Gao     int i;
1772d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1782d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1792d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
18085995f07SSong Gao     int oprsz = simd_oprsz(desc);
1812d5f950cSSong Gao 
18285995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
18385995f07SSong Gao         Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)),
18485995f07SSong Gao                               int128_makes64(Vk->D(2 * i)));
18585995f07SSong Gao     }
1862d5f950cSSong Gao }
1872d5f950cSSong Gao 
1882d5f950cSSong Gao DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB)
1892d5f950cSSong Gao DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB)
1902d5f950cSSong Gao DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB)
1912d5f950cSSong Gao 
19285995f07SSong Gao void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
1932d5f950cSSong Gao {
19485995f07SSong Gao     int i;
1952d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1962d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1972d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
19885995f07SSong Gao     int oprsz = simd_oprsz(desc);
1992d5f950cSSong Gao 
20085995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
20185995f07SSong Gao         Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
20285995f07SSong Gao                               int128_makes64(Vk->D(2 * i + 1)));
20385995f07SSong Gao     }
2042d5f950cSSong Gao }
2052d5f950cSSong Gao 
2062d5f950cSSong Gao DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB)
2072d5f950cSSong Gao DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB)
2082d5f950cSSong Gao DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB)
2092d5f950cSSong Gao 
21085995f07SSong Gao void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
2112d5f950cSSong Gao {
21285995f07SSong Gao     int i;
2132d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
2142d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
2152d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
21685995f07SSong Gao     int oprsz = simd_oprsz(desc);
2172d5f950cSSong Gao 
21885995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
21985995f07SSong Gao         Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
22085995f07SSong Gao                               int128_make64(Vk->UD(2 * i)));
22185995f07SSong Gao     }
2222d5f950cSSong Gao }
2232d5f950cSSong Gao 
2242d5f950cSSong Gao DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD)
2252d5f950cSSong Gao DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD)
2262d5f950cSSong Gao DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD)
2272d5f950cSSong Gao 
22885995f07SSong Gao void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
2292d5f950cSSong Gao {
23085995f07SSong Gao     int i;
2312d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
2322d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
2332d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
23485995f07SSong Gao     int oprsz = simd_oprsz(desc);
2352d5f950cSSong Gao 
23685995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
23785995f07SSong Gao         Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
23885995f07SSong Gao                               int128_make64(Vk->UD(2 * i + 1)));
23985995f07SSong Gao     }
2402d5f950cSSong Gao }
2412d5f950cSSong Gao 
2422d5f950cSSong Gao DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD)
2432d5f950cSSong Gao DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD)
2442d5f950cSSong Gao DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD)
2452d5f950cSSong Gao 
24685995f07SSong Gao void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
2472d5f950cSSong Gao {
24885995f07SSong Gao     int i;
2492d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
2502d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
2512d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
25285995f07SSong Gao     int oprsz = simd_oprsz(desc);
2532d5f950cSSong Gao 
25485995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
25585995f07SSong Gao         Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)),
25685995f07SSong Gao                               int128_make64(Vk->UD(2 * i)));
25785995f07SSong Gao     }
2582d5f950cSSong Gao }
2592d5f950cSSong Gao 
2602d5f950cSSong Gao DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB)
2612d5f950cSSong Gao DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB)
2622d5f950cSSong Gao DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB)
2632d5f950cSSong Gao 
26485995f07SSong Gao void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
2652d5f950cSSong Gao {
26685995f07SSong Gao     int i;
2672d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
2682d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
2692d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
27085995f07SSong Gao     int oprsz = simd_oprsz(desc);
2712d5f950cSSong Gao 
27285995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
27385995f07SSong Gao         Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
27485995f07SSong Gao                               int128_make64(Vk->UD(2 * i + 1)));
27585995f07SSong Gao     }
2762d5f950cSSong Gao }
2772d5f950cSSong Gao 
2782d5f950cSSong Gao DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB)
2792d5f950cSSong Gao DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB)
2802d5f950cSSong Gao DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB)
2812d5f950cSSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc) mixing two accessor families.
 * Destination element i is written through ES1.  The vj operand is
 * element 2 * i read through EU2 and cast to the element type of EU1; the
 * vk operand is element 2 * i read through ES2 and cast to the element
 * type of ES1.  The element count is simd_oprsz(desc) / (BIT / 8).
 */
#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    typedef __typeof(Vd->ES1(0)) TDS; \
    typedef __typeof(Vd->EU1(0)) TDU; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        TDU even_j = (TDU)Vj->EU2(2 * idx); \
        TDS even_k = (TDS)Vk->ES2(2 * idx); \
 \
        Vd->ES1(idx) = DO_OP(even_j, even_k); \
    } \
}
2972d5f950cSSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc) mixing two accessor families.
 * Destination element i is written through ES1.  The vj operand is
 * element 2 * i + 1 read through EU2 and cast to the element type of EU1;
 * the vk operand is element 2 * i + 1 read through ES2 and cast to the
 * element type of ES1.  The element count is simd_oprsz(desc) / (BIT / 8).
 */
#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    typedef __typeof(Vd->ES1(0)) TDS; \
    typedef __typeof(Vd->EU1(0)) TDU; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        TDU odd_j = (TDU)Vj->EU2(2 * idx + 1); \
        TDS odd_k = (TDS)Vk->ES2(2 * idx + 1); \
 \
        Vd->ES1(idx) = DO_OP(odd_j, odd_k); \
    } \
}
3132d5f950cSSong Gao 
31485995f07SSong Gao void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
3152d5f950cSSong Gao {
31685995f07SSong Gao     int i;
3172d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
3182d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
3192d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
32085995f07SSong Gao     int oprsz = simd_oprsz(desc);
3212d5f950cSSong Gao 
32285995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
32385995f07SSong Gao         Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
32485995f07SSong Gao                               int128_makes64(Vk->D(2 * i)));
32585995f07SSong Gao     }
3262d5f950cSSong Gao }
3272d5f950cSSong Gao 
3282d5f950cSSong Gao DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD)
3292d5f950cSSong Gao DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD)
3302d5f950cSSong Gao DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD)
3312d5f950cSSong Gao 
33285995f07SSong Gao void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
3332d5f950cSSong Gao {
33485995f07SSong Gao     int i;
3352d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
3362d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
3372d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
33885995f07SSong Gao     int oprsz = simd_oprsz(desc);
3392d5f950cSSong Gao 
34085995f07SSong Gao     for (i = 0; i < oprsz / 16; i++) {
34185995f07SSong Gao         Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
34285995f07SSong Gao                               int128_makes64(Vk->D(2 * i + 1)));
34385995f07SSong Gao     }
3442d5f950cSSong Gao }
3452d5f950cSSong Gao 
3462d5f950cSSong Gao DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
3472d5f950cSSong Gao DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
3482d5f950cSSong Gao DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
34939e9b0a7SSong Gao 
/*
 * Averages computed without forming a + b: each operand is halved first
 * and the lost low bits are added back.
 *   DO_VAVG  adds 1 only when both low bits are set.
 *   DO_VAVGR adds 1 when either low bit is set.
 * Arguments are parenthesised so compound expressions expand correctly;
 * each argument is evaluated more than once, so pass side-effect-free
 * expressions only.
 */
#define DO_VAVG(a, b)  (((a) >> 1) + ((b) >> 1) + ((a) & (b) & 1))
#define DO_VAVGR(a, b) (((a) >> 1) + ((b) >> 1) + (((a) | (b)) & 1))
35239e9b0a7SSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc) that applies DO_OP to matching
 * elements of vj and vk (accessor E) and stores the result in the same
 * element of vd.  The element count is simd_oprsz(desc) / (BIT / 8).
 */
#define DO_3OP(NAME, BIT, E, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        Vd->E(idx) = DO_OP(Vj->E(idx), Vk->E(idx)); \
    } \
}
36639e9b0a7SSong Gao 
36739e9b0a7SSong Gao DO_3OP(vavg_b, 8, B, DO_VAVG)
36839e9b0a7SSong Gao DO_3OP(vavg_h, 16, H, DO_VAVG)
36939e9b0a7SSong Gao DO_3OP(vavg_w, 32, W, DO_VAVG)
37039e9b0a7SSong Gao DO_3OP(vavg_d, 64, D, DO_VAVG)
37139e9b0a7SSong Gao DO_3OP(vavgr_b, 8, B, DO_VAVGR)
37239e9b0a7SSong Gao DO_3OP(vavgr_h, 16, H, DO_VAVGR)
37339e9b0a7SSong Gao DO_3OP(vavgr_w, 32, W, DO_VAVGR)
37439e9b0a7SSong Gao DO_3OP(vavgr_d, 64, D, DO_VAVGR)
37539e9b0a7SSong Gao DO_3OP(vavg_bu, 8, UB, DO_VAVG)
37639e9b0a7SSong Gao DO_3OP(vavg_hu, 16, UH, DO_VAVG)
37739e9b0a7SSong Gao DO_3OP(vavg_wu, 32, UW, DO_VAVG)
37839e9b0a7SSong Gao DO_3OP(vavg_du, 64, UD, DO_VAVG)
37939e9b0a7SSong Gao DO_3OP(vavgr_bu, 8, UB, DO_VAVGR)
38039e9b0a7SSong Gao DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
38139e9b0a7SSong Gao DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
38239e9b0a7SSong Gao DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
38349725659SSong Gao 
/*
 * Absolute difference: subtract the smaller operand from the larger.
 * Arguments are parenthesised so compound expressions expand correctly;
 * each argument is evaluated twice, so pass side-effect-free expressions.
 */
#define DO_VABSD(a, b)  (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
38549725659SSong Gao 
38649725659SSong Gao DO_3OP(vabsd_b, 8, B, DO_VABSD)
38749725659SSong Gao DO_3OP(vabsd_h, 16, H, DO_VABSD)
38849725659SSong Gao DO_3OP(vabsd_w, 32, W, DO_VABSD)
38949725659SSong Gao DO_3OP(vabsd_d, 64, D, DO_VABSD)
39049725659SSong Gao DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
39149725659SSong Gao DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
39249725659SSong Gao DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
39349725659SSong Gao DO_3OP(vabsd_du, 64, UD, DO_VABSD)
394af448cb3SSong Gao 
/*
 * Absolute value: negate the argument when it is below zero.
 * The argument is parenthesised so compound expressions expand correctly;
 * it is evaluated twice, so pass side-effect-free expressions only.
 */
#define DO_VABS(a)  (((a) < 0) ? -(a) : (a))
396af448cb3SSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc) that stores, for each element
 * (accessor E), DO_VABS of the vj element plus DO_VABS of the vk element.
 * The element count is simd_oprsz(desc) / (BIT / 8).
 */
#define DO_VADDA(NAME, BIT, E) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        Vd->E(idx) = DO_VABS(Vj->E(idx)) + DO_VABS(Vk->E(idx)); \
    } \
}
410af448cb3SSong Gao 
41127f5485dSSong Gao DO_VADDA(vadda_b, 8, B)
41227f5485dSSong Gao DO_VADDA(vadda_h, 16, H)
41327f5485dSSong Gao DO_VADDA(vadda_w, 32, W)
41427f5485dSSong Gao DO_VADDA(vadda_d, 64, D)
4159ab29520SSong Gao 
/*
 * Smaller / larger of two values.  Arguments are parenthesised so compound
 * expressions expand correctly; each argument is evaluated twice, so pass
 * side-effect-free expressions only.
 */
#define DO_MIN(a, b) ((a) < (b) ? (a) : (b))
#define DO_MAX(a, b) ((a) > (b) ? (a) : (b))
4189ab29520SSong Gao 
/*
 * Emit helper NAME(vd, vj, imm, desc) that applies DO_OP to each element
 * of vj (accessor E) and to imm cast to that element type, storing the
 * result in the same element of vd.  The element count is
 * simd_oprsz(desc) / (BIT / 8).
 */
#define VMINMAXI(NAME, BIT, E, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    typedef __typeof(Vd->E(0)) TD; \
    const TD bound = (TD)imm; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        Vd->E(idx) = DO_OP(Vj->E(idx), bound); \
    } \
}
4329ab29520SSong Gao 
4339ab29520SSong Gao VMINMAXI(vmini_b, 8, B, DO_MIN)
4349ab29520SSong Gao VMINMAXI(vmini_h, 16, H, DO_MIN)
4359ab29520SSong Gao VMINMAXI(vmini_w, 32, W, DO_MIN)
4369ab29520SSong Gao VMINMAXI(vmini_d, 64, D, DO_MIN)
4379ab29520SSong Gao VMINMAXI(vmaxi_b, 8, B, DO_MAX)
4389ab29520SSong Gao VMINMAXI(vmaxi_h, 16, H, DO_MAX)
4399ab29520SSong Gao VMINMAXI(vmaxi_w, 32, W, DO_MAX)
4409ab29520SSong Gao VMINMAXI(vmaxi_d, 64, D, DO_MAX)
4419ab29520SSong Gao VMINMAXI(vmini_bu, 8, UB, DO_MIN)
4429ab29520SSong Gao VMINMAXI(vmini_hu, 16, UH, DO_MIN)
4439ab29520SSong Gao VMINMAXI(vmini_wu, 32, UW, DO_MIN)
4449ab29520SSong Gao VMINMAXI(vmini_du, 64, UD, DO_MIN)
4459ab29520SSong Gao VMINMAXI(vmaxi_bu, 8, UB, DO_MAX)
4469ab29520SSong Gao VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
4479ab29520SSong Gao VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
4489ab29520SSong Gao VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
449cd1c49adSSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc).  Matching elements of vj and vk
 * (accessor E2) are cast to the element type of E1 and multiplied; the
 * product is shifted right by BIT and stored in the same element of vd.
 * The element count is simd_oprsz(desc) / (BIT / 8).
 *
 * The DO_OP parameter is accepted but never expanded in the body.
 */
#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    typedef __typeof(Vd->E1(0)) T; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        Vd->E2(idx) = (((T)Vj->E2(idx)) * ((T)Vk->E2(idx))) >> BIT; \
    } \
}
464cd1c49adSSong Gao 
465342dc1cfSSong Gao void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc)
466cd1c49adSSong Gao {
467342dc1cfSSong Gao     int i;
468342dc1cfSSong Gao     uint64_t l, h;
469cd1c49adSSong Gao     VReg *Vd = (VReg *)vd;
470cd1c49adSSong Gao     VReg *Vj = (VReg *)vj;
471cd1c49adSSong Gao     VReg *Vk = (VReg *)vk;
472342dc1cfSSong Gao     int oprsz = simd_oprsz(desc);
473cd1c49adSSong Gao 
474342dc1cfSSong Gao     for (i = 0; i < oprsz / 8; i++) {
475342dc1cfSSong Gao         muls64(&l, &h, Vj->D(i), Vk->D(i));
476342dc1cfSSong Gao         Vd->D(i) = h;
477342dc1cfSSong Gao     }
478cd1c49adSSong Gao }
479cd1c49adSSong Gao 
480cd1c49adSSong Gao DO_VMUH(vmuh_b, 8, H, B, DO_MUH)
481cd1c49adSSong Gao DO_VMUH(vmuh_h, 16, W, H, DO_MUH)
482cd1c49adSSong Gao DO_VMUH(vmuh_w, 32, D, W, DO_MUH)
483cd1c49adSSong Gao 
484342dc1cfSSong Gao void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc)
485cd1c49adSSong Gao {
486342dc1cfSSong Gao     int i;
487342dc1cfSSong Gao     uint64_t l, h;
488cd1c49adSSong Gao     VReg *Vd = (VReg *)vd;
489cd1c49adSSong Gao     VReg *Vj = (VReg *)vj;
490cd1c49adSSong Gao     VReg *Vk = (VReg *)vk;
491342dc1cfSSong Gao     int oprsz = simd_oprsz(desc);
492cd1c49adSSong Gao 
493342dc1cfSSong Gao     for (i = 0; i < oprsz / 8; i++) {
494342dc1cfSSong Gao         mulu64(&l, &h, Vj->D(i), Vk->D(i));
495342dc1cfSSong Gao         Vd->D(i) = h;
496342dc1cfSSong Gao     }
497cd1c49adSSong Gao }
498cd1c49adSSong Gao 
499cd1c49adSSong Gao DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
500cd1c49adSSong Gao DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH)
501cd1c49adSSong Gao DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH)
502cd1c49adSSong Gao 
/*
 * Product of two values.  Arguments are parenthesised so that an argument
 * such as "x + y" is multiplied as a whole.  Each argument is evaluated
 * exactly once.
 */
#define DO_MUL(a, b) ((a) * (b))
504cd1c49adSSong Gao 
505cd1c49adSSong Gao DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
506cd1c49adSSong Gao DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
507cd1c49adSSong Gao DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)
508cd1c49adSSong Gao 
509cd1c49adSSong Gao DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL)
510cd1c49adSSong Gao DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL)
511cd1c49adSSong Gao DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL)
512cd1c49adSSong Gao 
513cd1c49adSSong Gao DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL)
514cd1c49adSSong Gao DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL)
515cd1c49adSSong Gao DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL)
516cd1c49adSSong Gao 
517cd1c49adSSong Gao DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL)
518cd1c49adSSong Gao DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL)
519cd1c49adSSong Gao DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL)
520cd1c49adSSong Gao 
521cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
522cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
523cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
524cd1c49adSSong Gao 
525cd1c49adSSong Gao DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
526cd1c49adSSong Gao DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
527cd1c49adSSong Gao DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
528d3aec65bSSong Gao 
/*
 * Multiply-accumulate primitives: a plus / minus the product of b and c.
 * Arguments are parenthesised so compound expressions expand correctly.
 * Each argument is evaluated exactly once.
 */
#define DO_MADD(a, b, c)  ((a) + (b) * (c))
#define DO_MSUB(a, b, c)  ((a) - (b) * (c))
531d3aec65bSSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc) that updates each element of vd
 * (accessor E) to DO_OP(old vd element, vj element, vk element).  The
 * element count is simd_oprsz(desc) / (BIT / 8).
 */
#define VMADDSUB(NAME, BIT, E, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        Vd->E(idx) = DO_OP(Vd->E(idx), Vj->E(idx), Vk->E(idx)); \
    } \
}
545d3aec65bSSong Gao 
546d3aec65bSSong Gao VMADDSUB(vmadd_b, 8, B, DO_MADD)
547d3aec65bSSong Gao VMADDSUB(vmadd_h, 16, H, DO_MADD)
548d3aec65bSSong Gao VMADDSUB(vmadd_w, 32, W, DO_MADD)
549d3aec65bSSong Gao VMADDSUB(vmadd_d, 64, D, DO_MADD)
550d3aec65bSSong Gao VMADDSUB(vmsub_b, 8, B, DO_MSUB)
551d3aec65bSSong Gao VMADDSUB(vmsub_h, 16, H, DO_MSUB)
552d3aec65bSSong Gao VMADDSUB(vmsub_w, 32, W, DO_MSUB)
553d3aec65bSSong Gao VMADDSUB(vmsub_d, 64, D, DO_MSUB)
554d3aec65bSSong Gao 
/*
 * Emit helper NAME(vd, vj, vk, desc) that adds to destination element i
 * (accessor E1) the result of DO_OP on element 2 * i of vj and element
 * 2 * i of vk (accessor E2), both cast to the element type of E1.  The
 * element count is simd_oprsz(desc) / (BIT / 8).
 */
#define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
    VReg *Vd = vd; \
    VReg *Vj = vj; \
    VReg *Vk = vk; \
    typedef __typeof(Vd->E1(0)) TD; \
    int idx, nelem = simd_oprsz(desc) / (BIT / 8); \
 \
    for (idx = 0; idx < nelem; idx++) { \
        TD even_j = (TD)Vj->E2(2 * idx); \
        TD even_k = (TD)Vk->E2(2 * idx); \
 \
        Vd->E1(idx) += DO_OP(even_j, even_k); \
    } \
}
569d3aec65bSSong Gao 
570d3aec65bSSong Gao VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL)
571d3aec65bSSong Gao VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL)
572d3aec65bSSong Gao VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL)
573d3aec65bSSong Gao VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL)
574d3aec65bSSong Gao VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL)
575d3aec65bSSong Gao VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL)
576d3aec65bSSong Gao 
577d3aec65bSSong Gao #define VMADDWOD(NAME, BIT, E1, E2, DO_OP)                     \
5783f450c17SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
579d3aec65bSSong Gao {                                                              \
580d3aec65bSSong Gao     int i;                                                     \
581d3aec65bSSong Gao     VReg *Vd = (VReg *)vd;                                     \
582d3aec65bSSong Gao     VReg *Vj = (VReg *)vj;                                     \
583d3aec65bSSong Gao     VReg *Vk = (VReg *)vk;                                     \
584d3aec65bSSong Gao     typedef __typeof(Vd->E1(0)) TD;                            \
5853f450c17SSong Gao     int oprsz = simd_oprsz(desc);                              \
586d3aec65bSSong Gao                                                                \
5873f450c17SSong Gao     for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
588d3aec65bSSong Gao         Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1),              \
589d3aec65bSSong Gao                            (TD)Vk->E2(2 * i + 1));             \
590d3aec65bSSong Gao     }                                                          \
591d3aec65bSSong Gao }
592d3aec65bSSong Gao 
593d3aec65bSSong Gao VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL)
594d3aec65bSSong Gao VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL)
595d3aec65bSSong Gao VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL)
596d3aec65bSSong Gao VMADDWOD(vmaddwod_h_bu, 16,  UH, UB, DO_MUL)
597d3aec65bSSong Gao VMADDWOD(vmaddwod_w_hu, 32,  UW, UH, DO_MUL)
598d3aec65bSSong Gao VMADDWOD(vmaddwod_d_wu, 64,  UD, UW, DO_MUL)
599d3aec65bSSong Gao 
600d3aec65bSSong Gao #define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)     \
6013f450c17SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
602d3aec65bSSong Gao {                                                              \
603d3aec65bSSong Gao     int i;                                                     \
604d3aec65bSSong Gao     VReg *Vd = (VReg *)vd;                                     \
605d3aec65bSSong Gao     VReg *Vj = (VReg *)vj;                                     \
606d3aec65bSSong Gao     VReg *Vk = (VReg *)vk;                                     \
607d3aec65bSSong Gao     typedef __typeof(Vd->ES1(0)) TS1;                          \
608d3aec65bSSong Gao     typedef __typeof(Vd->EU1(0)) TU1;                          \
6093f450c17SSong Gao     int oprsz = simd_oprsz(desc);                              \
610d3aec65bSSong Gao                                                                \
6113f450c17SSong Gao     for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
612d3aec65bSSong Gao         Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i),               \
613d3aec65bSSong Gao                             (TS1)Vk->ES2(2 * i));              \
614d3aec65bSSong Gao     }                                                          \
615d3aec65bSSong Gao }
616d3aec65bSSong Gao 
617d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
618d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
619d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
620d3aec65bSSong Gao 
621d3aec65bSSong Gao #define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)     \
6223f450c17SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
623d3aec65bSSong Gao {                                                              \
624d3aec65bSSong Gao     int i;                                                     \
625d3aec65bSSong Gao     VReg *Vd = (VReg *)vd;                                     \
626d3aec65bSSong Gao     VReg *Vj = (VReg *)vj;                                     \
627d3aec65bSSong Gao     VReg *Vk = (VReg *)vk;                                     \
628d3aec65bSSong Gao     typedef __typeof(Vd->ES1(0)) TS1;                          \
629d3aec65bSSong Gao     typedef __typeof(Vd->EU1(0)) TU1;                          \
6303f450c17SSong Gao     int oprsz = simd_oprsz(desc);                              \
631d3aec65bSSong Gao                                                                \
6323f450c17SSong Gao     for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
633d3aec65bSSong Gao         Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1),           \
634d3aec65bSSong Gao                             (TS1)Vk->ES2(2 * i + 1));          \
635d3aec65bSSong Gao     }                                                          \
636d3aec65bSSong Gao }
637d3aec65bSSong Gao 
638d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
639d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
640d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
6414cc4c0f7SSong Gao 
6424cc4c0f7SSong Gao #define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M)
6434cc4c0f7SSong Gao #define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M)
6444cc4c0f7SSong Gao #define DO_DIV(N, M)  (unlikely(M == 0) ? 0 :\
6454cc4c0f7SSong Gao         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
6464cc4c0f7SSong Gao #define DO_REM(N, M)  (unlikely(M == 0) ? 0 :\
6474cc4c0f7SSong Gao         unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
6484cc4c0f7SSong Gao 
6494cc4c0f7SSong Gao #define VDIV(NAME, BIT, E, DO_OP)                              \
65004711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
6514cc4c0f7SSong Gao {                                                              \
6524cc4c0f7SSong Gao     int i;                                                     \
65304711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
65404711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
65504711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
656abb693deSSong Gao     int oprsz = simd_oprsz(desc);                              \
657abb693deSSong Gao                                                                \
658abb693deSSong Gao     for (i = 0; i < oprsz / (BIT / 8); i++) {                  \
6594cc4c0f7SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i));                  \
6604cc4c0f7SSong Gao     }                                                          \
6614cc4c0f7SSong Gao }
6624cc4c0f7SSong Gao 
6634cc4c0f7SSong Gao VDIV(vdiv_b, 8, B, DO_DIV)
6644cc4c0f7SSong Gao VDIV(vdiv_h, 16, H, DO_DIV)
6654cc4c0f7SSong Gao VDIV(vdiv_w, 32, W, DO_DIV)
6664cc4c0f7SSong Gao VDIV(vdiv_d, 64, D, DO_DIV)
6674cc4c0f7SSong Gao VDIV(vdiv_bu, 8, UB, DO_DIVU)
6684cc4c0f7SSong Gao VDIV(vdiv_hu, 16, UH, DO_DIVU)
6694cc4c0f7SSong Gao VDIV(vdiv_wu, 32, UW, DO_DIVU)
6704cc4c0f7SSong Gao VDIV(vdiv_du, 64, UD, DO_DIVU)
6714cc4c0f7SSong Gao VDIV(vmod_b, 8, B, DO_REM)
6724cc4c0f7SSong Gao VDIV(vmod_h, 16, H, DO_REM)
6734cc4c0f7SSong Gao VDIV(vmod_w, 32, W, DO_REM)
6744cc4c0f7SSong Gao VDIV(vmod_d, 64, D, DO_REM)
6754cc4c0f7SSong Gao VDIV(vmod_bu, 8, UB, DO_REMU)
6764cc4c0f7SSong Gao VDIV(vmod_hu, 16, UH, DO_REMU)
6774cc4c0f7SSong Gao VDIV(vmod_wu, 32, UW, DO_REMU)
6784cc4c0f7SSong Gao VDIV(vmod_du, 64, UD, DO_REMU)
679cbe44190SSong Gao 
680cbe44190SSong Gao #define VSAT_S(NAME, BIT, E)                                       \
681e5c7f031SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
682cbe44190SSong Gao {                                                                  \
683cbe44190SSong Gao     int i;                                                         \
684cbe44190SSong Gao     VReg *Vd = (VReg *)vd;                                         \
685cbe44190SSong Gao     VReg *Vj = (VReg *)vj;                                         \
686cbe44190SSong Gao     typedef __typeof(Vd->E(0)) TD;                                 \
687e5c7f031SSong Gao     int oprsz = simd_oprsz(desc);                                  \
688cbe44190SSong Gao                                                                    \
689e5c7f031SSong Gao     for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
690cbe44190SSong Gao         Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max :                  \
691cbe44190SSong Gao                    Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i);       \
692cbe44190SSong Gao     }                                                              \
693cbe44190SSong Gao }
694cbe44190SSong Gao 
695cbe44190SSong Gao VSAT_S(vsat_b, 8, B)
696cbe44190SSong Gao VSAT_S(vsat_h, 16, H)
697cbe44190SSong Gao VSAT_S(vsat_w, 32, W)
698cbe44190SSong Gao VSAT_S(vsat_d, 64, D)
699cbe44190SSong Gao 
700cbe44190SSong Gao #define VSAT_U(NAME, BIT, E)                                       \
701e5c7f031SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
702cbe44190SSong Gao {                                                                  \
703cbe44190SSong Gao     int i;                                                         \
704cbe44190SSong Gao     VReg *Vd = (VReg *)vd;                                         \
705cbe44190SSong Gao     VReg *Vj = (VReg *)vj;                                         \
706cbe44190SSong Gao     typedef __typeof(Vd->E(0)) TD;                                 \
707e5c7f031SSong Gao     int oprsz = simd_oprsz(desc);                                  \
708cbe44190SSong Gao                                                                    \
709e5c7f031SSong Gao     for (i = 0; i < oprsz / (BIT / 8); i++) {                      \
710cbe44190SSong Gao         Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i);        \
711cbe44190SSong Gao     }                                                              \
712cbe44190SSong Gao }
713cbe44190SSong Gao 
714cbe44190SSong Gao VSAT_U(vsat_bu, 8, UB)
715cbe44190SSong Gao VSAT_U(vsat_hu, 16, UH)
716cbe44190SSong Gao VSAT_U(vsat_wu, 32, UW)
717cbe44190SSong Gao VSAT_U(vsat_du, 64, UD)
7183734ad93SSong Gao 
/*
 * VEXTH - define a helper that widens the upper half of each 16-byte lane.
 *
 * NAME: helper name
 * BIT:  destination element width in bits
 * E1:   destination element accessor
 * E2:   source element accessor
 *
 * ofs = LSX_LEN / BIT is the number of destination elements per lane.
 * For lane i, destination element j + i * ofs receives source element
 * j + ofs + 2 * ofs * i; the "+ ofs" skips the first ofs source elements
 * of the lane.  The widening is the implicit conversion performed by the
 * assignment between the two accessor types.
 */
#define VEXTH(NAME, BIT, E1, E2)                                 \
void HELPER(NAME)(void *vd, void *vj, uint32_t desc)             \
{                                                                \
    int i, j, ofs;                                               \
    VReg *Vd = (VReg *)vd;                                       \
    VReg *Vj = (VReg *)vj;                                       \
    int oprsz = simd_oprsz(desc);                                \
                                                                 \
    ofs = LSX_LEN / BIT;                                         \
    for (i = 0; i < oprsz / 16; i++) {                           \
        for (j = 0; j < ofs; j++) {                              \
            Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \
        }                                                        \
    }                                                            \
}
7343734ad93SSong Gao 
735ff27e335SSong Gao void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc)
7363734ad93SSong Gao {
737f0db0bebSSong Gao     int i;
738ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
739ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
740f0db0bebSSong Gao     int oprsz = simd_oprsz(desc);
7413734ad93SSong Gao 
742f0db0bebSSong Gao     for (i = 0; i < oprsz / 16; i++) {
743f0db0bebSSong Gao         Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1));
744f0db0bebSSong Gao     }
7453734ad93SSong Gao }
7463734ad93SSong Gao 
747ff27e335SSong Gao void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc)
7483734ad93SSong Gao {
749f0db0bebSSong Gao     int i;
750ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
751ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
752f0db0bebSSong Gao     int oprsz = simd_oprsz(desc);
7533734ad93SSong Gao 
754f0db0bebSSong Gao     for (i = 0; i < oprsz / 16; i++) {
755f0db0bebSSong Gao         Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1));
756f0db0bebSSong Gao     }
7573734ad93SSong Gao }
7583734ad93SSong Gao 
7593734ad93SSong Gao VEXTH(vexth_h_b, 16, H, B)
7603734ad93SSong Gao VEXTH(vexth_w_h, 32, W, H)
7613734ad93SSong Gao VEXTH(vexth_d_w, 64, D, W)
7623734ad93SSong Gao VEXTH(vexth_hu_bu, 16, UH, UB)
7633734ad93SSong Gao VEXTH(vexth_wu_hu, 32, UW, UH)
7643734ad93SSong Gao VEXTH(vexth_du_wu, 64, UD, UW)
765f0e395dfSSong Gao 
766790acb2aSSong Gao #define VEXT2XV(NAME, BIT, E1, E2)                   \
767790acb2aSSong Gao void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
768790acb2aSSong Gao {                                                    \
769790acb2aSSong Gao     int i;                                           \
770790acb2aSSong Gao     VReg temp = {};                                  \
771790acb2aSSong Gao     VReg *Vd = (VReg *)vd;                           \
772790acb2aSSong Gao     VReg *Vj = (VReg *)vj;                           \
773790acb2aSSong Gao     int oprsz = simd_oprsz(desc);                    \
774790acb2aSSong Gao                                                      \
775790acb2aSSong Gao     for (i = 0; i < oprsz / (BIT / 8); i++) {        \
776790acb2aSSong Gao         temp.E1(i) = Vj->E2(i);                      \
777790acb2aSSong Gao     }                                                \
778790acb2aSSong Gao     *Vd = temp;                                      \
779790acb2aSSong Gao }
780790acb2aSSong Gao 
781790acb2aSSong Gao VEXT2XV(vext2xv_h_b, 16, H, B)
782790acb2aSSong Gao VEXT2XV(vext2xv_w_b, 32, W, B)
783790acb2aSSong Gao VEXT2XV(vext2xv_d_b, 64, D, B)
784790acb2aSSong Gao VEXT2XV(vext2xv_w_h, 32, W, H)
785790acb2aSSong Gao VEXT2XV(vext2xv_d_h, 64, D, H)
786790acb2aSSong Gao VEXT2XV(vext2xv_d_w, 64, D, W)
787790acb2aSSong Gao VEXT2XV(vext2xv_hu_bu, 16, UH, UB)
788790acb2aSSong Gao VEXT2XV(vext2xv_wu_bu, 32, UW, UB)
789790acb2aSSong Gao VEXT2XV(vext2xv_du_bu, 64, UD, UB)
790790acb2aSSong Gao VEXT2XV(vext2xv_wu_hu, 32, UW, UH)
791790acb2aSSong Gao VEXT2XV(vext2xv_du_hu, 64, UD, UH)
792790acb2aSSong Gao VEXT2XV(vext2xv_du_wu, 64, UD, UW)
793790acb2aSSong Gao 
794f0e395dfSSong Gao #define DO_SIGNCOV(a, b)  (a == 0 ? 0 : a < 0 ? -b : b)
795f0e395dfSSong Gao 
796f0e395dfSSong Gao DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
797f0e395dfSSong Gao DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
798f0e395dfSSong Gao DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
799f0e395dfSSong Gao DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
800789f4a4cSSong Gao 
/*
 * Gather the top bit of each of the eight bytes of val into an 8-bit
 * mask: bit k of the result is bit 7 of byte k.
 *
 * The three shift-and-or steps fold the isolated top bits towards the
 * most significant byte (pairs, then groups of four, then all eight), so
 * that the final right shift by 56 leaves exactly the gathered mask.
 */
static uint64_t do_vmskltz_b(int64_t val)
{
    uint64_t m = 0x8080808080808080ULL;
    uint64_t c = (uint64_t)val & m;

    c |= c << 7;
    c |= c << 14;
    c |= c << 28;
    return c >> 56;
}
810789f4a4cSSong Gao 
811ff27e335SSong Gao void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc)
812789f4a4cSSong Gao {
813789f4a4cSSong Gao     uint16_t temp = 0;
814ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
815ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
816789f4a4cSSong Gao 
817789f4a4cSSong Gao     temp = do_vmskltz_b(Vj->D(0));
818789f4a4cSSong Gao     temp |= (do_vmskltz_b(Vj->D(1)) << 8);
819789f4a4cSSong Gao     Vd->D(0) = temp;
820789f4a4cSSong Gao     Vd->D(1) = 0;
821789f4a4cSSong Gao }
822789f4a4cSSong Gao 
/*
 * Gather the top bit of each of the four 16-bit fields of val into a
 * 4-bit mask: bit k of the result is bit 15 of field k.
 */
static uint64_t do_vmskltz_h(int64_t val)
{
    uint64_t m = 0x8000800080008000ULL;
    uint64_t c = (uint64_t)val & m;

    c |= c << 15;
    c |= c << 30;
    return c >> 60;
}
831789f4a4cSSong Gao 
832ff27e335SSong Gao void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc)
833789f4a4cSSong Gao {
834789f4a4cSSong Gao     uint16_t temp = 0;
835ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
836ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
837789f4a4cSSong Gao 
838789f4a4cSSong Gao     temp = do_vmskltz_h(Vj->D(0));
839789f4a4cSSong Gao     temp |= (do_vmskltz_h(Vj->D(1)) << 4);
840789f4a4cSSong Gao     Vd->D(0) = temp;
841789f4a4cSSong Gao     Vd->D(1) = 0;
842789f4a4cSSong Gao }
843789f4a4cSSong Gao 
/*
 * Gather the top bit of each of the two 32-bit fields of val into a
 * 2-bit mask: bit 0 is bit 31 of val, bit 1 is bit 63 of val.
 */
static uint64_t do_vmskltz_w(int64_t val)
{
    uint64_t m = 0x8000000080000000ULL;
    uint64_t c = (uint64_t)val & m;

    c |= c << 31;
    return c >> 62;
}
851789f4a4cSSong Gao 
852ff27e335SSong Gao void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc)
853789f4a4cSSong Gao {
854789f4a4cSSong Gao     uint16_t temp = 0;
855ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
856ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
857789f4a4cSSong Gao 
858789f4a4cSSong Gao     temp = do_vmskltz_w(Vj->D(0));
859789f4a4cSSong Gao     temp |= (do_vmskltz_w(Vj->D(1)) << 2);
860789f4a4cSSong Gao     Vd->D(0) = temp;
861789f4a4cSSong Gao     Vd->D(1) = 0;
862789f4a4cSSong Gao }
863789f4a4cSSong Gao 
/* Return bit 63 of val: 1 when val is negative, 0 otherwise. */
static uint64_t do_vmskltz_d(int64_t val)
{
    return (uint64_t)val >> 63;
}
868ff27e335SSong Gao void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc)
869789f4a4cSSong Gao {
870789f4a4cSSong Gao     uint16_t temp = 0;
871ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
872ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
873789f4a4cSSong Gao 
874789f4a4cSSong Gao     temp = do_vmskltz_d(Vj->D(0));
875789f4a4cSSong Gao     temp |= (do_vmskltz_d(Vj->D(1)) << 1);
876789f4a4cSSong Gao     Vd->D(0) = temp;
877789f4a4cSSong Gao     Vd->D(1) = 0;
878789f4a4cSSong Gao }
879789f4a4cSSong Gao 
880ff27e335SSong Gao void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc)
881789f4a4cSSong Gao {
882789f4a4cSSong Gao     uint16_t temp = 0;
883ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
884ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
885789f4a4cSSong Gao 
886789f4a4cSSong Gao     temp =  do_vmskltz_b(Vj->D(0));
887789f4a4cSSong Gao     temp |= (do_vmskltz_b(Vj->D(1)) << 8);
888789f4a4cSSong Gao     Vd->D(0) = (uint16_t)(~temp);
889789f4a4cSSong Gao     Vd->D(1) = 0;
890789f4a4cSSong Gao }
891789f4a4cSSong Gao 
/*
 * Return an 8-bit mask with bit k set when byte k of a is zero.
 *
 * Per byte, (a & 0x7f) + 0x7f carries into bit 7 exactly when the low
 * seven bits are non-zero; or-ing in a itself covers a set bit 7, and
 * or-ing in m fills the low seven bits.  After inversion only bit 7 of
 * all-zero bytes remains set, and the shift-and-or steps gather those
 * bits into the top byte.
 */
static uint64_t do_vmskez_b(uint64_t a)
{
    uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
    uint64_t c = ~(((a & m) + m) | a | m);

    c |= c << 7;
    c |= c << 14;
    c |= c << 28;
    return c >> 56;
}
901789f4a4cSSong Gao 
902ff27e335SSong Gao void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
903789f4a4cSSong Gao {
904789f4a4cSSong Gao     uint16_t temp = 0;
905ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
906ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
907789f4a4cSSong Gao 
908789f4a4cSSong Gao     temp = do_vmskez_b(Vj->D(0));
909789f4a4cSSong Gao     temp |= (do_vmskez_b(Vj->D(1)) << 8);
910789f4a4cSSong Gao     Vd->D(0) = (uint16_t)(~temp);
911789f4a4cSSong Gao     Vd->D(1) = 0;
912789f4a4cSSong Gao }
913f205a539SSong Gao 
914f205a539SSong Gao void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
915f205a539SSong Gao {
916f205a539SSong Gao     int i;
917f205a539SSong Gao     VReg *Vd = (VReg *)vd;
918f205a539SSong Gao     VReg *Vj = (VReg *)vj;
919f205a539SSong Gao 
920f205a539SSong Gao     for (i = 0; i < LSX_LEN/8; i++) {
921f205a539SSong Gao         Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
922f205a539SSong Gao     }
923f205a539SSong Gao }
9249b21a7a5SSong Gao 
/*
 * VSLLWIL - define a helper that widens the leading source elements and
 * shifts them left by an immediate.
 *
 * NAME: helper name
 * BIT:  destination element width in bits
 * E1:   destination element accessor
 * E2:   source element accessor
 *
 * The first LSX_LEN / BIT source elements are cast to the destination
 * element type and shifted left by imm % BIT.  The result is assembled
 * in a temporary that is zero-initialized as a whole: the final
 * "*Vd = temp" copies the entire VReg, so any part of the temporary not
 * written by the loop must not be left indeterminate.
 */
#define VSLLWIL(NAME, BIT, E1, E2)                                 \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{                                                                  \
    int i;                                                         \
    VReg temp = {};                                                \
    VReg *Vd = (VReg *)vd;                                         \
    VReg *Vj = (VReg *)vj;                                         \
    typedef __typeof(temp.E1(0)) TD;                               \
                                                                   \
    for (i = 0; i < LSX_LEN / BIT; i++) {                          \
        temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT);                 \
    }                                                              \
    *Vd = temp;                                                    \
}
9419b21a7a5SSong Gao 
942ff27e335SSong Gao void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
9439b21a7a5SSong Gao {
944ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
945ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
9469b21a7a5SSong Gao 
9479b21a7a5SSong Gao     Vd->Q(0) = int128_makes64(Vj->D(0));
9489b21a7a5SSong Gao }
9499b21a7a5SSong Gao 
950ff27e335SSong Gao void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc)
9519b21a7a5SSong Gao {
952ff27e335SSong Gao     VReg *Vd = (VReg *)vd;
953ff27e335SSong Gao     VReg *Vj = (VReg *)vj;
9549b21a7a5SSong Gao 
9559b21a7a5SSong Gao     Vd->Q(0) = int128_make64(Vj->D(0));
9569b21a7a5SSong Gao }
9579b21a7a5SSong Gao 
9589b21a7a5SSong Gao VSLLWIL(vsllwil_h_b, 16, H, B)
9599b21a7a5SSong Gao VSLLWIL(vsllwil_w_h, 32, W, H)
9609b21a7a5SSong Gao VSLLWIL(vsllwil_d_w, 64, D, W)
9619b21a7a5SSong Gao VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
9629b21a7a5SSong Gao VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
9639b21a7a5SSong Gao VSLLWIL(vsllwil_du_wu, 64, UD, UW)
964ecb93716SSong Gao 
/*
 * do_vsrlr(E, T) - define do_vsrlr_E(), a right shift with rounding on
 * type T.
 *
 * A shift count of 0 returns s1 unchanged.  Otherwise the result is
 * (s1 >> sh) plus the last bit shifted out, i.e. bit sh - 1 of s1.
 * Callers are expected to pass sh smaller than the width of T.
 */
#define do_vsrlr(E, T)                                  \
static T do_vsrlr_ ##E(T s1, int sh)                    \
{                                                       \
    if (sh == 0) {                                      \
        return s1;                                      \
    } else {                                            \
        return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1);   \
    }                                                   \
}

do_vsrlr(B, uint8_t)
do_vsrlr(H, uint16_t)
do_vsrlr(W, uint32_t)
do_vsrlr(D, uint64_t)
979ecb93716SSong Gao 
980ecb93716SSong Gao #define VSRLR(NAME, BIT, T, E)                                  \
98104711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)  \
982ecb93716SSong Gao {                                                               \
983ecb93716SSong Gao     int i;                                                      \
98404711da1SSong Gao     VReg *Vd = (VReg *)vd;                                      \
98504711da1SSong Gao     VReg *Vj = (VReg *)vj;                                      \
98604711da1SSong Gao     VReg *Vk = (VReg *)vk;                                      \
987ecb93716SSong Gao                                                                 \
988ecb93716SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
989ecb93716SSong Gao         Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
990ecb93716SSong Gao     }                                                           \
991ecb93716SSong Gao }
992ecb93716SSong Gao 
993ecb93716SSong Gao VSRLR(vsrlr_b, 8,  uint8_t, B)
994ecb93716SSong Gao VSRLR(vsrlr_h, 16, uint16_t, H)
995ecb93716SSong Gao VSRLR(vsrlr_w, 32, uint32_t, W)
996ecb93716SSong Gao VSRLR(vsrlr_d, 64, uint64_t, D)
997ecb93716SSong Gao 
998ecb93716SSong Gao #define VSRLRI(NAME, BIT, E)                                       \
999329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
1000ecb93716SSong Gao {                                                                  \
1001ecb93716SSong Gao     int i;                                                         \
1002329517d5SSong Gao     VReg *Vd = (VReg *)vd;                                         \
1003329517d5SSong Gao     VReg *Vj = (VReg *)vj;                                         \
1004ecb93716SSong Gao                                                                    \
1005ecb93716SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                            \
1006ecb93716SSong Gao         Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm);                  \
1007ecb93716SSong Gao     }                                                              \
1008ecb93716SSong Gao }
1009ecb93716SSong Gao 
1010ecb93716SSong Gao VSRLRI(vsrlri_b, 8, B)
1011ecb93716SSong Gao VSRLRI(vsrlri_h, 16, H)
1012ecb93716SSong Gao VSRLRI(vsrlri_w, 32, W)
1013ecb93716SSong Gao VSRLRI(vsrlri_d, 64, D)
1014ecb93716SSong Gao 
/*
 * do_vsrar(E, T) - define do_vsrar_E(), a right shift with rounding on
 * the signed type T.
 *
 * A shift count of 0 returns s1 unchanged.  Otherwise the result is
 * (s1 >> sh) plus the last bit shifted out, i.e. bit sh - 1 of s1.
 * Right-shifting a negative value is implementation-defined in C; the
 * rounding relies on the compiler performing an arithmetic shift.
 */
#define do_vsrar(E, T)                                  \
static T do_vsrar_ ##E(T s1, int sh)                    \
{                                                       \
    if (sh == 0) {                                      \
        return s1;                                      \
    } else {                                            \
        return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1);   \
    }                                                   \
}

do_vsrar(B, int8_t)
do_vsrar(H, int16_t)
do_vsrar(W, int32_t)
do_vsrar(D, int64_t)
1029ecb93716SSong Gao 
1030ecb93716SSong Gao #define VSRAR(NAME, BIT, T, E)                                  \
103104711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)  \
1032ecb93716SSong Gao {                                                               \
1033ecb93716SSong Gao     int i;                                                      \
103404711da1SSong Gao     VReg *Vd = (VReg *)vd;                                      \
103504711da1SSong Gao     VReg *Vj = (VReg *)vj;                                      \
103604711da1SSong Gao     VReg *Vk = (VReg *)vk;                                      \
1037ecb93716SSong Gao                                                                 \
1038ecb93716SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
1039ecb93716SSong Gao         Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
1040ecb93716SSong Gao     }                                                           \
1041ecb93716SSong Gao }
1042ecb93716SSong Gao 
1043ecb93716SSong Gao VSRAR(vsrar_b, 8,  uint8_t, B)
1044ecb93716SSong Gao VSRAR(vsrar_h, 16, uint16_t, H)
1045ecb93716SSong Gao VSRAR(vsrar_w, 32, uint32_t, W)
1046ecb93716SSong Gao VSRAR(vsrar_d, 64, uint64_t, D)
1047ecb93716SSong Gao 
1048ecb93716SSong Gao #define VSRARI(NAME, BIT, E)                                       \
1049329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
1050ecb93716SSong Gao {                                                                  \
1051ecb93716SSong Gao     int i;                                                         \
1052329517d5SSong Gao     VReg *Vd = (VReg *)vd;                                         \
1053329517d5SSong Gao     VReg *Vj = (VReg *)vj;                                         \
1054ecb93716SSong Gao                                                                    \
1055ecb93716SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                            \
1056ecb93716SSong Gao         Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm);                  \
1057ecb93716SSong Gao     }                                                              \
1058ecb93716SSong Gao }
1059ecb93716SSong Gao 
1060ecb93716SSong Gao VSRARI(vsrari_b, 8, B)
1061ecb93716SSong Gao VSRARI(vsrari_h, 16, H)
1062ecb93716SSong Gao VSRARI(vsrari_w, 32, W)
1063ecb93716SSong Gao VSRARI(vsrari_d, 64, D)
1064d79fb8ddSSong Gao 
1065d79fb8ddSSong Gao #define R_SHIFT(a, b) (a >> b)
1066d79fb8ddSSong Gao 
1067d79fb8ddSSong Gao #define VSRLN(NAME, BIT, T, E1, E2)                             \
106804711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)  \
1069d79fb8ddSSong Gao {                                                               \
1070d79fb8ddSSong Gao     int i;                                                      \
107104711da1SSong Gao     VReg *Vd = (VReg *)vd;                                      \
107204711da1SSong Gao     VReg *Vj = (VReg *)vj;                                      \
107304711da1SSong Gao     VReg *Vk = (VReg *)vk;                                      \
1074d79fb8ddSSong Gao                                                                 \
1075d79fb8ddSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
1076d79fb8ddSSong Gao         Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \
1077d79fb8ddSSong Gao     }                                                           \
1078d79fb8ddSSong Gao     Vd->D(1) = 0;                                               \
1079d79fb8ddSSong Gao }
1080d79fb8ddSSong Gao 
1081d79fb8ddSSong Gao VSRLN(vsrln_b_h, 16, uint16_t, B, H)
1082d79fb8ddSSong Gao VSRLN(vsrln_h_w, 32, uint32_t, H, W)
1083d79fb8ddSSong Gao VSRLN(vsrln_w_d, 64, uint64_t, W, D)
1084d79fb8ddSSong Gao 
1085d79fb8ddSSong Gao #define VSRAN(NAME, BIT, T, E1, E2)                            \
108604711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
1087d79fb8ddSSong Gao {                                                              \
1088d79fb8ddSSong Gao     int i;                                                     \
108904711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
109004711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
109104711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
1092d79fb8ddSSong Gao                                                                \
1093d79fb8ddSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                        \
1094d79fb8ddSSong Gao         Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT);  \
1095d79fb8ddSSong Gao     }                                                          \
1096d79fb8ddSSong Gao     Vd->D(1) = 0;                                              \
1097d79fb8ddSSong Gao }
1098d79fb8ddSSong Gao 
1099d79fb8ddSSong Gao VSRAN(vsran_b_h, 16, uint16_t, B, H)
1100d79fb8ddSSong Gao VSRAN(vsran_h_w, 32, uint32_t, H, W)
1101d79fb8ddSSong Gao VSRAN(vsran_w_d, 64, uint64_t, W, D)
1102d79fb8ddSSong Gao 
/*
 * VSRLNI: narrowing right shift by an immediate count.
 *
 * With half = LSX_LEN / BIT, E1 elements [0, half) of the result are the E2
 * elements of Vj cast to T and shifted right by 'imm'; E1 elements
 * [half, 2 * half) are produced the same way from the current contents of
 * Vd.  Because Vd is both an input and the destination, the result is
 * assembled in a local VReg and copied to Vd at the end.  'imm' is used as
 * given (no modulo is applied here).  'desc' is not used.
 */
#define VSRLNI(NAME, BIT, T, E1, E2)                               \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{                                                                  \
    VReg *Vd = vd;                                                 \
    VReg *Vj = vj;                                                 \
    VReg result;                                                   \
    const int half = LSX_LEN / BIT;                                \
    int n;                                                         \
                                                                   \
    result.D(0) = 0;                                               \
    result.D(1) = 0;                                               \
    for (n = 0; n < half; n++) {                                   \
        result.E1(n) = R_SHIFT((T)Vj->E2(n), imm);                 \
        result.E1(n + half) = R_SHIFT((T)Vd->E2(n), imm);          \
    }                                                              \
    *Vd = result;                                                  \
}
1120d79fb8ddSSong Gao 
1121329517d5SSong Gao void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
1122d79fb8ddSSong Gao {
1123d79fb8ddSSong Gao     VReg temp;
1124329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1125329517d5SSong Gao     VReg *Vj = (VReg *)vj;
1126d79fb8ddSSong Gao 
1127d79fb8ddSSong Gao     temp.D(0) = 0;
1128d79fb8ddSSong Gao     temp.D(1) = 0;
1129d79fb8ddSSong Gao     temp.D(0) = int128_getlo(int128_urshift(Vj->Q(0), imm % 128));
1130d79fb8ddSSong Gao     temp.D(1) = int128_getlo(int128_urshift(Vd->Q(0), imm % 128));
1131d79fb8ddSSong Gao     *Vd = temp;
1132d79fb8ddSSong Gao }
1133d79fb8ddSSong Gao 
1134d79fb8ddSSong Gao VSRLNI(vsrlni_b_h, 16, uint16_t, B, H)
1135d79fb8ddSSong Gao VSRLNI(vsrlni_h_w, 32, uint32_t, H, W)
1136d79fb8ddSSong Gao VSRLNI(vsrlni_w_d, 64, uint64_t, W, D)
1137d79fb8ddSSong Gao 
/*
 * VSRANI: narrowing right shift by an immediate count, with each source
 * element shifted in its own declared type (no cast -- presumably signed,
 * i.e. an arithmetic shift; confirm against VReg).
 *
 * With half = LSX_LEN / BIT, E1 elements [0, half) of the result come from
 * the E2 elements of Vj and E1 elements [half, 2 * half) from the current
 * E2 elements of Vd, all shifted right by 'imm'.  The result is assembled
 * in a local VReg because Vd is both an input and the destination.
 * 'desc' is not used.
 */
#define VSRANI(NAME, BIT, E1, E2)                                  \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{                                                                  \
    VReg *Vd = vd;                                                 \
    VReg *Vj = vj;                                                 \
    VReg result;                                                   \
    const int half = LSX_LEN / BIT;                                \
    int n;                                                         \
                                                                   \
    result.D(0) = 0;                                               \
    result.D(1) = 0;                                               \
    for (n = 0; n < half; n++) {                                   \
        result.E1(n) = R_SHIFT(Vj->E2(n), imm);                    \
        result.E1(n + half) = R_SHIFT(Vd->E2(n), imm);             \
    }                                                              \
    *Vd = result;                                                  \
}
1155d79fb8ddSSong Gao 
1156329517d5SSong Gao void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
1157d79fb8ddSSong Gao {
1158d79fb8ddSSong Gao     VReg temp;
1159329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1160329517d5SSong Gao     VReg *Vj = (VReg *)vj;
1161d79fb8ddSSong Gao 
1162d79fb8ddSSong Gao     temp.D(0) = 0;
1163d79fb8ddSSong Gao     temp.D(1) = 0;
1164d79fb8ddSSong Gao     temp.D(0) = int128_getlo(int128_rshift(Vj->Q(0), imm % 128));
1165d79fb8ddSSong Gao     temp.D(1) = int128_getlo(int128_rshift(Vd->Q(0), imm % 128));
1166d79fb8ddSSong Gao     *Vd = temp;
1167d79fb8ddSSong Gao }
1168d79fb8ddSSong Gao 
1169d79fb8ddSSong Gao VSRANI(vsrani_b_h, 16, B, H)
1170d79fb8ddSSong Gao VSRANI(vsrani_h_w, 32, H, W)
1171d79fb8ddSSong Gao VSRANI(vsrani_w_d, 64, W, D)
1172a5200a17SSong Gao 
1173a5200a17SSong Gao #define VSRLRN(NAME, BIT, T, E1, E2)                                \
117404711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)      \
1175a5200a17SSong Gao {                                                                   \
1176a5200a17SSong Gao     int i;                                                          \
117704711da1SSong Gao     VReg *Vd = (VReg *)vd;                                          \
117804711da1SSong Gao     VReg *Vj = (VReg *)vj;                                          \
117904711da1SSong Gao     VReg *Vk = (VReg *)vk;                                          \
1180a5200a17SSong Gao                                                                     \
1181a5200a17SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                             \
1182a5200a17SSong Gao         Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
1183a5200a17SSong Gao     }                                                               \
1184a5200a17SSong Gao     Vd->D(1) = 0;                                                   \
1185a5200a17SSong Gao }
1186a5200a17SSong Gao 
1187a5200a17SSong Gao VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H)
1188a5200a17SSong Gao VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W)
1189a5200a17SSong Gao VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D)
1190a5200a17SSong Gao 
1191a5200a17SSong Gao #define VSRARN(NAME, BIT, T, E1, E2)                                \
119204711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)      \
1193a5200a17SSong Gao {                                                                   \
1194a5200a17SSong Gao     int i;                                                          \
119504711da1SSong Gao     VReg *Vd = (VReg *)vd;                                          \
119604711da1SSong Gao     VReg *Vj = (VReg *)vj;                                          \
119704711da1SSong Gao     VReg *Vk = (VReg *)vk;                                          \
1198a5200a17SSong Gao                                                                     \
1199a5200a17SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                             \
1200a5200a17SSong Gao         Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
1201a5200a17SSong Gao     }                                                               \
1202a5200a17SSong Gao     Vd->D(1) = 0;                                                   \
1203a5200a17SSong Gao }
1204a5200a17SSong Gao 
1205a5200a17SSong Gao VSRARN(vsrarn_b_h, 16, uint8_t,  B, H)
1206a5200a17SSong Gao VSRARN(vsrarn_h_w, 32, uint16_t, H, W)
1207a5200a17SSong Gao VSRARN(vsrarn_w_d, 64, uint32_t, W, D)
1208a5200a17SSong Gao 
/*
 * VSRLRNI: narrowing right shift via do_vsrlr_<E2>() with an immediate
 * count.
 *
 * With half = LSX_LEN / BIT, E1 elements [0, half) of the result are
 * do_vsrlr_<E2>(Vj element, imm) and E1 elements [half, 2 * half) are
 * do_vsrlr_<E2>(current Vd element, imm).  The result is assembled in a
 * local VReg because Vd is both an input and the destination.
 * 'desc' is not used.
 */
#define VSRLRNI(NAME, BIT, E1, E2)                                 \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{                                                                  \
    VReg *Vd = vd;                                                 \
    VReg *Vj = vj;                                                 \
    VReg result;                                                   \
    const int half = LSX_LEN / BIT;                                \
    int n;                                                         \
                                                                   \
    result.D(0) = 0;                                               \
    result.D(1) = 0;                                               \
    for (n = 0; n < half; n++) {                                   \
        result.E1(n) = do_vsrlr_ ## E2(Vj->E2(n), imm);            \
        result.E1(n + half) = do_vsrlr_ ## E2(Vd->E2(n), imm);     \
    }                                                              \
    *Vd = result;                                                  \
}
1226a5200a17SSong Gao 
1227329517d5SSong Gao void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
1228a5200a17SSong Gao {
1229a5200a17SSong Gao     VReg temp;
1230329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1231329517d5SSong Gao     VReg *Vj = (VReg *)vj;
1232a5200a17SSong Gao     Int128 r1, r2;
1233a5200a17SSong Gao 
1234a5200a17SSong Gao     if (imm == 0) {
1235a5200a17SSong Gao         temp.D(0) = int128_getlo(Vj->Q(0));
1236a5200a17SSong Gao         temp.D(1) = int128_getlo(Vd->Q(0));
1237a5200a17SSong Gao     } else {
1238a5200a17SSong Gao         r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one());
1239a5200a17SSong Gao         r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one());
1240a5200a17SSong Gao 
1241a5200a17SSong Gao        temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1));
1242a5200a17SSong Gao        temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2));
1243a5200a17SSong Gao     }
1244a5200a17SSong Gao     *Vd = temp;
1245a5200a17SSong Gao }
1246a5200a17SSong Gao 
1247a5200a17SSong Gao VSRLRNI(vsrlrni_b_h, 16, B, H)
1248a5200a17SSong Gao VSRLRNI(vsrlrni_h_w, 32, H, W)
1249a5200a17SSong Gao VSRLRNI(vsrlrni_w_d, 64, W, D)
1250a5200a17SSong Gao 
/*
 * VSRARNI: narrowing right shift via do_vsrar_<E2>() with an immediate
 * count.
 *
 * With half = LSX_LEN / BIT, E1 elements [0, half) of the result are
 * do_vsrar_<E2>(Vj element, imm) and E1 elements [half, 2 * half) are
 * do_vsrar_<E2>(current Vd element, imm).  The result is assembled in a
 * local VReg because Vd is both an input and the destination.
 * 'desc' is not used.
 */
#define VSRARNI(NAME, BIT, E1, E2)                                 \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{                                                                  \
    VReg *Vd = vd;                                                 \
    VReg *Vj = vj;                                                 \
    VReg result;                                                   \
    const int half = LSX_LEN / BIT;                                \
    int n;                                                         \
                                                                   \
    result.D(0) = 0;                                               \
    result.D(1) = 0;                                               \
    for (n = 0; n < half; n++) {                                   \
        result.E1(n) = do_vsrar_ ## E2(Vj->E2(n), imm);            \
        result.E1(n + half) = do_vsrar_ ## E2(Vd->E2(n), imm);     \
    }                                                              \
    *Vd = result;                                                  \
}
1268a5200a17SSong Gao 
1269329517d5SSong Gao void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
1270a5200a17SSong Gao {
1271a5200a17SSong Gao     VReg temp;
1272329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1273329517d5SSong Gao     VReg *Vj = (VReg *)vj;
1274a5200a17SSong Gao     Int128 r1, r2;
1275a5200a17SSong Gao 
1276a5200a17SSong Gao     if (imm == 0) {
1277a5200a17SSong Gao         temp.D(0) = int128_getlo(Vj->Q(0));
1278a5200a17SSong Gao         temp.D(1) = int128_getlo(Vd->Q(0));
1279a5200a17SSong Gao     } else {
1280a5200a17SSong Gao         r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
1281a5200a17SSong Gao         r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
1282a5200a17SSong Gao 
1283a5200a17SSong Gao        temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1));
1284a5200a17SSong Gao        temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2));
1285a5200a17SSong Gao     }
1286a5200a17SSong Gao     *Vd = temp;
1287a5200a17SSong Gao }
1288a5200a17SSong Gao 
1289a5200a17SSong Gao VSRARNI(vsrarni_b_h, 16, B, H)
1290a5200a17SSong Gao VSRARNI(vsrarni_h_w, 32, H, W)
1291a5200a17SSong Gao VSRARNI(vsrarni_w_d, 64, W, D)
129283b3815dSSong Gao 
/*
 * SSRLNS: generate do_ssrlns_<NAME>(), a right shift with an upper clamp.
 *
 * 'e2' (of type T2) is converted to the unsigned type T1 and shifted right
 * by 'sa' bits, so vacated bits are zero-filled.  The shifted value is then
 * limited to (1 << sh) - 1, which is held in type T3.  A shift count of 0
 * simply yields the converted value, so no special case is needed.
 *
 * Returns the shifted value, or the limit when the shifted value exceeds it.
 */
#define SSRLNS(NAME, T1, T2, T3)                    \
static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \
{                                                   \
    const T3 limit = (1ull << sh) - 1;              \
    const T1 shifted = (T1)e2 >> sa;                \
                                                    \
    return shifted > limit ? limit : shifted;       \
}

SSRLNS(B, uint16_t, int16_t, uint8_t)
SSRLNS(H, uint32_t, int32_t, uint16_t)
SSRLNS(W, uint64_t, int64_t, uint32_t)
131483b3815dSSong Gao 
131583b3815dSSong Gao #define VSSRLN(NAME, BIT, T, E1, E2)                                          \
131604711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                \
131783b3815dSSong Gao {                                                                             \
131883b3815dSSong Gao     int i;                                                                    \
131904711da1SSong Gao     VReg *Vd = (VReg *)vd;                                                    \
132004711da1SSong Gao     VReg *Vj = (VReg *)vj;                                                    \
132104711da1SSong Gao     VReg *Vk = (VReg *)vk;                                                    \
132283b3815dSSong Gao                                                                               \
132383b3815dSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
132483b3815dSSong Gao         Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
132583b3815dSSong Gao     }                                                                         \
132683b3815dSSong Gao     Vd->D(1) = 0;                                                             \
132783b3815dSSong Gao }
132883b3815dSSong Gao 
132983b3815dSSong Gao VSSRLN(vssrln_b_h, 16, uint16_t, B, H)
133083b3815dSSong Gao VSSRLN(vssrln_h_w, 32, uint32_t, H, W)
133183b3815dSSong Gao VSSRLN(vssrln_w_d, 64, uint64_t, W, D)
133283b3815dSSong Gao 
/*
 * SSRANS: generate do_ssrans_<E>(), a right shift with two-sided clamping.
 *
 * 'e2' (signed type T1) is shifted right by 'sa' bits in its own type; for
 * negative values this relies on the compiler implementing '>>' as an
 * arithmetic shift.  The shifted value is then clamped to the range
 * [-(1 << sh), (1 << sh) - 1].  The upper bound is held in the narrower
 * signed type T2.
 *
 * The lower bound is formed as the bitwise complement of the upper bound
 * in the wide type T1.  Computing it as -(max + 1) in T2 would overflow a
 * signed int when T2 is int32_t and max is INT32_MAX, which is undefined
 * behaviour in ISO C.
 *
 * Returns the shifted value, or the nearest bound if it is out of range.
 */
#define SSRANS(E, T1, T2)                        \
static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
{                                                \
    const T2 max = (1ll << sh) - 1;              \
    const T1 min = ~(T1)max;                     \
    const T1 shifted = e2 >> sa;                 \
                                                 \
    if (shifted > max) {                         \
        return max;                              \
    }                                            \
    if (shifted < min) {                         \
        return min;                              \
    }                                            \
    return shifted;                              \
}

SSRANS(B, int16_t, int8_t)
SSRANS(H, int32_t, int16_t)
SSRANS(W, int64_t, int32_t)
135683b3815dSSong Gao 
135783b3815dSSong Gao #define VSSRAN(NAME, BIT, T, E1, E2)                                         \
135804711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)               \
135983b3815dSSong Gao {                                                                            \
136083b3815dSSong Gao     int i;                                                                   \
136104711da1SSong Gao     VReg *Vd = (VReg *)vd;                                                   \
136204711da1SSong Gao     VReg *Vj = (VReg *)vj;                                                   \
136304711da1SSong Gao     VReg *Vk = (VReg *)vk;                                                   \
136483b3815dSSong Gao                                                                              \
136583b3815dSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
136683b3815dSSong Gao         Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
136783b3815dSSong Gao     }                                                                        \
136883b3815dSSong Gao     Vd->D(1) = 0;                                                            \
136983b3815dSSong Gao }
137083b3815dSSong Gao 
137183b3815dSSong Gao VSSRAN(vssran_b_h, 16, uint16_t, B, H)
137283b3815dSSong Gao VSSRAN(vssran_h_w, 32, uint32_t, H, W)
137383b3815dSSong Gao VSSRAN(vssran_w_d, 64, uint64_t, W, D)
137483b3815dSSong Gao 
/*
 * SSRLNU: generate do_ssrlnu_<E>(), a right shift with an upper clamp.
 *
 * 'e2' (of type T3) is converted to the unsigned type T1 and shifted right
 * by 'sa' bits, so vacated bits are zero-filled.  The shifted value is then
 * limited to (1 << sh) - 1, which is held in the unsigned type T2.  A shift
 * count of 0 simply yields the converted value.
 *
 * Returns the shifted value, or the limit when the shifted value exceeds it.
 */
#define SSRLNU(E, T1, T2, T3)                    \
static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
{                                                \
    const T2 limit = (1ull << sh) - 1;           \
    const T1 shifted = (T1)e2 >> sa;             \
                                                 \
    return shifted > limit ? limit : shifted;    \
}

SSRLNU(B, uint16_t, uint8_t,  int16_t)
SSRLNU(H, uint32_t, uint16_t, int32_t)
SSRLNU(W, uint64_t, uint32_t, int64_t)
139683b3815dSSong Gao 
139783b3815dSSong Gao #define VSSRLNU(NAME, BIT, T, E1, E2)                                     \
139804711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)            \
139983b3815dSSong Gao {                                                                         \
140083b3815dSSong Gao     int i;                                                                \
140104711da1SSong Gao     VReg *Vd = (VReg *)vd;                                                \
140204711da1SSong Gao     VReg *Vj = (VReg *)vj;                                                \
140304711da1SSong Gao     VReg *Vk = (VReg *)vk;                                                \
140483b3815dSSong Gao                                                                           \
140583b3815dSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                   \
140683b3815dSSong Gao         Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
140783b3815dSSong Gao     }                                                                     \
140883b3815dSSong Gao     Vd->D(1) = 0;                                                         \
140983b3815dSSong Gao }
141083b3815dSSong Gao 
141183b3815dSSong Gao VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H)
141283b3815dSSong Gao VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W)
141383b3815dSSong Gao VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D)
141483b3815dSSong Gao 
/*
 * SSRANU: generate do_ssranu_<E>(), a right shift of a signed input with
 * clamping to an unsigned range.
 *
 * A negative 'e2' (signed type T3) always produces 0.  Otherwise 'e2' is
 * shifted right by 'sa' bits and the result is limited to (1 << sh) - 1,
 * which is held in the unsigned type T2.
 *
 * Returns 0, the shifted value, or the limit, as type T1.
 */
#define SSRANU(E, T1, T2, T3)                    \
static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
{                                                \
    const T2 limit = (1ull << sh) - 1;           \
    T1 shifted = 0;                              \
                                                 \
    if (e2 >= 0) {                               \
        shifted = e2 >> sa;                      \
    }                                            \
    return shifted > limit ? limit : shifted;    \
}

SSRANU(B, uint16_t, uint8_t,  int16_t)
SSRANU(H, uint32_t, uint16_t, int32_t)
SSRANU(W, uint64_t, uint32_t, int64_t)
143983b3815dSSong Gao 
144083b3815dSSong Gao #define VSSRANU(NAME, BIT, T, E1, E2)                                     \
144104711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)            \
144283b3815dSSong Gao {                                                                         \
144383b3815dSSong Gao     int i;                                                                \
144404711da1SSong Gao     VReg *Vd = (VReg *)vd;                                                \
144504711da1SSong Gao     VReg *Vj = (VReg *)vj;                                                \
144604711da1SSong Gao     VReg *Vk = (VReg *)vk;                                                \
144783b3815dSSong Gao                                                                           \
144883b3815dSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                   \
144983b3815dSSong Gao         Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
145083b3815dSSong Gao     }                                                                     \
145183b3815dSSong Gao     Vd->D(1) = 0;                                                         \
145283b3815dSSong Gao }
145383b3815dSSong Gao 
145483b3815dSSong Gao VSSRANU(vssran_bu_h, 16, uint16_t, B, H)
145583b3815dSSong Gao VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
145683b3815dSSong Gao VSSRANU(vssran_wu_d, 64, uint64_t, W, D)
145783b3815dSSong Gao 
/*
 * VSSRLNI: narrowing right shift with clamping, immediate count.
 *
 * With half = LSX_LEN / BIT, E1 elements [0, half) of the result are
 * do_ssrlns_<E1>(Vj element, imm, BIT / 2 - 1) and E1 elements
 * [half, 2 * half) are the same function applied to the current E2
 * elements of Vd.  The result is assembled in a local VReg because Vd is
 * both an input and the destination.  'desc' is not used.
 *
 * The temporary is cleared before it is filled, matching the other
 * immediate-count narrowing helpers in this file (e.g. VSRLNI), so the
 * final whole-register copy never depends on every byte having been
 * written by the loop.
 */
#define VSSRLNI(NAME, BIT, E1, E2)                                            \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)            \
{                                                                             \
    int i, max;                                                               \
    VReg temp;                                                                \
    VReg *Vd = (VReg *)vd;                                                    \
    VReg *Vj = (VReg *)vj;                                                    \
                                                                              \
    temp.D(0) = 0;                                                            \
    temp.D(1) = 0;                                                            \
    max = LSX_LEN / BIT;                                                      \
    for (i = 0; i < max; i++) {                                               \
        temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT / 2 - 1);           \
        temp.E1(i + max) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT / 2 - 1);     \
    }                                                                         \
    *Vd = temp;                                                               \
}
147283b3815dSSong Gao 
1473329517d5SSong Gao void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
147483b3815dSSong Gao {
147583b3815dSSong Gao     Int128 shft_res1, shft_res2, mask;
1476329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1477329517d5SSong Gao     VReg *Vj = (VReg *)vj;
147883b3815dSSong Gao 
147983b3815dSSong Gao     if (imm == 0) {
148083b3815dSSong Gao         shft_res1 = Vj->Q(0);
148183b3815dSSong Gao         shft_res2 = Vd->Q(0);
148283b3815dSSong Gao     } else {
148383b3815dSSong Gao         shft_res1 = int128_urshift(Vj->Q(0), imm);
148483b3815dSSong Gao         shft_res2 = int128_urshift(Vd->Q(0), imm);
148583b3815dSSong Gao     }
148683b3815dSSong Gao     mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
148783b3815dSSong Gao 
148883b3815dSSong Gao     if (int128_ult(mask, shft_res1)) {
148983b3815dSSong Gao         Vd->D(0) = int128_getlo(mask);
149083b3815dSSong Gao     }else {
149183b3815dSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
149283b3815dSSong Gao     }
149383b3815dSSong Gao 
149483b3815dSSong Gao     if (int128_ult(mask, shft_res2)) {
149583b3815dSSong Gao         Vd->D(1) = int128_getlo(mask);
149683b3815dSSong Gao     }else {
149783b3815dSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
149883b3815dSSong Gao     }
149983b3815dSSong Gao }
150083b3815dSSong Gao 
150183b3815dSSong Gao VSSRLNI(vssrlni_b_h, 16, B, H)
150283b3815dSSong Gao VSSRLNI(vssrlni_h_w, 32, H, W)
150383b3815dSSong Gao VSSRLNI(vssrlni_w_d, 64, W, D)
150483b3815dSSong Gao 
/*
 * VSSRANI: narrowing right shift with two-sided clamping, immediate count.
 *
 * With half = LSX_LEN / BIT, E1 elements [0, half) of the result are
 * do_ssrans_<E1>(Vj element, imm, BIT / 2 - 1) and E1 elements
 * [half, 2 * half) are the same function applied to the current E2
 * elements of Vd.  The result is assembled in a local VReg because Vd is
 * both an input and the destination.  'desc' is not used.
 *
 * The temporary is cleared before it is filled, matching the other
 * immediate-count narrowing helpers in this file (e.g. VSRANI), so the
 * final whole-register copy never depends on every byte having been
 * written by the loop.
 */
#define VSSRANI(NAME, BIT, E1, E2)                                             \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)             \
{                                                                              \
    int i, max;                                                                \
    VReg temp;                                                                 \
    VReg *Vd = (VReg *)vd;                                                     \
    VReg *Vj = (VReg *)vj;                                                     \
                                                                               \
    temp.D(0) = 0;                                                             \
    temp.D(1) = 0;                                                             \
    max = LSX_LEN / BIT;                                                       \
    for (i = 0; i < max; i++) {                                                \
        temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT / 2 - 1);            \
        temp.E1(i + max) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT / 2 - 1);      \
    }                                                                          \
    *Vd = temp;                                                                \
}
151983b3815dSSong Gao 
1520329517d5SSong Gao void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
152183b3815dSSong Gao {
152283b3815dSSong Gao     Int128 shft_res1, shft_res2, mask, min;
1523329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1524329517d5SSong Gao     VReg *Vj = (VReg *)vj;
152583b3815dSSong Gao 
152683b3815dSSong Gao     if (imm == 0) {
152783b3815dSSong Gao         shft_res1 = Vj->Q(0);
152883b3815dSSong Gao         shft_res2 = Vd->Q(0);
152983b3815dSSong Gao     } else {
153083b3815dSSong Gao         shft_res1 = int128_rshift(Vj->Q(0), imm);
153183b3815dSSong Gao         shft_res2 = int128_rshift(Vd->Q(0), imm);
153283b3815dSSong Gao     }
153383b3815dSSong Gao     mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
153483b3815dSSong Gao     min  = int128_lshift(int128_one(), 63);
153583b3815dSSong Gao 
153683b3815dSSong Gao     if (int128_gt(shft_res1,  mask)) {
153783b3815dSSong Gao         Vd->D(0) = int128_getlo(mask);
153883b3815dSSong Gao     } else if (int128_lt(shft_res1, int128_neg(min))) {
153983b3815dSSong Gao         Vd->D(0) = int128_getlo(min);
154083b3815dSSong Gao     } else {
154183b3815dSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
154283b3815dSSong Gao     }
154383b3815dSSong Gao 
154483b3815dSSong Gao     if (int128_gt(shft_res2, mask)) {
154583b3815dSSong Gao         Vd->D(1) = int128_getlo(mask);
154683b3815dSSong Gao     } else if (int128_lt(shft_res2, int128_neg(min))) {
154783b3815dSSong Gao         Vd->D(1) = int128_getlo(min);
154883b3815dSSong Gao     } else {
154983b3815dSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
155083b3815dSSong Gao     }
155183b3815dSSong Gao }
155283b3815dSSong Gao 
155383b3815dSSong Gao VSSRANI(vssrani_b_h, 16, B, H)
155483b3815dSSong Gao VSSRANI(vssrani_h_w, 32, H, W)
155583b3815dSSong Gao VSSRANI(vssrani_w_d, 64, W, D)
155683b3815dSSong Gao 
/*
 * VSSRLNUI: narrowing right shift with clamping, immediate count.
 *
 * With half = LSX_LEN / BIT, E1 elements [0, half) of the result are
 * do_ssrlnu_<E1>(Vj element, imm, BIT / 2) and E1 elements
 * [half, 2 * half) are the same function applied to the current E2
 * elements of Vd.  The result is assembled in a local VReg because Vd is
 * both an input and the destination.  'desc' is not used.
 *
 * The temporary is cleared before it is filled, matching the other
 * immediate-count narrowing helpers in this file (e.g. VSRLNI), so the
 * final whole-register copy never depends on every byte having been
 * written by the loop.
 */
#define VSSRLNUI(NAME, BIT, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    int i, max;                                                             \
    VReg temp;                                                              \
    VReg *Vd = (VReg *)vd;                                                  \
    VReg *Vj = (VReg *)vj;                                                  \
                                                                            \
    temp.D(0) = 0;                                                          \
    temp.D(1) = 0;                                                          \
    max = LSX_LEN / BIT;                                                    \
    for (i = 0; i < max; i++) {                                             \
        temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT / 2);             \
        temp.E1(i + max) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT / 2);       \
    }                                                                       \
    *Vd = temp;                                                             \
}
157183b3815dSSong Gao 
1572329517d5SSong Gao void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
157383b3815dSSong Gao {
157483b3815dSSong Gao     Int128 shft_res1, shft_res2, mask;
1575329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1576329517d5SSong Gao     VReg *Vj = (VReg *)vj;
157783b3815dSSong Gao 
157883b3815dSSong Gao     if (imm == 0) {
157983b3815dSSong Gao         shft_res1 = Vj->Q(0);
158083b3815dSSong Gao         shft_res2 = Vd->Q(0);
158183b3815dSSong Gao     } else {
158283b3815dSSong Gao         shft_res1 = int128_urshift(Vj->Q(0), imm);
158383b3815dSSong Gao         shft_res2 = int128_urshift(Vd->Q(0), imm);
158483b3815dSSong Gao     }
158583b3815dSSong Gao     mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
158683b3815dSSong Gao 
158783b3815dSSong Gao     if (int128_ult(mask, shft_res1)) {
158883b3815dSSong Gao         Vd->D(0) = int128_getlo(mask);
158983b3815dSSong Gao     }else {
159083b3815dSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
159183b3815dSSong Gao     }
159283b3815dSSong Gao 
159383b3815dSSong Gao     if (int128_ult(mask, shft_res2)) {
159483b3815dSSong Gao         Vd->D(1) = int128_getlo(mask);
159583b3815dSSong Gao     }else {
159683b3815dSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
159783b3815dSSong Gao     }
159883b3815dSSong Gao }
159983b3815dSSong Gao 
/*
 * Generate vssrlni_bu_h, vssrlni_hu_w and vssrlni_wu_d from the VSSRLNUI
 * template: BIT = 16/32/64 with source accessors H/W/D and destination
 * accessors B/H/W.  The Q-sourced variant is the hand-written
 * vssrlni_du_q helper.
 */
160083b3815dSSong Gao VSSRLNUI(vssrlni_bu_h, 16, B, H)
160183b3815dSSong Gao VSSRLNUI(vssrlni_hu_w, 32, H, W)
160283b3815dSSong Gao VSSRLNUI(vssrlni_wu_d, 64, W, D)
160383b3815dSSong Gao 
/*
 * VSSRANUI - template for the immediate-shift helpers built on
 * do_ssranu_<E1>().
 *
 *   NAME  name of the generated helper
 *   BIT   element width used for the source accessor E2
 *   E1    accessor for the destination elements
 *   E2    accessor for the source elements
 *
 * For i in [0, LSX_LEN / BIT), element i of Vj yields destination element
 * i and element i of the incoming Vd yields destination element
 * i + LSX_LEN / BIT.  Every call receives imm and BIT / 2.  The result is
 * staged in a temporary because Vd is both an input and the output.
 * desc is not used.
 *
 * The temporary is zero-initialised: it is copied to *Vd as a whole, so any
 * bytes of VReg that the loop does not write (if VReg is wider than what
 * the loop covers) would otherwise be copied out with indeterminate
 * contents.
 */
#define VSSRANUI(NAME, BIT, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    int i;                                                                  \
    VReg temp = {};                                                         \
    VReg *Vd = (VReg *)vd;                                                  \
    VReg *Vj = (VReg *)vj;                                                  \
                                                                            \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                     \
        temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2);               \
        temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \
    }                                                                       \
    *Vd = temp;                                                             \
}
161883b3815dSSong Gao 
1619329517d5SSong Gao void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
162083b3815dSSong Gao {
162183b3815dSSong Gao     Int128 shft_res1, shft_res2, mask;
1622329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1623329517d5SSong Gao     VReg *Vj = (VReg *)vj;
162483b3815dSSong Gao 
162583b3815dSSong Gao     if (imm == 0) {
162683b3815dSSong Gao         shft_res1 = Vj->Q(0);
162783b3815dSSong Gao         shft_res2 = Vd->Q(0);
162883b3815dSSong Gao     } else {
162983b3815dSSong Gao         shft_res1 = int128_rshift(Vj->Q(0), imm);
163083b3815dSSong Gao         shft_res2 = int128_rshift(Vd->Q(0), imm);
163183b3815dSSong Gao     }
163283b3815dSSong Gao 
163383b3815dSSong Gao     if (int128_lt(Vj->Q(0), int128_zero())) {
163483b3815dSSong Gao         shft_res1 = int128_zero();
163583b3815dSSong Gao     }
163683b3815dSSong Gao 
163783b3815dSSong Gao     if (int128_lt(Vd->Q(0), int128_zero())) {
163883b3815dSSong Gao         shft_res2 = int128_zero();
163983b3815dSSong Gao     }
164083b3815dSSong Gao 
164183b3815dSSong Gao     mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
164283b3815dSSong Gao 
164383b3815dSSong Gao     if (int128_ult(mask, shft_res1)) {
164483b3815dSSong Gao         Vd->D(0) = int128_getlo(mask);
164583b3815dSSong Gao     }else {
164683b3815dSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
164783b3815dSSong Gao     }
164883b3815dSSong Gao 
164983b3815dSSong Gao     if (int128_ult(mask, shft_res2)) {
165083b3815dSSong Gao         Vd->D(1) = int128_getlo(mask);
165183b3815dSSong Gao     }else {
165283b3815dSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
165383b3815dSSong Gao     }
165483b3815dSSong Gao }
165583b3815dSSong Gao 
/*
 * Generate vssrani_bu_h, vssrani_hu_w and vssrani_wu_d from the VSSRANUI
 * template: BIT = 16/32/64 with source accessors H/W/D and destination
 * accessors B/H/W.  The Q-sourced variant is the hand-written
 * vssrani_du_q helper.
 */
165683b3815dSSong Gao VSSRANUI(vssrani_bu_h, 16, B, H)
165783b3815dSSong Gao VSSRANUI(vssrani_hu_w, 32, H, W)
165883b3815dSSong Gao VSSRANUI(vssrani_wu_d, 64, W, D)
1659162cd32cSSong Gao 
/*
 * SSRLRNS - generate do_ssrlrns_<E1>(e2, sa, sh).
 *
 * The generated function returns do_vsrlr_<E2>(e2, sa) converted to T1 and
 * clamped to at most 2^sh - 1.  T2 is the parameter type of e2; T3 is
 * accepted for symmetry with the sibling templates but is not used.
 */
#define SSRLRNS(E1, E2, T1, T2, T3)                \
static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
{                                                  \
    const T1 rounded = do_vsrlr_ ## E2(e2, sa);    \
    const T1 limit = (1ull << sh) - 1;             \
                                                   \
    return rounded > limit ? limit : rounded;      \
}

SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
1678162cd32cSSong Gao 
1679162cd32cSSong Gao #define VSSRLRN(NAME, BIT, T, E1, E2)                                         \
168004711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                \
1681162cd32cSSong Gao {                                                                             \
1682162cd32cSSong Gao     int i;                                                                    \
168304711da1SSong Gao     VReg *Vd = (VReg *)vd;                                                    \
168404711da1SSong Gao     VReg *Vj = (VReg *)vj;                                                    \
168504711da1SSong Gao     VReg *Vk = (VReg *)vk;                                                    \
1686162cd32cSSong Gao                                                                               \
1687162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
1688162cd32cSSong Gao         Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
1689162cd32cSSong Gao     }                                                                         \
1690162cd32cSSong Gao     Vd->D(1) = 0;                                                             \
1691162cd32cSSong Gao }
1692162cd32cSSong Gao 
1693162cd32cSSong Gao VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H)
1694162cd32cSSong Gao VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W)
1695162cd32cSSong Gao VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D)
1696162cd32cSSong Gao 
/*
 * SSRARNS - generate do_ssrarns_<E1>(e2, sa, sh).
 *
 * The generated function returns do_vsrar_<E2>(e2, sa) clamped to the
 * range [-(2^sh), 2^sh - 1].  T1 is the type of the input and of the
 * result; T2 is the type that holds the upper bound.
 *
 * The lower bound is written as ~mask rather than -(mask + 1).  The two
 * are equal for every mask of the form 2^sh - 1, but mask + 1 overflows
 * signed int when T2 is int32_t (mask == INT32_MAX), which is undefined
 * behaviour unless the build forces wrapping semantics.
 */
#define SSRARNS(E1, E2, T1, T2)                    \
static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
{                                                  \
    T1 shft_res;                                   \
    T2 mask;                                       \
                                                   \
    shft_res = do_vsrar_ ## E2(e2, sa);            \
    mask = (1ll << sh) - 1;                        \
    if (shft_res > mask) {                         \
        return mask;                               \
    } else if (shft_res < ~mask) {                 \
        return ~mask;                              \
    } else {                                       \
        return shft_res;                           \
    }                                              \
}

SSRARNS(B, H, int16_t, int8_t)
SSRARNS(H, W, int32_t, int16_t)
SSRARNS(W, D, int64_t, int32_t)
1717162cd32cSSong Gao 
1718162cd32cSSong Gao #define VSSRARN(NAME, BIT, T, E1, E2)                                         \
171904711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)                \
1720162cd32cSSong Gao {                                                                             \
1721162cd32cSSong Gao     int i;                                                                    \
172204711da1SSong Gao     VReg *Vd = (VReg *)vd;                                                    \
172304711da1SSong Gao     VReg *Vj = (VReg *)vj;                                                    \
172404711da1SSong Gao     VReg *Vk = (VReg *)vk;                                                    \
1725162cd32cSSong Gao                                                                               \
1726162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
1727162cd32cSSong Gao         Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
1728162cd32cSSong Gao     }                                                                         \
1729162cd32cSSong Gao     Vd->D(1) = 0;                                                             \
1730162cd32cSSong Gao }
1731162cd32cSSong Gao 
1732162cd32cSSong Gao VSSRARN(vssrarn_b_h, 16, uint16_t, B, H)
1733162cd32cSSong Gao VSSRARN(vssrarn_h_w, 32, uint32_t, H, W)
1734162cd32cSSong Gao VSSRARN(vssrarn_w_d, 64, uint64_t, W, D)
1735162cd32cSSong Gao 
/*
 * SSRLRNU - generate do_ssrlrnu_<E1>(e2, sa, sh).
 *
 * The generated function returns do_vsrlr_<E2>(e2, sa) converted to T1 and
 * clamped to at most 2^sh - 1.  T2 is the type that holds the bound and
 * T3 is the parameter type of e2.
 */
#define SSRLRNU(E1, E2, T1, T2, T3)                \
static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
{                                                  \
    const T1 rounded = do_vsrlr_ ## E2(e2, sa);    \
    const T2 limit = (1ull << sh) - 1;             \
                                                   \
    if (rounded > limit) {                         \
        return limit;                              \
    }                                              \
    return rounded;                                \
}

SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
1755162cd32cSSong Gao 
1756162cd32cSSong Gao #define VSSRLRNU(NAME, BIT, T, E1, E2)                                     \
175704711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)             \
1758162cd32cSSong Gao {                                                                          \
1759162cd32cSSong Gao     int i;                                                                 \
176004711da1SSong Gao     VReg *Vd = (VReg *)vd;                                                 \
176104711da1SSong Gao     VReg *Vj = (VReg *)vj;                                                 \
176204711da1SSong Gao     VReg *Vk = (VReg *)vk;                                                 \
1763162cd32cSSong Gao                                                                            \
1764162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                    \
1765162cd32cSSong Gao         Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
1766162cd32cSSong Gao     }                                                                      \
1767162cd32cSSong Gao     Vd->D(1) = 0;                                                          \
1768162cd32cSSong Gao }
1769162cd32cSSong Gao 
1770162cd32cSSong Gao VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H)
1771162cd32cSSong Gao VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W)
1772162cd32cSSong Gao VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D)
1773162cd32cSSong Gao 
/*
 * SSRARNU - generate do_ssrarnu_<E1>(e2, sa, sh).
 *
 * The generated function returns 0 for a negative e2; otherwise it returns
 * do_vsrar_<E2>(e2, sa) converted to T1 and clamped to at most 2^sh - 1.
 * T2 is the type that holds the bound and T3 is the parameter type of e2.
 */
#define SSRARNU(E1, E2, T1, T2, T3)                \
static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
{                                                  \
    const T2 limit = (1ull << sh) - 1;             \
    T1 rounded = 0;                                \
                                                   \
    if (e2 >= 0) {                                 \
        rounded = do_vsrar_ ## E2(e2, sa);         \
    }                                              \
    if (rounded > limit) {                         \
        return limit;                              \
    }                                              \
    return rounded;                                \
}

SSRARNU(B, H, uint16_t, uint8_t, int16_t)
SSRARNU(H, W, uint32_t, uint16_t, int32_t)
SSRARNU(W, D, uint64_t, uint32_t, int64_t)
1796162cd32cSSong Gao 
1797162cd32cSSong Gao #define VSSRARNU(NAME, BIT, T, E1, E2)                                     \
179804711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)             \
1799162cd32cSSong Gao {                                                                          \
1800162cd32cSSong Gao     int i;                                                                 \
180104711da1SSong Gao     VReg *Vd = (VReg *)vd;                                                 \
180204711da1SSong Gao     VReg *Vj = (VReg *)vj;                                                 \
180304711da1SSong Gao     VReg *Vk = (VReg *)vk;                                                 \
1804162cd32cSSong Gao                                                                            \
1805162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                    \
1806162cd32cSSong Gao         Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
1807162cd32cSSong Gao     }                                                                      \
1808162cd32cSSong Gao     Vd->D(1) = 0;                                                          \
1809162cd32cSSong Gao }
1810162cd32cSSong Gao 
1811162cd32cSSong Gao VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H)
1812162cd32cSSong Gao VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W)
1813162cd32cSSong Gao VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D)
1814162cd32cSSong Gao 
/*
 * VSSRLRNI - template for the immediate-shift helpers built on
 * do_ssrlrns_<E1>().
 *
 *   NAME  name of the generated helper
 *   BIT   element width used for the source accessor E2
 *   E1    accessor for the destination elements
 *   E2    accessor for the source elements
 *
 * For i in [0, LSX_LEN / BIT), element i of Vj yields destination element
 * i and element i of the incoming Vd yields destination element
 * i + LSX_LEN / BIT.  Every call receives imm and BIT / 2 - 1.  The result
 * is staged in a temporary because Vd is both an input and the output.
 * desc is not used.
 *
 * The temporary is zero-initialised: it is copied to *Vd as a whole, so any
 * bytes of VReg that the loop does not write (if VReg is wider than what
 * the loop covers) would otherwise be copied out with indeterminate
 * contents.
 */
#define VSSRLRNI(NAME, BIT, E1, E2)                                            \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)             \
{                                                                              \
    int i;                                                                     \
    VReg temp = {};                                                            \
    VReg *Vd = (VReg *)vd;                                                     \
    VReg *Vj = (VReg *)vj;                                                     \
                                                                               \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                        \
        temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1);              \
        temp.E1(i + LSX_LEN/BIT) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
    }                                                                          \
    *Vd = temp;                                                                \
}
1829162cd32cSSong Gao 
/*
 * VSSRLRNI_Q - template for the 128-bit source variants of the vssrlrni
 * helpers.
 *
 *   NAME  name of the generated helper
 *   sh    number of bits in the saturation bound (bound is 2^sh - 1)
 *
 * Q(0) of Vj and Q(0) of the incoming Vd are each logically shifted right
 * by imm, with the last bit shifted out added back in as a rounding
 * increment (imm == 0 leaves the value untouched).  The result is clamped
 * (unsigned compare) to 2^sh - 1 and its low 64 bits are stored to
 * Vd->D(0) (from Vj) and Vd->D(1) (from Vd).  desc is not used.
 */
#define VSSRLRNI_Q(NAME, sh)                                                  \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)            \
{                                                                             \
    VReg *Vd = vd;                                                            \
    VReg *Vj = vj;                                                            \
    const Int128 one = int128_one();                                          \
    const Int128 limit = int128_sub(int128_lshift(one, sh), one);             \
    Int128 res_j = Vj->Q(0);                                                  \
    Int128 res_d = Vd->Q(0);                                                  \
                                                                              \
    if (imm != 0) {                                                           \
        const Int128 rnd_j = int128_and(int128_urshift(res_j, imm - 1), one); \
        const Int128 rnd_d = int128_and(int128_urshift(res_d, imm - 1), one); \
                                                                              \
        res_j = int128_add(int128_urshift(res_j, imm), rnd_j);                \
        res_d = int128_add(int128_urshift(res_d, imm), rnd_d);                \
    }                                                                         \
                                                                              \
    Vd->D(0) = int128_getlo(int128_ult(limit, res_j) ? limit : res_j);        \
    Vd->D(1) = int128_getlo(int128_ult(limit, res_d) ? limit : res_d);        \
}
1862162cd32cSSong Gao 
/*
 * Generate vssrlrni_b_h, vssrlrni_h_w and vssrlrni_w_d from the VSSRLRNI
 * template (BIT = 16/32/64), and vssrlrni_d_q from the VSSRLRNI_Q template
 * with a 63-bit saturation bound.
 */
1863162cd32cSSong Gao VSSRLRNI(vssrlrni_b_h, 16, B, H)
1864162cd32cSSong Gao VSSRLRNI(vssrlrni_h_w, 32, H, W)
1865162cd32cSSong Gao VSSRLRNI(vssrlrni_w_d, 64, W, D)
1866162cd32cSSong Gao VSSRLRNI_Q(vssrlrni_d_q, 63)
1867162cd32cSSong Gao 
/*
 * VSSRARNI - template for the immediate-shift helpers built on
 * do_ssrarns_<E1>().
 *
 *   NAME  name of the generated helper
 *   BIT   element width used for the source accessor E2
 *   E1    accessor for the destination elements
 *   E2    accessor for the source elements
 *
 * For i in [0, LSX_LEN / BIT), element i of Vj yields destination element
 * i and element i of the incoming Vd yields destination element
 * i + LSX_LEN / BIT.  Every call receives imm and BIT / 2 - 1.  The result
 * is staged in a temporary because Vd is both an input and the output.
 * desc is not used.
 *
 * The temporary is zero-initialised: it is copied to *Vd as a whole, so any
 * bytes of VReg that the loop does not write (if VReg is wider than what
 * the loop covers) would otherwise be copied out with indeterminate
 * contents.
 */
#define VSSRARNI(NAME, BIT, E1, E2)                                             \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)              \
{                                                                               \
    int i;                                                                      \
    VReg temp = {};                                                             \
    VReg *Vd = (VReg *)vd;                                                      \
    VReg *Vj = (VReg *)vj;                                                      \
                                                                                \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                         \
        temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1);               \
        temp.E1(i + LSX_LEN/BIT) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
    }                                                                           \
    *Vd = temp;                                                                 \
}
1882162cd32cSSong Gao 
1883329517d5SSong Gao void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
1884162cd32cSSong Gao {
1885162cd32cSSong Gao     Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
1886329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1887329517d5SSong Gao     VReg *Vj = (VReg *)vj;
1888162cd32cSSong Gao 
1889162cd32cSSong Gao     if (imm == 0) {
1890162cd32cSSong Gao         shft_res1 = Vj->Q(0);
1891162cd32cSSong Gao         shft_res2 = Vd->Q(0);
1892162cd32cSSong Gao     } else {
1893162cd32cSSong Gao         r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
1894162cd32cSSong Gao         r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
1895162cd32cSSong Gao 
1896162cd32cSSong Gao         shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
1897162cd32cSSong Gao         shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
1898162cd32cSSong Gao     }
1899162cd32cSSong Gao 
1900162cd32cSSong Gao     mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
1901162cd32cSSong Gao     mask2  = int128_lshift(int128_one(), 63);
1902162cd32cSSong Gao 
1903162cd32cSSong Gao     if (int128_gt(shft_res1,  mask1)) {
1904162cd32cSSong Gao         Vd->D(0) = int128_getlo(mask1);
1905162cd32cSSong Gao     } else if (int128_lt(shft_res1, int128_neg(mask2))) {
1906162cd32cSSong Gao         Vd->D(0) = int128_getlo(mask2);
1907162cd32cSSong Gao     } else {
1908162cd32cSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
1909162cd32cSSong Gao     }
1910162cd32cSSong Gao 
1911162cd32cSSong Gao     if (int128_gt(shft_res2, mask1)) {
1912162cd32cSSong Gao         Vd->D(1) = int128_getlo(mask1);
1913162cd32cSSong Gao     } else if (int128_lt(shft_res2, int128_neg(mask2))) {
1914162cd32cSSong Gao         Vd->D(1) = int128_getlo(mask2);
1915162cd32cSSong Gao     } else {
1916162cd32cSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
1917162cd32cSSong Gao     }
1918162cd32cSSong Gao }
1919162cd32cSSong Gao 
/*
 * Generate vssrarni_b_h, vssrarni_h_w and vssrarni_w_d from the VSSRARNI
 * template: BIT = 16/32/64 with source accessors H/W/D and destination
 * accessors B/H/W.  The Q-sourced variant is the hand-written
 * vssrarni_d_q helper.
 */
1920162cd32cSSong Gao VSSRARNI(vssrarni_b_h, 16, B, H)
1921162cd32cSSong Gao VSSRARNI(vssrarni_h_w, 32, H, W)
1922162cd32cSSong Gao VSSRARNI(vssrarni_w_d, 64, W, D)
1923162cd32cSSong Gao 
1924162cd32cSSong Gao #define VSSRLRNUI(NAME, BIT, E1, E2)                                         \
1925329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)           \
1926162cd32cSSong Gao {                                                                            \
1927162cd32cSSong Gao     int i;                                                                   \
1928162cd32cSSong Gao     VReg temp;                                                               \
1929329517d5SSong Gao     VReg *Vd = (VReg *)vd;                                                   \
1930329517d5SSong Gao     VReg *Vj = (VReg *)vj;                                                   \
1931162cd32cSSong Gao                                                                              \
1932162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
1933162cd32cSSong Gao         temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2);               \
1934162cd32cSSong Gao         temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \
1935162cd32cSSong Gao     }                                                                        \
1936162cd32cSSong Gao     *Vd = temp;                                                              \
1937162cd32cSSong Gao }
1938162cd32cSSong Gao 
1939162cd32cSSong Gao VSSRLRNUI(vssrlrni_bu_h, 16, B, H)
1940162cd32cSSong Gao VSSRLRNUI(vssrlrni_hu_w, 32, H, W)
1941162cd32cSSong Gao VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
1942162cd32cSSong Gao VSSRLRNI_Q(vssrlrni_du_q, 64)
1943162cd32cSSong Gao 
/*
 * VSSRARNUI - template for the immediate-shift helpers built on
 * do_ssrarnu_<E1>().
 *
 *   NAME  name of the generated helper
 *   BIT   element width used for the source accessor E2
 *   E1    accessor for the destination elements
 *   E2    accessor for the source elements
 *
 * For i in [0, LSX_LEN / BIT), element i of Vj yields destination element
 * i and element i of the incoming Vd yields destination element
 * i + LSX_LEN / BIT.  Every call receives imm and BIT / 2.  The result is
 * staged in a temporary because Vd is both an input and the output.
 * desc is not used.
 *
 * The temporary is zero-initialised: it is copied to *Vd as a whole, so any
 * bytes of VReg that the loop does not write (if VReg is wider than what
 * the loop covers) would otherwise be copied out with indeterminate
 * contents.
 */
#define VSSRARNUI(NAME, BIT, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)           \
{                                                                            \
    int i;                                                                   \
    VReg temp = {};                                                          \
    VReg *Vd = (VReg *)vd;                                                   \
    VReg *Vj = (VReg *)vj;                                                   \
                                                                             \
    for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
        temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2);               \
        temp.E1(i + LSX_LEN/BIT) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT/2); \
    }                                                                        \
    *Vd = temp;                                                              \
}
1958162cd32cSSong Gao 
1959329517d5SSong Gao void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
1960162cd32cSSong Gao {
1961162cd32cSSong Gao     Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
1962329517d5SSong Gao     VReg *Vd = (VReg *)vd;
1963329517d5SSong Gao     VReg *Vj = (VReg *)vj;
1964162cd32cSSong Gao 
1965162cd32cSSong Gao     if (imm == 0) {
1966162cd32cSSong Gao         shft_res1 = Vj->Q(0);
1967162cd32cSSong Gao         shft_res2 = Vd->Q(0);
1968162cd32cSSong Gao     } else {
1969162cd32cSSong Gao         r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
1970162cd32cSSong Gao         r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
1971162cd32cSSong Gao 
1972162cd32cSSong Gao         shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
1973162cd32cSSong Gao         shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
1974162cd32cSSong Gao     }
1975162cd32cSSong Gao 
1976162cd32cSSong Gao     if (int128_lt(Vj->Q(0), int128_zero())) {
1977162cd32cSSong Gao         shft_res1 = int128_zero();
1978162cd32cSSong Gao     }
1979162cd32cSSong Gao     if (int128_lt(Vd->Q(0), int128_zero())) {
1980162cd32cSSong Gao         shft_res2 = int128_zero();
1981162cd32cSSong Gao     }
1982162cd32cSSong Gao 
1983162cd32cSSong Gao     mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
1984162cd32cSSong Gao     mask2  = int128_lshift(int128_one(), 64);
1985162cd32cSSong Gao 
1986162cd32cSSong Gao     if (int128_gt(shft_res1,  mask1)) {
1987162cd32cSSong Gao         Vd->D(0) = int128_getlo(mask1);
1988162cd32cSSong Gao     } else if (int128_lt(shft_res1, int128_neg(mask2))) {
1989162cd32cSSong Gao         Vd->D(0) = int128_getlo(mask2);
1990162cd32cSSong Gao     } else {
1991162cd32cSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
1992162cd32cSSong Gao     }
1993162cd32cSSong Gao 
1994162cd32cSSong Gao     if (int128_gt(shft_res2, mask1)) {
1995162cd32cSSong Gao         Vd->D(1) = int128_getlo(mask1);
1996162cd32cSSong Gao     } else if (int128_lt(shft_res2, int128_neg(mask2))) {
1997162cd32cSSong Gao         Vd->D(1) = int128_getlo(mask2);
1998162cd32cSSong Gao     } else {
1999162cd32cSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
2000162cd32cSSong Gao     }
2001162cd32cSSong Gao }
2002162cd32cSSong Gao 
/*
 * Generate vssrarni_bu_h, vssrarni_hu_w and vssrarni_wu_d from the
 * VSSRARNUI template: BIT = 16/32/64 with source accessors H/W/D and
 * destination accessors B/H/W.  The Q-sourced variant is the hand-written
 * vssrarni_du_q helper.
 */
2003162cd32cSSong Gao VSSRARNUI(vssrarni_bu_h, 16, B, H)
2004162cd32cSSong Gao VSSRARNUI(vssrarni_hu_w, 32, H, W)
2005162cd32cSSong Gao VSSRARNUI(vssrarni_wu_d, 64, W, D)
20062e105e12SSong Gao 
/*
 * DO_2OP - template for element-wise unary helpers.
 *
 *   NAME   name of the generated helper
 *   BIT    element width in bits; LSX_LEN / BIT elements are processed
 *   E      element accessor used for both source and destination
 *   DO_OP  macro or function applied to each source element
 *
 * The generated helper stores DO_OP(Vj element i) to Vd element i for
 * every i in [0, LSX_LEN / BIT).  desc is not used.
 */
#define DO_2OP(NAME, BIT, E, DO_OP)                  \
void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
{                                                    \
    VReg *Vd = vd;                                   \
    VReg *Vj = vj;                                   \
    const int count = LSX_LEN / BIT;                 \
    int idx;                                         \
                                                     \
    for (idx = 0; idx < count; idx++) {              \
        Vd->E(idx) = DO_OP(Vj->E(idx));              \
    }                                                \
}
20192e105e12SSong Gao 
/*
 * Count-leading-ones (CLO) and count-leading-zeros (CLZ) for 8-, 16-, 32-
 * and 64-bit values, built on clz32()/clz64().
 *
 * CLO is computed as the number of leading zeros of the complement.  The
 * 8- and 16-bit forms run on a 32-bit count and subtract the 24 or 16
 * extra leading bits; the CLO forms first mask the complement down to the
 * element width.
 *
 * The argument is parenthesised so that a compound expression such as
 * "a | b" is complemented and masked as a whole.
 */
#define DO_CLO_B(N)  (clz32(~(N) & 0xff) - 24)
#define DO_CLO_H(N)  (clz32(~(N) & 0xffff) - 16)
#define DO_CLO_W(N)  (clz32(~(N)))
#define DO_CLO_D(N)  (clz64(~(N)))
#define DO_CLZ_B(N)  (clz32(N) - 24)
#define DO_CLZ_H(N)  (clz32(N) - 16)
#define DO_CLZ_W(N)  (clz32(N))
#define DO_CLZ_D(N)  (clz64(N))
20282e105e12SSong Gao 
20292e105e12SSong Gao DO_2OP(vclo_b, 8, UB, DO_CLO_B)
20302e105e12SSong Gao DO_2OP(vclo_h, 16, UH, DO_CLO_H)
20312e105e12SSong Gao DO_2OP(vclo_w, 32, UW, DO_CLO_W)
20322e105e12SSong Gao DO_2OP(vclo_d, 64, UD, DO_CLO_D)
20332e105e12SSong Gao DO_2OP(vclz_b, 8, UB, DO_CLZ_B)
20342e105e12SSong Gao DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
20352e105e12SSong Gao DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
20362e105e12SSong Gao DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
2037bb22ee57SSong Gao 
2038bb22ee57SSong Gao #define VPCNT(NAME, BIT, E, FN)                      \
2039ff27e335SSong Gao void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
2040bb22ee57SSong Gao {                                                    \
2041bb22ee57SSong Gao     int i;                                           \
2042ff27e335SSong Gao     VReg *Vd = (VReg *)vd;                           \
2043ff27e335SSong Gao     VReg *Vj = (VReg *)vj;                           \
2044bb22ee57SSong Gao                                                      \
2045bb22ee57SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++)                \
2046bb22ee57SSong Gao     {                                                \
2047bb22ee57SSong Gao         Vd->E(i) = FN(Vj->E(i));                     \
2048bb22ee57SSong Gao     }                                                \
2049bb22ee57SSong Gao }
2050bb22ee57SSong Gao 
2051bb22ee57SSong Gao VPCNT(vpcnt_b, 8, UB, ctpop8)
2052bb22ee57SSong Gao VPCNT(vpcnt_h, 16, UH, ctpop16)
2053bb22ee57SSong Gao VPCNT(vpcnt_w, 32, UW, ctpop32)
2054bb22ee57SSong Gao VPCNT(vpcnt_d, 64, UD, ctpop64)
20550b1e6705SSong Gao 
/*
 * Single-bit primitives: clear, set or invert bit number 'bit' of 'a'.
 * The mask is built from 1ull, so 'bit' must be below 64.  Both arguments
 * are parenthesized so that compound expressions (e.g. "x | y" or
 * "c ? m : n") are evaluated as a unit.
 */
#define DO_BITCLR(a, bit) ((a) & ~(1ull << (bit)))
#define DO_BITSET(a, bit) ((a) | (1ull << (bit)))
#define DO_BITREV(a, bit) ((a) ^ (1ull << (bit)))
20590b1e6705SSong Gao 
20600b1e6705SSong Gao #define DO_BIT(NAME, BIT, E, DO_OP)                         \
20610b1e6705SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
20620b1e6705SSong Gao {                                                           \
20630b1e6705SSong Gao     int i;                                                  \
20640b1e6705SSong Gao     VReg *Vd = (VReg *)vd;                                  \
20650b1e6705SSong Gao     VReg *Vj = (VReg *)vj;                                  \
20660b1e6705SSong Gao     VReg *Vk = (VReg *)vk;                                  \
20670b1e6705SSong Gao                                                             \
20680b1e6705SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
20690b1e6705SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT);           \
20700b1e6705SSong Gao     }                                                       \
20710b1e6705SSong Gao }
20720b1e6705SSong Gao 
20730b1e6705SSong Gao DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
20740b1e6705SSong Gao DO_BIT(vbitclr_h, 16, UH, DO_BITCLR)
20750b1e6705SSong Gao DO_BIT(vbitclr_w, 32, UW, DO_BITCLR)
20760b1e6705SSong Gao DO_BIT(vbitclr_d, 64, UD, DO_BITCLR)
20770b1e6705SSong Gao DO_BIT(vbitset_b, 8, UB, DO_BITSET)
20780b1e6705SSong Gao DO_BIT(vbitset_h, 16, UH, DO_BITSET)
20790b1e6705SSong Gao DO_BIT(vbitset_w, 32, UW, DO_BITSET)
20800b1e6705SSong Gao DO_BIT(vbitset_d, 64, UD, DO_BITSET)
20810b1e6705SSong Gao DO_BIT(vbitrev_b, 8, UB, DO_BITREV)
20820b1e6705SSong Gao DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
20830b1e6705SSong Gao DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
20840b1e6705SSong Gao DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
20850b1e6705SSong Gao 
20860b1e6705SSong Gao #define DO_BITI(NAME, BIT, E, DO_OP)                            \
20870b1e6705SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
20880b1e6705SSong Gao {                                                               \
20890b1e6705SSong Gao     int i;                                                      \
20900b1e6705SSong Gao     VReg *Vd = (VReg *)vd;                                      \
20910b1e6705SSong Gao     VReg *Vj = (VReg *)vj;                                      \
20920b1e6705SSong Gao                                                                 \
20930b1e6705SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
20940b1e6705SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), imm);                        \
20950b1e6705SSong Gao     }                                                           \
20960b1e6705SSong Gao }
20970b1e6705SSong Gao 
20980b1e6705SSong Gao DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
20990b1e6705SSong Gao DO_BITI(vbitclri_h, 16, UH, DO_BITCLR)
21000b1e6705SSong Gao DO_BITI(vbitclri_w, 32, UW, DO_BITCLR)
21010b1e6705SSong Gao DO_BITI(vbitclri_d, 64, UD, DO_BITCLR)
21020b1e6705SSong Gao DO_BITI(vbitseti_b, 8, UB, DO_BITSET)
21030b1e6705SSong Gao DO_BITI(vbitseti_h, 16, UH, DO_BITSET)
21040b1e6705SSong Gao DO_BITI(vbitseti_w, 32, UW, DO_BITSET)
21050b1e6705SSong Gao DO_BITI(vbitseti_d, 64, UD, DO_BITSET)
21060b1e6705SSong Gao DO_BITI(vbitrevi_b, 8, UB, DO_BITREV)
21070b1e6705SSong Gao DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
21080b1e6705SSong Gao DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
21090b1e6705SSong Gao DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
2110ac95a0b9SSong Gao 
2111ac95a0b9SSong Gao #define VFRSTP(NAME, BIT, MASK, E)                             \
211204711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
2113ac95a0b9SSong Gao {                                                              \
2114ac95a0b9SSong Gao     int i, m;                                                  \
211504711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
211604711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
211704711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
2118ac95a0b9SSong Gao                                                                \
2119ac95a0b9SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                        \
2120ac95a0b9SSong Gao         if (Vj->E(i) < 0) {                                    \
2121ac95a0b9SSong Gao             break;                                             \
2122ac95a0b9SSong Gao         }                                                      \
2123ac95a0b9SSong Gao     }                                                          \
2124ac95a0b9SSong Gao     m = Vk->E(0) & MASK;                                       \
2125ac95a0b9SSong Gao     Vd->E(m) = i;                                              \
2126ac95a0b9SSong Gao }
2127ac95a0b9SSong Gao 
2128ac95a0b9SSong Gao VFRSTP(vfrstp_b, 8, 0xf, B)
2129ac95a0b9SSong Gao VFRSTP(vfrstp_h, 16, 0x7, H)
2130ac95a0b9SSong Gao 
2131ac95a0b9SSong Gao #define VFRSTPI(NAME, BIT, E)                                      \
2132329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
2133ac95a0b9SSong Gao {                                                                  \
2134ac95a0b9SSong Gao     int i, m;                                                      \
2135329517d5SSong Gao     VReg *Vd = (VReg *)vd;                                         \
2136329517d5SSong Gao     VReg *Vj = (VReg *)vj;                                         \
2137ac95a0b9SSong Gao                                                                    \
2138ac95a0b9SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                            \
2139ac95a0b9SSong Gao         if (Vj->E(i) < 0) {                                        \
2140ac95a0b9SSong Gao             break;                                                 \
2141ac95a0b9SSong Gao         }                                                          \
2142ac95a0b9SSong Gao     }                                                              \
2143ac95a0b9SSong Gao     m = imm % (LSX_LEN/BIT);                                       \
2144ac95a0b9SSong Gao     Vd->E(m) = i;                                                  \
2145ac95a0b9SSong Gao }
2146ac95a0b9SSong Gao 
2147ac95a0b9SSong Gao VFRSTPI(vfrstpi_b, 8,  B)
2148ac95a0b9SSong Gao VFRSTPI(vfrstpi_h, 16, H)
2149aca67472SSong Gao 
2150aca67472SSong Gao static void vec_update_fcsr0_mask(CPULoongArchState *env,
2151aca67472SSong Gao                                   uintptr_t pc, int mask)
2152aca67472SSong Gao {
2153aca67472SSong Gao     int flags = get_float_exception_flags(&env->fp_status);
2154aca67472SSong Gao 
2155aca67472SSong Gao     set_float_exception_flags(0, &env->fp_status);
2156aca67472SSong Gao 
2157aca67472SSong Gao     flags &= ~mask;
2158aca67472SSong Gao 
2159aca67472SSong Gao     if (flags) {
2160aca67472SSong Gao         flags = ieee_ex_to_loongarch(flags);
2161aca67472SSong Gao         UPDATE_FP_CAUSE(env->fcsr0, flags);
2162aca67472SSong Gao     }
2163aca67472SSong Gao 
2164aca67472SSong Gao     if (GET_FP_ENABLES(env->fcsr0) & flags) {
2165aca67472SSong Gao         do_raise_exception(env, EXCCODE_FPE, pc);
2166aca67472SSong Gao     } else {
2167aca67472SSong Gao         UPDATE_FP_FLAGS(env->fcsr0, flags);
2168aca67472SSong Gao     }
2169aca67472SSong Gao }
2170aca67472SSong Gao 
2171aca67472SSong Gao static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc)
2172aca67472SSong Gao {
2173aca67472SSong Gao     vec_update_fcsr0_mask(env, pc, 0);
2174aca67472SSong Gao }
2175aca67472SSong Gao 
/*
 * Apply SET_FP_CAUSE to env->fcsr0 with the value 0 (presumably clearing
 * the FP cause bits).  The vector FP helpers in this file call it once
 * before their per-element loop.
 */
static inline void vec_clear_cause(CPULoongArchState *env)
{
    SET_FP_CAUSE(env->fcsr0, 0);
}
2180aca67472SSong Gao 
2181aca67472SSong Gao #define DO_3OP_F(NAME, BIT, E, FN)                          \
21823b286753SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk,             \
21833b286753SSong Gao                   CPULoongArchState *env, uint32_t desc)    \
2184aca67472SSong Gao {                                                           \
2185aca67472SSong Gao     int i;                                                  \
21863b286753SSong Gao     VReg *Vd = (VReg *)vd;                                  \
21873b286753SSong Gao     VReg *Vj = (VReg *)vj;                                  \
21883b286753SSong Gao     VReg *Vk = (VReg *)vk;                                  \
2189aca67472SSong Gao                                                             \
2190aca67472SSong Gao     vec_clear_cause(env);                                   \
2191aca67472SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
2192aca67472SSong Gao         Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
2193aca67472SSong Gao         vec_update_fcsr0(env, GETPC());                     \
2194aca67472SSong Gao     }                                                       \
2195aca67472SSong Gao }
2196aca67472SSong Gao 
2197aca67472SSong Gao DO_3OP_F(vfadd_s, 32, UW, float32_add)
2198aca67472SSong Gao DO_3OP_F(vfadd_d, 64, UD, float64_add)
2199aca67472SSong Gao DO_3OP_F(vfsub_s, 32, UW, float32_sub)
2200aca67472SSong Gao DO_3OP_F(vfsub_d, 64, UD, float64_sub)
2201aca67472SSong Gao DO_3OP_F(vfmul_s, 32, UW, float32_mul)
2202aca67472SSong Gao DO_3OP_F(vfmul_d, 64, UD, float64_mul)
2203aca67472SSong Gao DO_3OP_F(vfdiv_s, 32, UW, float32_div)
2204aca67472SSong Gao DO_3OP_F(vfdiv_d, 64, UD, float64_div)
2205aca67472SSong Gao DO_3OP_F(vfmax_s, 32, UW, float32_maxnum)
2206aca67472SSong Gao DO_3OP_F(vfmax_d, 64, UD, float64_maxnum)
2207aca67472SSong Gao DO_3OP_F(vfmin_s, 32, UW, float32_minnum)
2208aca67472SSong Gao DO_3OP_F(vfmin_d, 64, UD, float64_minnum)
2209aca67472SSong Gao DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag)
2210aca67472SSong Gao DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag)
2211aca67472SSong Gao DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
2212aca67472SSong Gao DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
2213aca67472SSong Gao 
2214aca67472SSong Gao #define DO_4OP_F(NAME, BIT, E, FN, flags)                                    \
2215e2600dadSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, void *va,                    \
2216e2600dadSSong Gao                   CPULoongArchState *env, uint32_t desc)                     \
2217aca67472SSong Gao {                                                                            \
2218aca67472SSong Gao     int i;                                                                   \
2219e2600dadSSong Gao     VReg *Vd = (VReg *)vd;                                                   \
2220e2600dadSSong Gao     VReg *Vj = (VReg *)vj;                                                   \
2221e2600dadSSong Gao     VReg *Vk = (VReg *)vk;                                                   \
2222e2600dadSSong Gao     VReg *Va = (VReg *)va;                                                   \
2223aca67472SSong Gao                                                                              \
2224aca67472SSong Gao     vec_clear_cause(env);                                                    \
2225aca67472SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
2226aca67472SSong Gao         Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
2227aca67472SSong Gao         vec_update_fcsr0(env, GETPC());                                      \
2228aca67472SSong Gao     }                                                                        \
2229aca67472SSong Gao }
2230aca67472SSong Gao 
2231aca67472SSong Gao DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
2232aca67472SSong Gao DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0)
2233aca67472SSong Gao DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c)
2234aca67472SSong Gao DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c)
2235aca67472SSong Gao DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result)
2236aca67472SSong Gao DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result)
2237aca67472SSong Gao DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
2238aca67472SSong Gao          float_muladd_negate_c | float_muladd_negate_result)
2239aca67472SSong Gao DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
2240aca67472SSong Gao          float_muladd_negate_c | float_muladd_negate_result)
2241aca67472SSong Gao 
/*
 * DO_2OP_F - generate an element-wise unary floating-point helper.
 *
 * NAME: helper name passed to HELPER()
 * BIT:  element width in bits
 * E:    VReg element accessor
 * FN:   per-element routine called as FN(env, element); it is responsible
 *       for folding softfloat flags into fcsr0 itself
 *
 * The cause state is cleared once before the loop.
 *
 * The number of elements processed is simd_oprsz(desc) / (BIT / 8), the
 * same scheme DO_ODD_EVEN uses.
 * NOTE(review): LSX callers are expected to pass a 16-byte operation size
 * in desc - confirm against the translator.
 */
#define DO_2OP_F(NAME, BIT, E, FN)                       \
void HELPER(NAME)(void *vd, void *vj,                    \
                  CPULoongArchState *env, uint32_t desc) \
{                                                        \
    int i;                                               \
    VReg *Vd = (VReg *)vd;                               \
    VReg *Vj = (VReg *)vj;                               \
    int oprsz = simd_oprsz(desc);                        \
                                                         \
    vec_clear_cause(env);                                \
    for (i = 0; i < oprsz / (BIT / 8); i++) {            \
        Vd->E(i) = FN(env, Vj->E(i));                    \
    }                                                    \
}
2255aca67472SSong Gao 
2256aca67472SSong Gao #define FLOGB(BIT, T)                                            \
2257aca67472SSong Gao static T do_flogb_## BIT(CPULoongArchState *env, T fj)           \
2258aca67472SSong Gao {                                                                \
2259aca67472SSong Gao     T fp, fd;                                                    \
2260aca67472SSong Gao     float_status *status = &env->fp_status;                      \
2261aca67472SSong Gao     FloatRoundMode old_mode = get_float_rounding_mode(status);   \
2262aca67472SSong Gao                                                                  \
2263aca67472SSong Gao     set_float_rounding_mode(float_round_down, status);           \
2264aca67472SSong Gao     fp = float ## BIT ##_log2(fj, status);                       \
2265aca67472SSong Gao     fd = float ## BIT ##_round_to_int(fp, status);               \
2266aca67472SSong Gao     set_float_rounding_mode(old_mode, status);                   \
2267aca67472SSong Gao     vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact);     \
2268aca67472SSong Gao     return fd;                                                   \
2269aca67472SSong Gao }
2270aca67472SSong Gao 
2271aca67472SSong Gao FLOGB(32, uint32_t)
2272aca67472SSong Gao FLOGB(64, uint64_t)
2273aca67472SSong Gao 
2274aca67472SSong Gao #define FCLASS(NAME, BIT, E, FN)                         \
2275226bf881SSong Gao void HELPER(NAME)(void *vd, void *vj,                    \
2276226bf881SSong Gao                   CPULoongArchState *env, uint32_t desc) \
2277aca67472SSong Gao {                                                        \
2278aca67472SSong Gao     int i;                                               \
2279226bf881SSong Gao     VReg *Vd = (VReg *)vd;                               \
2280226bf881SSong Gao     VReg *Vj = (VReg *)vj;                               \
2281aca67472SSong Gao                                                          \
2282aca67472SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                  \
2283aca67472SSong Gao         Vd->E(i) = FN(env, Vj->E(i));                    \
2284aca67472SSong Gao     }                                                    \
2285aca67472SSong Gao }
2286aca67472SSong Gao 
2287aca67472SSong Gao FCLASS(vfclass_s, 32, UW, helper_fclass_s)
2288aca67472SSong Gao FCLASS(vfclass_d, 64, UD, helper_fclass_d)
2289aca67472SSong Gao 
2290aca67472SSong Gao #define FSQRT(BIT, T)                                  \
2291aca67472SSong Gao static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \
2292aca67472SSong Gao {                                                      \
2293aca67472SSong Gao     T fd;                                              \
2294aca67472SSong Gao     fd = float ## BIT ##_sqrt(fj, &env->fp_status);    \
2295aca67472SSong Gao     vec_update_fcsr0(env, GETPC());                    \
2296aca67472SSong Gao     return fd;                                         \
2297aca67472SSong Gao }
2298aca67472SSong Gao 
2299aca67472SSong Gao FSQRT(32, uint32_t)
2300aca67472SSong Gao FSQRT(64, uint64_t)
2301aca67472SSong Gao 
2302aca67472SSong Gao #define FRECIP(BIT, T)                                                  \
2303aca67472SSong Gao static T do_frecip_## BIT(CPULoongArchState *env, T fj)                 \
2304aca67472SSong Gao {                                                                       \
2305aca67472SSong Gao     T fd;                                                               \
2306aca67472SSong Gao     fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \
2307aca67472SSong Gao     vec_update_fcsr0(env, GETPC());                                     \
2308aca67472SSong Gao     return fd;                                                          \
2309aca67472SSong Gao }
2310aca67472SSong Gao 
2311aca67472SSong Gao FRECIP(32, uint32_t)
2312aca67472SSong Gao FRECIP(64, uint64_t)
2313aca67472SSong Gao 
2314aca67472SSong Gao #define FRSQRT(BIT, T)                                                  \
2315aca67472SSong Gao static T do_frsqrt_## BIT(CPULoongArchState *env, T fj)                 \
2316aca67472SSong Gao {                                                                       \
2317aca67472SSong Gao     T fd, fp;                                                           \
2318aca67472SSong Gao     fp = float ## BIT ##_sqrt(fj, &env->fp_status);                     \
2319aca67472SSong Gao     fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \
2320aca67472SSong Gao     vec_update_fcsr0(env, GETPC());                                     \
2321aca67472SSong Gao     return fd;                                                          \
2322aca67472SSong Gao }
2323aca67472SSong Gao 
2324aca67472SSong Gao FRSQRT(32, uint32_t)
2325aca67472SSong Gao FRSQRT(64, uint64_t)
2326aca67472SSong Gao 
/*
 * Unary floating-point vector helpers: each one applies the named do_*()
 * routine (generated by FLOGB / FSQRT / FRECIP / FRSQRT) to every 32-bit
 * or 64-bit element through DO_2OP_F.
 */
DO_2OP_F(vflogb_s, 32, UW, do_flogb_32)
DO_2OP_F(vflogb_d, 64, UD, do_flogb_64)
DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32)
DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64)
DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32)
DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64)
DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32)
DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64)
2335399665d2SSong Gao 
2336399665d2SSong Gao static uint32_t float16_cvt_float32(uint16_t h, float_status *status)
2337399665d2SSong Gao {
2338399665d2SSong Gao     return float16_to_float32(h, true, status);
2339399665d2SSong Gao }
2340399665d2SSong Gao static uint64_t float32_cvt_float64(uint32_t s, float_status *status)
2341399665d2SSong Gao {
2342399665d2SSong Gao     return float32_to_float64(s, status);
2343399665d2SSong Gao }
2344399665d2SSong Gao 
2345399665d2SSong Gao static uint16_t float32_cvt_float16(uint32_t s, float_status *status)
2346399665d2SSong Gao {
2347399665d2SSong Gao     return float32_to_float16(s, true, status);
2348399665d2SSong Gao }
2349399665d2SSong Gao static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
2350399665d2SSong Gao {
2351399665d2SSong Gao     return float64_to_float32(d, status);
2352399665d2SSong Gao }
2353399665d2SSong Gao 
2354226bf881SSong Gao void HELPER(vfcvtl_s_h)(void *vd, void *vj,
2355226bf881SSong Gao                         CPULoongArchState *env, uint32_t desc)
2356399665d2SSong Gao {
2357399665d2SSong Gao     int i;
2358399665d2SSong Gao     VReg temp;
2359226bf881SSong Gao     VReg *Vd = (VReg *)vd;
2360226bf881SSong Gao     VReg *Vj = (VReg *)vj;
2361399665d2SSong Gao 
2362399665d2SSong Gao     vec_clear_cause(env);
2363399665d2SSong Gao     for (i = 0; i < LSX_LEN/32; i++) {
2364399665d2SSong Gao         temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status);
2365399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2366399665d2SSong Gao     }
2367399665d2SSong Gao     *Vd = temp;
2368399665d2SSong Gao }
2369399665d2SSong Gao 
2370226bf881SSong Gao void HELPER(vfcvtl_d_s)(void *vd, void *vj,
2371226bf881SSong Gao                         CPULoongArchState *env, uint32_t desc)
2372399665d2SSong Gao {
2373399665d2SSong Gao     int i;
2374399665d2SSong Gao     VReg temp;
2375226bf881SSong Gao     VReg *Vd = (VReg *)vd;
2376226bf881SSong Gao     VReg *Vj = (VReg *)vj;
2377399665d2SSong Gao 
2378399665d2SSong Gao     vec_clear_cause(env);
2379399665d2SSong Gao     for (i = 0; i < LSX_LEN/64; i++) {
2380399665d2SSong Gao         temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status);
2381399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2382399665d2SSong Gao     }
2383399665d2SSong Gao     *Vd = temp;
2384399665d2SSong Gao }
2385399665d2SSong Gao 
2386226bf881SSong Gao void HELPER(vfcvth_s_h)(void *vd, void *vj,
2387226bf881SSong Gao                         CPULoongArchState *env, uint32_t desc)
2388399665d2SSong Gao {
2389399665d2SSong Gao     int i;
2390399665d2SSong Gao     VReg temp;
2391226bf881SSong Gao     VReg *Vd = (VReg *)vd;
2392226bf881SSong Gao     VReg *Vj = (VReg *)vj;
2393399665d2SSong Gao 
2394399665d2SSong Gao     vec_clear_cause(env);
2395399665d2SSong Gao     for (i = 0; i < LSX_LEN/32; i++) {
2396399665d2SSong Gao         temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status);
2397399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2398399665d2SSong Gao     }
2399399665d2SSong Gao     *Vd = temp;
2400399665d2SSong Gao }
2401399665d2SSong Gao 
2402226bf881SSong Gao void HELPER(vfcvth_d_s)(void *vd, void *vj,
2403226bf881SSong Gao                         CPULoongArchState *env, uint32_t desc)
2404399665d2SSong Gao {
2405399665d2SSong Gao     int i;
2406399665d2SSong Gao     VReg temp;
2407226bf881SSong Gao     VReg *Vd = (VReg *)vd;
2408226bf881SSong Gao     VReg *Vj = (VReg *)vj;
2409399665d2SSong Gao 
2410399665d2SSong Gao     vec_clear_cause(env);
2411399665d2SSong Gao     for (i = 0; i < LSX_LEN/64; i++) {
2412399665d2SSong Gao         temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status);
2413399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2414399665d2SSong Gao     }
2415399665d2SSong Gao     *Vd = temp;
2416399665d2SSong Gao }
2417399665d2SSong Gao 
24183b286753SSong Gao void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
24193b286753SSong Gao                        CPULoongArchState *env, uint32_t desc)
2420399665d2SSong Gao {
2421399665d2SSong Gao     int i;
2422399665d2SSong Gao     VReg temp;
24233b286753SSong Gao     VReg *Vd = (VReg *)vd;
24243b286753SSong Gao     VReg *Vj = (VReg *)vj;
24253b286753SSong Gao     VReg *Vk = (VReg *)vk;
2426399665d2SSong Gao 
2427399665d2SSong Gao     vec_clear_cause(env);
2428399665d2SSong Gao     for(i = 0; i < LSX_LEN/32; i++) {
2429399665d2SSong Gao         temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status);
2430399665d2SSong Gao         temp.UH(i)  = float32_cvt_float16(Vk->UW(i), &env->fp_status);
2431399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2432399665d2SSong Gao     }
2433399665d2SSong Gao     *Vd = temp;
2434399665d2SSong Gao }
2435399665d2SSong Gao 
24363b286753SSong Gao void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
24373b286753SSong Gao                        CPULoongArchState *env, uint32_t desc)
2438399665d2SSong Gao {
2439399665d2SSong Gao     int i;
2440399665d2SSong Gao     VReg temp;
24413b286753SSong Gao     VReg *Vd = (VReg *)vd;
24423b286753SSong Gao     VReg *Vj = (VReg *)vj;
24433b286753SSong Gao     VReg *Vk = (VReg *)vk;
2444399665d2SSong Gao 
2445399665d2SSong Gao     vec_clear_cause(env);
2446399665d2SSong Gao     for(i = 0; i < LSX_LEN/64; i++) {
2447399665d2SSong Gao         temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status);
2448399665d2SSong Gao         temp.UW(i)  = float64_cvt_float32(Vk->UD(i), &env->fp_status);
2449399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2450399665d2SSong Gao     }
2451399665d2SSong Gao     *Vd = temp;
2452399665d2SSong Gao }
2453399665d2SSong Gao 
2454226bf881SSong Gao void HELPER(vfrint_s)(void *vd, void *vj,
2455226bf881SSong Gao                       CPULoongArchState *env, uint32_t desc)
2456399665d2SSong Gao {
2457399665d2SSong Gao     int i;
2458226bf881SSong Gao     VReg *Vd = (VReg *)vd;
2459226bf881SSong Gao     VReg *Vj = (VReg *)vj;
2460399665d2SSong Gao 
2461399665d2SSong Gao     vec_clear_cause(env);
2462399665d2SSong Gao     for (i = 0; i < 4; i++) {
2463399665d2SSong Gao         Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status);
2464399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2465399665d2SSong Gao     }
2466399665d2SSong Gao }
2467399665d2SSong Gao 
2468226bf881SSong Gao void HELPER(vfrint_d)(void *vd, void *vj,
2469226bf881SSong Gao                       CPULoongArchState *env, uint32_t desc)
2470399665d2SSong Gao {
2471399665d2SSong Gao     int i;
2472226bf881SSong Gao     VReg *Vd = (VReg *)vd;
2473226bf881SSong Gao     VReg *Vj = (VReg *)vj;
2474399665d2SSong Gao 
2475399665d2SSong Gao     vec_clear_cause(env);
2476399665d2SSong Gao     for (i = 0; i < 2; i++) {
2477399665d2SSong Gao         Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status);
2478399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2479399665d2SSong Gao     }
2480399665d2SSong Gao }
2481399665d2SSong Gao 
2482399665d2SSong Gao #define FCVT_2OP(NAME, BIT, E, MODE)                                        \
2483226bf881SSong Gao void HELPER(NAME)(void *vd, void *vj,                                       \
2484226bf881SSong Gao                   CPULoongArchState *env, uint32_t desc)                    \
2485399665d2SSong Gao {                                                                           \
2486399665d2SSong Gao     int i;                                                                  \
2487226bf881SSong Gao     VReg *Vd = (VReg *)vd;                                                  \
2488226bf881SSong Gao     VReg *Vj = (VReg *)vj;                                                  \
2489399665d2SSong Gao                                                                             \
2490399665d2SSong Gao     vec_clear_cause(env);                                                   \
2491399665d2SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                     \
2492399665d2SSong Gao         FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
2493399665d2SSong Gao         set_float_rounding_mode(MODE, &env->fp_status);                     \
2494399665d2SSong Gao         Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
2495399665d2SSong Gao         set_float_rounding_mode(old_mode, &env->fp_status);                 \
2496399665d2SSong Gao         vec_update_fcsr0(env, GETPC());                                     \
2497399665d2SSong Gao     }                                                                       \
2498399665d2SSong Gao }
2499399665d2SSong Gao 
2500399665d2SSong Gao FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even)
2501399665d2SSong Gao FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even)
2502399665d2SSong Gao FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero)
2503399665d2SSong Gao FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero)
2504399665d2SSong Gao FCVT_2OP(vfrintrp_s, 32, UW, float_round_up)
2505399665d2SSong Gao FCVT_2OP(vfrintrp_d, 64, UD, float_round_up)
2506399665d2SSong Gao FCVT_2OP(vfrintrm_s, 32, UW, float_round_down)
2507399665d2SSong Gao FCVT_2OP(vfrintrm_d, 64, UD, float_round_down)
2508399665d2SSong Gao 
/*
 * FTINT - generate do_ftint<NAME>(): float-to-integer conversion of one
 * element under a fixed rounding mode.
 *
 * NAME:       suffix of the generated function
 * FMT1, FMT2: source / destination formats; the conversion itself is
 *             delegated to do_<FMT1>_to_<FMT2>() (see DO_FTINT)
 * T1, T2:     C types of the source / destination element
 * MODE:       FloatRoundMode in effect during the conversion
 *
 * NOTE(review): do_<FMT1>_to_<FMT2>() calls vec_update_fcsr0() before the
 * saved rounding mode is restored here; if that call leaves through an
 * exception, MODE would stay set in fp_status - confirm.
 */
#define FTINT(NAME, FMT1, FMT2, T1, T2,  MODE)                          \
static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj)               \
{                                                                       \
    float_status *fst = &env->fp_status;                                \
    const FloatRoundMode saved = get_float_rounding_mode(fst);          \
    T2 converted;                                                       \
                                                                        \
    set_float_rounding_mode(MODE, fst);                                 \
    converted = do_## FMT1 ##_to_## FMT2(env, fj);                      \
    set_float_rounding_mode(saved, fst);                                \
    return converted;                                                   \
}
2520399665d2SSong Gao 
2521399665d2SSong Gao #define DO_FTINT(FMT1, FMT2, T1, T2)                                         \
2522399665d2SSong Gao static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj)            \
2523399665d2SSong Gao {                                                                            \
2524399665d2SSong Gao     T2 fd;                                                                   \
2525399665d2SSong Gao                                                                              \
2526399665d2SSong Gao     fd = FMT1 ##_to_## FMT2(fj, &env->fp_status);                            \
2527399665d2SSong Gao     if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \
2528399665d2SSong Gao         if (FMT1 ##_is_any_nan(fj)) {                                        \
2529399665d2SSong Gao             fd = 0;                                                          \
2530399665d2SSong Gao         }                                                                    \
2531399665d2SSong Gao     }                                                                        \
2532399665d2SSong Gao     vec_update_fcsr0(env, GETPC());                                          \
2533399665d2SSong Gao     return fd;                                                               \
2534399665d2SSong Gao }
2535399665d2SSong Gao 
2536399665d2SSong Gao DO_FTINT(float32, int32, uint32_t, uint32_t)
2537399665d2SSong Gao DO_FTINT(float64, int64, uint64_t, uint64_t)
2538399665d2SSong Gao DO_FTINT(float32, uint32, uint32_t, uint32_t)
2539399665d2SSong Gao DO_FTINT(float64, uint64, uint64_t, uint64_t)
2540399665d2SSong Gao DO_FTINT(float64, int32, uint64_t, uint32_t)
2541399665d2SSong Gao DO_FTINT(float32, int64, uint32_t, uint64_t)
2542399665d2SSong Gao 
/*
 * Signed same-width converters with a forced rounding mode.  The name
 * prefix mirrors the MODE argument: rne = float_round_nearest_even,
 * rp = float_round_up, rz = float_round_to_zero, rm = float_round_down.
 */
FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even)
FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even)
FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up)
FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up)
FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero)
FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero)
FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down)
FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down)

/*
 * Vector helpers generated by DO_2OP_F (defined earlier in this file, not
 * visible in this section) from the converters above.  The last two pass
 * the DO_FTINT converter directly, so no rounding mode is forced for them.
 */
DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s)
DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d)
DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s)
DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d)
DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s)
DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d)
DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s)
DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d)
DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32)
DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64)

/* Unsigned same-width converters, round-to-zero variant. */
FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero)
FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero)

/* Unsigned vector helpers; the last two do not force a rounding mode. */
DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s)
DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d)
DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32)
DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64)

/* Narrowing float64 -> int32 converters, one per forced rounding mode. */
FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down)
FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
2575399665d2SSong Gao 
2576399665d2SSong Gao #define FTINT_W_D(NAME, FN)                              \
25773b286753SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk,          \
25783b286753SSong Gao                   CPULoongArchState *env, uint32_t desc) \
2579399665d2SSong Gao {                                                        \
2580399665d2SSong Gao     int i;                                               \
2581399665d2SSong Gao     VReg temp;                                           \
25823b286753SSong Gao     VReg *Vd = (VReg *)vd;                               \
25833b286753SSong Gao     VReg *Vj = (VReg *)vj;                               \
25843b286753SSong Gao     VReg *Vk = (VReg *)vk;                               \
2585399665d2SSong Gao                                                          \
2586399665d2SSong Gao     vec_clear_cause(env);                                \
2587399665d2SSong Gao     for (i = 0; i < 2; i++) {                            \
2588399665d2SSong Gao         temp.W(i + 2) = FN(env, Vj->UD(i));              \
2589399665d2SSong Gao         temp.W(i) = FN(env, Vk->UD(i));                  \
2590399665d2SSong Gao     }                                                    \
2591399665d2SSong Gao     *Vd = temp;                                          \
2592399665d2SSong Gao }
2593399665d2SSong Gao 
2594399665d2SSong Gao FTINT_W_D(vftint_w_d, do_float64_to_int32)
2595399665d2SSong Gao FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d)
2596399665d2SSong Gao FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d)
2597399665d2SSong Gao FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d)
2598399665d2SSong Gao FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d)
2599399665d2SSong Gao 
/*
 * Widening float32 -> int64 converters, one per forced rounding mode.
 * The "l" and "h" flavours are generated with identical arguments and
 * differ only in name; they are consumed by the FTINTL_L_S and FTINTH_L_S
 * helper instantiations further down.
 */
FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
2608399665d2SSong Gao 
2609399665d2SSong Gao #define FTINTL_L_S(NAME, FN)                             \
2610226bf881SSong Gao void HELPER(NAME)(void *vd, void *vj,                    \
2611226bf881SSong Gao                   CPULoongArchState *env, uint32_t desc) \
2612399665d2SSong Gao {                                                        \
2613399665d2SSong Gao     int i;                                               \
2614399665d2SSong Gao     VReg temp;                                           \
2615226bf881SSong Gao     VReg *Vd = (VReg *)vd;                               \
2616226bf881SSong Gao     VReg *Vj = (VReg *)vj;                               \
2617399665d2SSong Gao                                                          \
2618399665d2SSong Gao     vec_clear_cause(env);                                \
2619399665d2SSong Gao     for (i = 0; i < 2; i++) {                            \
2620399665d2SSong Gao         temp.D(i) = FN(env, Vj->UW(i));                  \
2621399665d2SSong Gao     }                                                    \
2622399665d2SSong Gao     *Vd = temp;                                          \
2623399665d2SSong Gao }
2624399665d2SSong Gao 
2625399665d2SSong Gao FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
2626399665d2SSong Gao FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s)
2627399665d2SSong Gao FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
2628399665d2SSong Gao FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
2629399665d2SSong Gao FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
2630399665d2SSong Gao 
2631399665d2SSong Gao #define FTINTH_L_S(NAME, FN)                             \
2632226bf881SSong Gao void HELPER(NAME)(void *vd, void *vj,                    \
2633226bf881SSong Gao                   CPULoongArchState *env, uint32_t desc) \
2634399665d2SSong Gao {                                                        \
2635399665d2SSong Gao     int i;                                               \
2636399665d2SSong Gao     VReg temp;                                           \
2637226bf881SSong Gao     VReg *Vd = (VReg *)vd;                               \
2638226bf881SSong Gao     VReg *Vj = (VReg *)vj;                               \
2639399665d2SSong Gao                                                          \
2640399665d2SSong Gao     vec_clear_cause(env);                                \
2641399665d2SSong Gao     for (i = 0; i < 2; i++) {                            \
2642399665d2SSong Gao         temp.D(i) = FN(env, Vj->UW(i + 2));              \
2643399665d2SSong Gao     }                                                    \
2644399665d2SSong Gao     *Vd = temp;                                          \
2645399665d2SSong Gao }
2646399665d2SSong Gao 
2647399665d2SSong Gao FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
2648399665d2SSong Gao FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s)
2649399665d2SSong Gao FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s)
2650399665d2SSong Gao FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s)
2651399665d2SSong Gao FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s)
2652399665d2SSong Gao 
2653399665d2SSong Gao #define FFINT(NAME, FMT1, FMT2, T1, T2)                    \
2654399665d2SSong Gao static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \
2655399665d2SSong Gao {                                                          \
2656399665d2SSong Gao     T2 fd;                                                 \
2657399665d2SSong Gao                                                            \
2658399665d2SSong Gao     fd = FMT1 ##_to_## FMT2(fj, &env->fp_status);          \
2659399665d2SSong Gao     vec_update_fcsr0(env, GETPC());                        \
2660399665d2SSong Gao     return fd;                                             \
2661399665d2SSong Gao }
2662399665d2SSong Gao 
2663399665d2SSong Gao FFINT(s_w, int32, float32, int32_t, uint32_t)
2664399665d2SSong Gao FFINT(d_l, int64, float64, int64_t, uint64_t)
2665399665d2SSong Gao FFINT(s_wu, uint32, float32, uint32_t, uint32_t)
2666399665d2SSong Gao FFINT(d_lu, uint64, float64, uint64_t, uint64_t)
2667399665d2SSong Gao 
2668399665d2SSong Gao DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w)
2669399665d2SSong Gao DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l)
2670399665d2SSong Gao DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu)
2671399665d2SSong Gao DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
2672399665d2SSong Gao 
2673226bf881SSong Gao void HELPER(vffintl_d_w)(void *vd, void *vj,
2674226bf881SSong Gao                          CPULoongArchState *env, uint32_t desc)
2675399665d2SSong Gao {
2676399665d2SSong Gao     int i;
2677399665d2SSong Gao     VReg temp;
2678226bf881SSong Gao     VReg *Vd = (VReg *)vd;
2679226bf881SSong Gao     VReg *Vj = (VReg *)vj;
2680399665d2SSong Gao 
2681399665d2SSong Gao     vec_clear_cause(env);
2682399665d2SSong Gao     for (i = 0; i < 2; i++) {
2683399665d2SSong Gao         temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status);
2684399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2685399665d2SSong Gao     }
2686399665d2SSong Gao     *Vd = temp;
2687399665d2SSong Gao }
2688399665d2SSong Gao 
2689226bf881SSong Gao void HELPER(vffinth_d_w)(void *vd, void *vj,
2690226bf881SSong Gao                          CPULoongArchState *env, uint32_t desc)
2691399665d2SSong Gao {
2692399665d2SSong Gao     int i;
2693399665d2SSong Gao     VReg temp;
2694226bf881SSong Gao     VReg *Vd = (VReg *)vd;
2695226bf881SSong Gao     VReg *Vj = (VReg *)vj;
2696399665d2SSong Gao 
2697399665d2SSong Gao     vec_clear_cause(env);
2698399665d2SSong Gao     for (i = 0; i < 2; i++) {
2699399665d2SSong Gao         temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status);
2700399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2701399665d2SSong Gao     }
2702399665d2SSong Gao     *Vd = temp;
2703399665d2SSong Gao }
2704399665d2SSong Gao 
27053b286753SSong Gao void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
27063b286753SSong Gao                         CPULoongArchState *env, uint32_t desc)
2707399665d2SSong Gao {
2708399665d2SSong Gao     int i;
2709399665d2SSong Gao     VReg temp;
27103b286753SSong Gao     VReg *Vd = (VReg *)vd;
27113b286753SSong Gao     VReg *Vj = (VReg *)vj;
27123b286753SSong Gao     VReg *Vk = (VReg *)vk;
2713399665d2SSong Gao 
2714399665d2SSong Gao     vec_clear_cause(env);
2715399665d2SSong Gao     for (i = 0; i < 2; i++) {
2716399665d2SSong Gao         temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status);
2717399665d2SSong Gao         temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status);
2718399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2719399665d2SSong Gao     }
2720399665d2SSong Gao     *Vd = temp;
2721399665d2SSong Gao }
2722f435e1e5SSong Gao 
/*
 * Element comparison primitives: evaluate to -1 (all bits set once stored
 * in an integer element) when the relation holds and to 0 otherwise.
 *
 * Both arguments are parenthesised so that an argument containing an
 * operator of lower precedence than the comparison (e.g. `x | y`, `x & y`
 * or a conditional expression) is compared as a whole.
 */
#define VSEQ(a, b) ((a) == (b) ? -1 : 0)
#define VSLE(a, b) ((a) <= (b) ? -1 : 0)
#define VSLT(a, b) ((a) < (b) ? -1 : 0)
2726f435e1e5SSong Gao 
2727f435e1e5SSong Gao #define VCMPI(NAME, BIT, E, DO_OP)                              \
2728f435e1e5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
2729f435e1e5SSong Gao {                                                               \
2730f435e1e5SSong Gao     int i;                                                      \
2731f435e1e5SSong Gao     VReg *Vd = (VReg *)vd;                                      \
2732f435e1e5SSong Gao     VReg *Vj = (VReg *)vj;                                      \
2733f435e1e5SSong Gao     typedef __typeof(Vd->E(0)) TD;                              \
2734f435e1e5SSong Gao                                                                 \
2735f435e1e5SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
2736f435e1e5SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), (TD)imm);                    \
2737f435e1e5SSong Gao     }                                                           \
2738f435e1e5SSong Gao }
2739f435e1e5SSong Gao 
2740f435e1e5SSong Gao VCMPI(vseqi_b, 8, B, VSEQ)
2741f435e1e5SSong Gao VCMPI(vseqi_h, 16, H, VSEQ)
2742f435e1e5SSong Gao VCMPI(vseqi_w, 32, W, VSEQ)
2743f435e1e5SSong Gao VCMPI(vseqi_d, 64, D, VSEQ)
2744f435e1e5SSong Gao VCMPI(vslei_b, 8, B, VSLE)
2745f435e1e5SSong Gao VCMPI(vslei_h, 16, H, VSLE)
2746f435e1e5SSong Gao VCMPI(vslei_w, 32, W, VSLE)
2747f435e1e5SSong Gao VCMPI(vslei_d, 64, D, VSLE)
2748f435e1e5SSong Gao VCMPI(vslei_bu, 8, UB, VSLE)
2749f435e1e5SSong Gao VCMPI(vslei_hu, 16, UH, VSLE)
2750f435e1e5SSong Gao VCMPI(vslei_wu, 32, UW, VSLE)
2751f435e1e5SSong Gao VCMPI(vslei_du, 64, UD, VSLE)
2752f435e1e5SSong Gao VCMPI(vslti_b, 8, B, VSLT)
2753f435e1e5SSong Gao VCMPI(vslti_h, 16, H, VSLT)
2754f435e1e5SSong Gao VCMPI(vslti_w, 32, W, VSLT)
2755f435e1e5SSong Gao VCMPI(vslti_d, 64, D, VSLT)
2756f435e1e5SSong Gao VCMPI(vslti_bu, 8, UB, VSLT)
2757f435e1e5SSong Gao VCMPI(vslti_hu, 16, UH, VSLT)
2758f435e1e5SSong Gao VCMPI(vslti_wu, 32, UW, VSLT)
2759f435e1e5SSong Gao VCMPI(vslti_du, 64, UD, VSLT)
2760386c4e86SSong Gao 
2761386c4e86SSong Gao static uint64_t vfcmp_common(CPULoongArchState *env,
2762386c4e86SSong Gao                              FloatRelation cmp, uint32_t flags)
2763386c4e86SSong Gao {
2764386c4e86SSong Gao     uint64_t ret = 0;
2765386c4e86SSong Gao 
2766386c4e86SSong Gao     switch (cmp) {
2767386c4e86SSong Gao     case float_relation_less:
2768386c4e86SSong Gao         ret = (flags & FCMP_LT);
2769386c4e86SSong Gao         break;
2770386c4e86SSong Gao     case float_relation_equal:
2771386c4e86SSong Gao         ret = (flags & FCMP_EQ);
2772386c4e86SSong Gao         break;
2773386c4e86SSong Gao     case float_relation_greater:
2774386c4e86SSong Gao         ret = (flags & FCMP_GT);
2775386c4e86SSong Gao         break;
2776386c4e86SSong Gao     case float_relation_unordered:
2777386c4e86SSong Gao         ret = (flags & FCMP_UN);
2778386c4e86SSong Gao         break;
2779386c4e86SSong Gao     default:
2780386c4e86SSong Gao         g_assert_not_reached();
2781386c4e86SSong Gao     }
2782386c4e86SSong Gao 
2783386c4e86SSong Gao     if (ret) {
2784386c4e86SSong Gao         ret = -1;
2785386c4e86SSong Gao     }
2786386c4e86SSong Gao 
2787386c4e86SSong Gao     return ret;
2788386c4e86SSong Gao }
2789386c4e86SSong Gao 
2790386c4e86SSong Gao #define VFCMP(NAME, BIT, E, FN)                                          \
2791386c4e86SSong Gao void HELPER(NAME)(CPULoongArchState *env,                                \
2792386c4e86SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \
2793386c4e86SSong Gao {                                                                        \
2794386c4e86SSong Gao     int i;                                                               \
2795386c4e86SSong Gao     VReg t;                                                              \
2796386c4e86SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                     \
2797386c4e86SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                     \
2798386c4e86SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                     \
2799386c4e86SSong Gao                                                                          \
2800386c4e86SSong Gao     vec_clear_cause(env);                                                \
2801386c4e86SSong Gao     for (i = 0; i < LSX_LEN/BIT ; i++) {                                 \
2802386c4e86SSong Gao         FloatRelation cmp;                                               \
2803386c4e86SSong Gao         cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status);                   \
2804386c4e86SSong Gao         t.E(i) = vfcmp_common(env, cmp, flags);                          \
2805386c4e86SSong Gao         vec_update_fcsr0(env, GETPC());                                  \
2806386c4e86SSong Gao     }                                                                    \
2807386c4e86SSong Gao     *Vd = t;                                                             \
2808386c4e86SSong Gao }
2809386c4e86SSong Gao 
2810386c4e86SSong Gao VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet)
2811386c4e86SSong Gao VFCMP(vfcmp_s_s, 32, UW, float32_compare)
2812386c4e86SSong Gao VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
2813386c4e86SSong Gao VFCMP(vfcmp_s_d, 64, UD, float64_compare)
2814d0dfa19aSSong Gao 
2815d0dfa19aSSong Gao void HELPER(vbitseli_b)(void *vd, void *vj,  uint64_t imm, uint32_t v)
2816d0dfa19aSSong Gao {
2817d0dfa19aSSong Gao     int i;
2818d0dfa19aSSong Gao     VReg *Vd = (VReg *)vd;
2819d0dfa19aSSong Gao     VReg *Vj = (VReg *)vj;
2820d0dfa19aSSong Gao 
2821d0dfa19aSSong Gao     for (i = 0; i < 16; i++) {
2822d0dfa19aSSong Gao         Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm);
2823d0dfa19aSSong Gao     }
2824d0dfa19aSSong Gao }
2825d0dfa19aSSong Gao 
/*
 * Copy from target/arm/tcg/sve_helper.c
 *
 * Report whether any element of @m0 or @m1 equals @n, for elements of
 * size (8 << @esz) bits.
 *
 * Each word is XORed with @n replicated by dup_const() (presumably into
 * every element -- see its definition), so a matching element becomes
 * zero.  "(x - ones) & ~x" then leaves the top bit of an element set when
 * that element of x is zero; masking with the per-element top bits and
 * testing for non-zero yields the "any match" answer.
 */
static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
{
    const uint64_t elem_bits = 8 << esz;
    const uint64_t lsb_mask = dup_const(esz, 1);
    const uint64_t msb_mask = lsb_mask << (elem_bits - 1);
    const uint64_t needle = dup_const(esz, n);
    uint64_t diff0 = needle ^ m0;
    uint64_t diff1 = needle ^ m1;

    diff0 = (diff0 - lsb_mask) & ~diff0;
    diff1 = (diff1 - lsb_mask) & ~diff1;

    return ((diff0 | diff1) & msb_mask) != 0;
}
2841d0dfa19aSSong Gao 
2842d0dfa19aSSong Gao #define SETANYEQZ(NAME, MO)                                         \
2843d0dfa19aSSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
2844d0dfa19aSSong Gao {                                                                   \
2845d0dfa19aSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
2846d0dfa19aSSong Gao                                                                     \
2847d0dfa19aSSong Gao     env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO);       \
2848d0dfa19aSSong Gao }
2849d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_b, MO_8)
2850d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_h, MO_16)
2851d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_w, MO_32)
2852d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_d, MO_64)
2853d0dfa19aSSong Gao 
2854d0dfa19aSSong Gao #define SETALLNEZ(NAME, MO)                                         \
2855d0dfa19aSSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
2856d0dfa19aSSong Gao {                                                                   \
2857d0dfa19aSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
2858d0dfa19aSSong Gao                                                                     \
2859d0dfa19aSSong Gao     env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO);       \
2860d0dfa19aSSong Gao }
2861d0dfa19aSSong Gao SETALLNEZ(vsetallnez_b, MO_8)
2862d0dfa19aSSong Gao SETALLNEZ(vsetallnez_h, MO_16)
2863d0dfa19aSSong Gao SETALLNEZ(vsetallnez_w, MO_32)
2864d0dfa19aSSong Gao SETALLNEZ(vsetallnez_d, MO_64)
2865d5e5563cSSong Gao 
2866d5e5563cSSong Gao #define VPACKEV(NAME, BIT, E)                                  \
286704711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
2868d5e5563cSSong Gao {                                                              \
2869d5e5563cSSong Gao     int i;                                                     \
2870d5e5563cSSong Gao     VReg temp;                                                 \
287104711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
287204711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
287304711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
2874d5e5563cSSong Gao                                                                \
2875d5e5563cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                        \
2876d5e5563cSSong Gao         temp.E(2 * i + 1) = Vj->E(2 * i);                      \
2877d5e5563cSSong Gao         temp.E(2 *i) = Vk->E(2 * i);                           \
2878d5e5563cSSong Gao     }                                                          \
2879d5e5563cSSong Gao     *Vd = temp;                                                \
2880d5e5563cSSong Gao }
2881d5e5563cSSong Gao 
2882d5e5563cSSong Gao VPACKEV(vpackev_b, 16, B)
2883d5e5563cSSong Gao VPACKEV(vpackev_h, 32, H)
2884d5e5563cSSong Gao VPACKEV(vpackev_w, 64, W)
2885d5e5563cSSong Gao VPACKEV(vpackev_d, 128, D)
2886d5e5563cSSong Gao 
2887d5e5563cSSong Gao #define VPACKOD(NAME, BIT, E)                                  \
288804711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
2889d5e5563cSSong Gao {                                                              \
2890d5e5563cSSong Gao     int i;                                                     \
2891d5e5563cSSong Gao     VReg temp;                                                 \
289204711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
289304711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
289404711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
2895d5e5563cSSong Gao                                                                \
2896d5e5563cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                        \
2897d5e5563cSSong Gao         temp.E(2 * i + 1) = Vj->E(2 * i + 1);                  \
2898d5e5563cSSong Gao         temp.E(2 * i) = Vk->E(2 * i + 1);                      \
2899d5e5563cSSong Gao     }                                                          \
2900d5e5563cSSong Gao     *Vd = temp;                                                \
2901d5e5563cSSong Gao }
2902d5e5563cSSong Gao 
2903d5e5563cSSong Gao VPACKOD(vpackod_b, 16, B)
2904d5e5563cSSong Gao VPACKOD(vpackod_h, 32, H)
2905d5e5563cSSong Gao VPACKOD(vpackod_w, 64, W)
2906d5e5563cSSong Gao VPACKOD(vpackod_d, 128, D)
2907d5e5563cSSong Gao 
2908d5e5563cSSong Gao #define VPICKEV(NAME, BIT, E)                                  \
290904711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
2910d5e5563cSSong Gao {                                                              \
2911d5e5563cSSong Gao     int i;                                                     \
2912d5e5563cSSong Gao     VReg temp;                                                 \
291304711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
291404711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
291504711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
2916d5e5563cSSong Gao                                                                \
2917d5e5563cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                        \
2918d5e5563cSSong Gao         temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i);                \
2919d5e5563cSSong Gao         temp.E(i) = Vk->E(2 * i);                              \
2920d5e5563cSSong Gao     }                                                          \
2921d5e5563cSSong Gao     *Vd = temp;                                                \
2922d5e5563cSSong Gao }
2923d5e5563cSSong Gao 
2924d5e5563cSSong Gao VPICKEV(vpickev_b, 16, B)
2925d5e5563cSSong Gao VPICKEV(vpickev_h, 32, H)
2926d5e5563cSSong Gao VPICKEV(vpickev_w, 64, W)
2927d5e5563cSSong Gao VPICKEV(vpickev_d, 128, D)
2928d5e5563cSSong Gao 
2929d5e5563cSSong Gao #define VPICKOD(NAME, BIT, E)                                  \
293004711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
2931d5e5563cSSong Gao {                                                              \
2932d5e5563cSSong Gao     int i;                                                     \
2933d5e5563cSSong Gao     VReg temp;                                                 \
293404711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
293504711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
293604711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
2937d5e5563cSSong Gao                                                                \
2938d5e5563cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                        \
2939d5e5563cSSong Gao         temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1);            \
2940d5e5563cSSong Gao         temp.E(i) = Vk->E(2 * i + 1);                          \
2941d5e5563cSSong Gao     }                                                          \
2942d5e5563cSSong Gao     *Vd = temp;                                                \
2943d5e5563cSSong Gao }
2944d5e5563cSSong Gao 
2945d5e5563cSSong Gao VPICKOD(vpickod_b, 16, B)
2946d5e5563cSSong Gao VPICKOD(vpickod_h, 32, H)
2947d5e5563cSSong Gao VPICKOD(vpickod_w, 64, W)
2948d5e5563cSSong Gao VPICKOD(vpickod_d, 128, D)
2949e93dd431SSong Gao 
2950e93dd431SSong Gao #define VILVL(NAME, BIT, E)                                    \
295104711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
2952e93dd431SSong Gao {                                                              \
2953e93dd431SSong Gao     int i;                                                     \
2954e93dd431SSong Gao     VReg temp;                                                 \
295504711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
295604711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
295704711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
2958e93dd431SSong Gao                                                                \
2959e93dd431SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                        \
2960e93dd431SSong Gao         temp.E(2 * i + 1) = Vj->E(i);                          \
2961e93dd431SSong Gao         temp.E(2 * i) = Vk->E(i);                              \
2962e93dd431SSong Gao     }                                                          \
2963e93dd431SSong Gao     *Vd = temp;                                                \
2964e93dd431SSong Gao }
2965e93dd431SSong Gao 
2966e93dd431SSong Gao VILVL(vilvl_b, 16, B)
2967e93dd431SSong Gao VILVL(vilvl_h, 32, H)
2968e93dd431SSong Gao VILVL(vilvl_w, 64, W)
2969e93dd431SSong Gao VILVL(vilvl_d, 128, D)
2970e93dd431SSong Gao 
2971e93dd431SSong Gao #define VILVH(NAME, BIT, E)                                    \
297204711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
2973e93dd431SSong Gao {                                                              \
2974e93dd431SSong Gao     int i;                                                     \
2975e93dd431SSong Gao     VReg temp;                                                 \
297604711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
297704711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
297804711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
2979e93dd431SSong Gao                                                                \
2980e93dd431SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                        \
2981e93dd431SSong Gao         temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT);            \
2982e93dd431SSong Gao         temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT);                \
2983e93dd431SSong Gao     }                                                          \
2984e93dd431SSong Gao     *Vd = temp;                                                \
2985e93dd431SSong Gao }
2986e93dd431SSong Gao 
2987e93dd431SSong Gao VILVH(vilvh_b, 16, B)
2988e93dd431SSong Gao VILVH(vilvh_h, 32, H)
2989e93dd431SSong Gao VILVH(vilvh_w, 64, W)
2990e93dd431SSong Gao VILVH(vilvh_d, 128, D)
2991e93dd431SSong Gao 
2992eb48ab22SSong Gao void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
2993e93dd431SSong Gao {
2994e93dd431SSong Gao     int i, m;
2995e93dd431SSong Gao     VReg temp;
2996eb48ab22SSong Gao     VReg *Vd = (VReg *)vd;
2997eb48ab22SSong Gao     VReg *Vj = (VReg *)vj;
2998eb48ab22SSong Gao     VReg *Vk = (VReg *)vk;
2999eb48ab22SSong Gao     VReg *Va = (VReg *)va;
3000e93dd431SSong Gao 
3001e93dd431SSong Gao     m = LSX_LEN/8;
3002e93dd431SSong Gao     for (i = 0; i < m ; i++) {
3003e93dd431SSong Gao         uint64_t k = (uint8_t)Va->B(i) % (2 * m);
3004e93dd431SSong Gao         temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m);
3005e93dd431SSong Gao     }
3006e93dd431SSong Gao     *Vd = temp;
3007e93dd431SSong Gao }
3008e93dd431SSong Gao 
3009e93dd431SSong Gao #define VSHUF(NAME, BIT, E)                                    \
301004711da1SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
3011e93dd431SSong Gao {                                                              \
3012e93dd431SSong Gao     int i, m;                                                  \
3013e93dd431SSong Gao     VReg temp;                                                 \
301404711da1SSong Gao     VReg *Vd = (VReg *)vd;                                     \
301504711da1SSong Gao     VReg *Vj = (VReg *)vj;                                     \
301604711da1SSong Gao     VReg *Vk = (VReg *)vk;                                     \
3017e93dd431SSong Gao                                                                \
3018e93dd431SSong Gao     m = LSX_LEN/BIT;                                           \
3019e93dd431SSong Gao     for (i = 0; i < m; i++) {                                  \
3020e93dd431SSong Gao         uint64_t k  = ((uint8_t) Vd->E(i)) % (2 * m);          \
3021e93dd431SSong Gao         temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m);           \
3022e93dd431SSong Gao     }                                                          \
3023e93dd431SSong Gao     *Vd = temp;                                                \
3024e93dd431SSong Gao }
3025e93dd431SSong Gao 
3026e93dd431SSong Gao VSHUF(vshuf_h, 16, H)
3027e93dd431SSong Gao VSHUF(vshuf_w, 32, W)
3028e93dd431SSong Gao VSHUF(vshuf_d, 64, D)
3029e93dd431SSong Gao 
3030e93dd431SSong Gao #define VSHUF4I(NAME, BIT, E)                                      \
3031329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
3032e93dd431SSong Gao {                                                                  \
3033e93dd431SSong Gao     int i;                                                         \
3034e93dd431SSong Gao     VReg temp;                                                     \
3035329517d5SSong Gao     VReg *Vd = (VReg *)vd;                                         \
3036329517d5SSong Gao     VReg *Vj = (VReg *)vj;                                         \
3037e93dd431SSong Gao                                                                    \
3038e93dd431SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                            \
3039e93dd431SSong Gao          temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >>               \
3040e93dd431SSong Gao                            (2 * ((i) & 0x03))) & 0x03));           \
3041e93dd431SSong Gao     }                                                              \
3042e93dd431SSong Gao     *Vd = temp;                                                    \
3043e93dd431SSong Gao }
3044e93dd431SSong Gao 
3045e93dd431SSong Gao VSHUF4I(vshuf4i_b, 8, B)
3046e93dd431SSong Gao VSHUF4I(vshuf4i_h, 16, H)
3047e93dd431SSong Gao VSHUF4I(vshuf4i_w, 32, W)
3048e93dd431SSong Gao 
3049329517d5SSong Gao void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
3050e93dd431SSong Gao {
3051329517d5SSong Gao     VReg *Vd = (VReg *)vd;
3052329517d5SSong Gao     VReg *Vj = (VReg *)vj;
3053e93dd431SSong Gao 
3054e93dd431SSong Gao     VReg temp;
3055e93dd431SSong Gao     temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1);
3056e93dd431SSong Gao     temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1);
3057e93dd431SSong Gao     *Vd = temp;
3058e93dd431SSong Gao }
3059e93dd431SSong Gao 
3060329517d5SSong Gao void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc)
3061e93dd431SSong Gao {
3062e93dd431SSong Gao     VReg temp;
3063329517d5SSong Gao     VReg *Vd = (VReg *)vd;
3064329517d5SSong Gao     VReg *Vj = (VReg *)vj;
3065e93dd431SSong Gao 
3066e93dd431SSong Gao     temp.W(0) = Vj->W(imm & 0x3);
3067e93dd431SSong Gao     temp.W(1) = Vj->W((imm >> 2) & 0x3);
3068e93dd431SSong Gao     temp.W(2) = Vd->W((imm >> 4) & 0x3);
3069e93dd431SSong Gao     temp.W(3) = Vd->W((imm >> 6) & 0x3);
3070e93dd431SSong Gao     *Vd = temp;
3071e93dd431SSong Gao }
3072e93dd431SSong Gao 
3073e93dd431SSong Gao #define VEXTRINS(NAME, BIT, E, MASK)                               \
3074329517d5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
3075e93dd431SSong Gao {                                                                  \
3076e93dd431SSong Gao     int ins, extr;                                                 \
3077329517d5SSong Gao     VReg *Vd = (VReg *)vd;                                         \
3078329517d5SSong Gao     VReg *Vj = (VReg *)vj;                                         \
3079e93dd431SSong Gao                                                                    \
3080e93dd431SSong Gao     ins = (imm >> 4) & MASK;                                       \
3081e93dd431SSong Gao     extr = imm & MASK;                                             \
3082e93dd431SSong Gao     Vd->E(ins) = Vj->E(extr);                                      \
3083e93dd431SSong Gao }
3084e93dd431SSong Gao 
3085e93dd431SSong Gao VEXTRINS(vextrins_b, 8, B, 0xf)
3086e93dd431SSong Gao VEXTRINS(vextrins_h, 16, H, 0x7)
3087e93dd431SSong Gao VEXTRINS(vextrins_w, 32, W, 0x3)
3088e93dd431SSong Gao VEXTRINS(vextrins_d, 64, D, 0x1)
3089