xref: /qemu/target/loongarch/tcg/vec_helper.c (revision 1dc33f2653ab8c564f0e4371166d2dec4c622dcd)
1a0c9400aSSong Gao /* SPDX-License-Identifier: GPL-2.0-or-later */
2a0c9400aSSong Gao /*
31dc33f26SSong Gao  * QEMU LoongArch vector helper functions.
4a0c9400aSSong Gao  *
5a0c9400aSSong Gao  * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
6a0c9400aSSong Gao  */
7c037fbc9SSong Gao 
8c037fbc9SSong Gao #include "qemu/osdep.h"
9c037fbc9SSong Gao #include "cpu.h"
10c037fbc9SSong Gao #include "exec/exec-all.h"
11c037fbc9SSong Gao #include "exec/helper-proto.h"
12aca67472SSong Gao #include "fpu/softfloat.h"
13aca67472SSong Gao #include "internals.h"
14d0dfa19aSSong Gao #include "tcg/tcg.h"
15c037fbc9SSong Gao 
/*
 * Element-wise add/subtract used as the DO_OP argument of the helper
 * generators below.  Arguments and the whole expansion are parenthesized so
 * that compound expressions (e.g. DO_SUB(x, y - z)) expand with the intended
 * precedence.
 */
#define DO_ADD(a, b)  ((a) + (b))
#define DO_SUB(a, b)  ((a) - (b))
18c037fbc9SSong Gao 
/*
 * DO_ODD_EVEN - define HELPER(NAME), a widening "odd with even" operation.
 *
 * The registers are looked up in env->fpr[] by index.  For every E1 lane i
 * (LSX_LEN / BIT lanes), element 2*i+1 of Vj and element 2*i of Vk are read
 * through E2, cast to the E1 lane type and combined with DO_OP; the result
 * is stored in lane i of Vd.
 */
#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP)                        \
void HELPER(NAME)(CPULoongArchState *env,                            \
                  uint32_t vd, uint32_t vj, uint32_t vk)             \
{                                                                    \
    VReg *dst = &env->fpr[vd].vreg;                                  \
    VReg *src_j = &env->fpr[vj].vreg;                                \
    VReg *src_k = &env->fpr[vk].vreg;                                \
    typedef __typeof(dst->E1(0)) WideT;                              \
    int lane;                                                        \
                                                                     \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {                   \
        WideT odd_j = (WideT)src_j->E2(2 * lane + 1);                \
        WideT even_k = (WideT)src_k->E2(2 * lane);                   \
                                                                     \
        dst->E1(lane) = DO_OP(odd_j, even_k);                        \
    }                                                                \
}
33c037fbc9SSong Gao 
34c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD)
35c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD)
36c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)
37c037fbc9SSong Gao 
38c037fbc9SSong Gao void HELPER(vhaddw_q_d)(CPULoongArchState *env,
39c037fbc9SSong Gao                         uint32_t vd, uint32_t vj, uint32_t vk)
40c037fbc9SSong Gao {
41c037fbc9SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
42c037fbc9SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
43c037fbc9SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);
44c037fbc9SSong Gao 
45c037fbc9SSong Gao     Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
46c037fbc9SSong Gao }
47c037fbc9SSong Gao 
48c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
49c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB)
50c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)
51c037fbc9SSong Gao 
52c037fbc9SSong Gao void HELPER(vhsubw_q_d)(CPULoongArchState *env,
53c037fbc9SSong Gao                         uint32_t vd, uint32_t vj, uint32_t vk)
54c037fbc9SSong Gao {
55c037fbc9SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
56c037fbc9SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
57c037fbc9SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);
58c037fbc9SSong Gao 
59c037fbc9SSong Gao     Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
60c037fbc9SSong Gao }
61c037fbc9SSong Gao 
62c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
63c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD)
64c037fbc9SSong Gao DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)
65c037fbc9SSong Gao 
66c037fbc9SSong Gao void HELPER(vhaddw_qu_du)(CPULoongArchState *env,
67c037fbc9SSong Gao                           uint32_t vd, uint32_t vj, uint32_t vk)
68c037fbc9SSong Gao {
69c037fbc9SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
70c037fbc9SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
71c037fbc9SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);
72c037fbc9SSong Gao 
73c037fbc9SSong Gao     Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
74c037fbc9SSong Gao                           int128_make64((uint64_t)Vk->D(0)));
75c037fbc9SSong Gao }
76c037fbc9SSong Gao 
77c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
78c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB)
79c037fbc9SSong Gao DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)
80c037fbc9SSong Gao 
81c037fbc9SSong Gao void HELPER(vhsubw_qu_du)(CPULoongArchState *env,
82c037fbc9SSong Gao                           uint32_t vd, uint32_t vj, uint32_t vk)
83c037fbc9SSong Gao {
84c037fbc9SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
85c037fbc9SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
86c037fbc9SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);
87c037fbc9SSong Gao 
88c037fbc9SSong Gao     Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
89c037fbc9SSong Gao                           int128_make64((uint64_t)Vk->D(0)));
90c037fbc9SSong Gao }
912d5f950cSSong Gao 
/*
 * DO_EVEN - define HELPER(NAME), a widening operation on even elements.
 *
 * For every E1 lane i (LSX_LEN / BIT lanes), element 2*i of Vj and element
 * 2*i of Vk are read through E2, cast to the E1 lane type and combined with
 * DO_OP; the result is stored in lane i of Vd.  The trailing uint32_t
 * argument is not used.
 */
#define DO_EVEN(NAME, BIT, E1, E2, DO_OP)                        \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)      \
{                                                                \
    VReg *dst = vd;                                              \
    VReg *src_j = vj;                                            \
    VReg *src_k = vk;                                            \
    typedef __typeof(dst->E1(0)) WideT;                          \
    int lane;                                                    \
                                                                 \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {               \
        WideT lhs = (WideT)src_j->E2(2 * lane);                  \
        WideT rhs = (WideT)src_k->E2(2 * lane);                  \
                                                                 \
        dst->E1(lane) = DO_OP(lhs, rhs);                         \
    }                                                            \
}
1042d5f950cSSong Gao 
/*
 * DO_ODD - define HELPER(NAME), a widening operation on odd elements.
 *
 * For every E1 lane i (LSX_LEN / BIT lanes), element 2*i+1 of Vj and element
 * 2*i+1 of Vk are read through E2, cast to the E1 lane type and combined
 * with DO_OP; the result is stored in lane i of Vd.  The trailing uint32_t
 * argument is not used.
 */
#define DO_ODD(NAME, BIT, E1, E2, DO_OP)                         \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)      \
{                                                                \
    VReg *dst = vd;                                              \
    VReg *src_j = vj;                                            \
    VReg *src_k = vk;                                            \
    typedef __typeof(dst->E1(0)) WideT;                          \
    int lane;                                                    \
                                                                 \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {               \
        WideT lhs = (WideT)src_j->E2(2 * lane + 1);              \
        WideT rhs = (WideT)src_k->E2(2 * lane + 1);              \
                                                                 \
        dst->E1(lane) = DO_OP(lhs, rhs);                         \
    }                                                            \
}
1172d5f950cSSong Gao 
1182d5f950cSSong Gao void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
1192d5f950cSSong Gao {
1202d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1212d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1222d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
1232d5f950cSSong Gao 
1242d5f950cSSong Gao     Vd->Q(0) = int128_add(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
1252d5f950cSSong Gao }
1262d5f950cSSong Gao 
1272d5f950cSSong Gao DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD)
1282d5f950cSSong Gao DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD)
1292d5f950cSSong Gao DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD)
1302d5f950cSSong Gao 
1312d5f950cSSong Gao void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
1322d5f950cSSong Gao {
1332d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1342d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1352d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
1362d5f950cSSong Gao 
1372d5f950cSSong Gao     Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
1382d5f950cSSong Gao }
1392d5f950cSSong Gao 
1402d5f950cSSong Gao DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD)
1412d5f950cSSong Gao DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD)
1422d5f950cSSong Gao DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD)
1432d5f950cSSong Gao 
1442d5f950cSSong Gao void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
1452d5f950cSSong Gao {
1462d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1472d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1482d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
1492d5f950cSSong Gao 
1502d5f950cSSong Gao     Vd->Q(0) = int128_sub(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
1512d5f950cSSong Gao }
1522d5f950cSSong Gao 
1532d5f950cSSong Gao DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB)
1542d5f950cSSong Gao DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB)
1552d5f950cSSong Gao DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB)
1562d5f950cSSong Gao 
1572d5f950cSSong Gao void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
1582d5f950cSSong Gao {
1592d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1602d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1612d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
1622d5f950cSSong Gao 
1632d5f950cSSong Gao     Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
1642d5f950cSSong Gao }
1652d5f950cSSong Gao 
1662d5f950cSSong Gao DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB)
1672d5f950cSSong Gao DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB)
1682d5f950cSSong Gao DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB)
1692d5f950cSSong Gao 
1702d5f950cSSong Gao void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
1712d5f950cSSong Gao {
1722d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1732d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1742d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
1752d5f950cSSong Gao 
1762d5f950cSSong Gao     Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
1772d5f950cSSong Gao                           int128_make64((uint64_t)Vk->D(0)));
1782d5f950cSSong Gao }
1792d5f950cSSong Gao 
1802d5f950cSSong Gao DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD)
1812d5f950cSSong Gao DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD)
1822d5f950cSSong Gao DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD)
1832d5f950cSSong Gao 
1842d5f950cSSong Gao void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
1852d5f950cSSong Gao {
1862d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
1872d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
1882d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
1892d5f950cSSong Gao 
1902d5f950cSSong Gao     Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
1912d5f950cSSong Gao                           int128_make64((uint64_t)Vk->D(1)));
1922d5f950cSSong Gao }
1932d5f950cSSong Gao 
1942d5f950cSSong Gao DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD)
1952d5f950cSSong Gao DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD)
1962d5f950cSSong Gao DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD)
1972d5f950cSSong Gao 
1982d5f950cSSong Gao void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
1992d5f950cSSong Gao {
2002d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
2012d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
2022d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
2032d5f950cSSong Gao 
2042d5f950cSSong Gao     Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(0)),
2052d5f950cSSong Gao                           int128_make64((uint64_t)Vk->D(0)));
2062d5f950cSSong Gao }
2072d5f950cSSong Gao 
2082d5f950cSSong Gao DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB)
2092d5f950cSSong Gao DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB)
2102d5f950cSSong Gao DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB)
2112d5f950cSSong Gao 
2122d5f950cSSong Gao void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
2132d5f950cSSong Gao {
2142d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
2152d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
2162d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
2172d5f950cSSong Gao 
2182d5f950cSSong Gao     Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
2192d5f950cSSong Gao                           int128_make64((uint64_t)Vk->D(1)));
2202d5f950cSSong Gao }
2212d5f950cSSong Gao 
2222d5f950cSSong Gao DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB)
2232d5f950cSSong Gao DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB)
2242d5f950cSSong Gao DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB)
2252d5f950cSSong Gao 
/*
 * DO_EVEN_U_S - define HELPER(NAME), a mixed-accessor widening operation on
 * even elements.
 *
 * For every ES1 lane i (LSX_LEN / BIT lanes), element 2*i of Vj is read
 * through EU2 and cast to the EU1 lane type, element 2*i of Vk is read
 * through ES2 and cast to the ES1 lane type, and DO_OP of the two is stored
 * in ES1 lane i of Vd.  The trailing uint32_t argument is not used.
 */
#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)             \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)           \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(dst->ES1(0)) WideS;                              \
    typedef __typeof(dst->EU1(0)) WideU;                              \
    int lane;                                                         \
                                                                      \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {                    \
        WideU lhs = (WideU)src_j->EU2(2 * lane);                      \
        WideS rhs = (WideS)src_k->ES2(2 * lane);                      \
                                                                      \
        dst->ES1(lane) = DO_OP(lhs, rhs);                             \
    }                                                                 \
}
2392d5f950cSSong Gao 
/*
 * DO_ODD_U_S - define HELPER(NAME), a mixed-accessor widening operation on
 * odd elements.
 *
 * For every ES1 lane i (LSX_LEN / BIT lanes), element 2*i+1 of Vj is read
 * through EU2 and cast to the EU1 lane type, element 2*i+1 of Vk is read
 * through ES2 and cast to the ES1 lane type, and DO_OP of the two is stored
 * in ES1 lane i of Vd.  The trailing uint32_t argument is not used.
 */
#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)              \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)           \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(dst->ES1(0)) WideS;                              \
    typedef __typeof(dst->EU1(0)) WideU;                              \
    int lane;                                                         \
                                                                      \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {                    \
        WideU lhs = (WideU)src_j->EU2(2 * lane + 1);                  \
        WideS rhs = (WideS)src_k->ES2(2 * lane + 1);                  \
                                                                      \
        dst->ES1(lane) = DO_OP(lhs, rhs);                             \
    }                                                                 \
}
2532d5f950cSSong Gao 
2542d5f950cSSong Gao void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
2552d5f950cSSong Gao {
2562d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
2572d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
2582d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
2592d5f950cSSong Gao 
2602d5f950cSSong Gao     Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
2612d5f950cSSong Gao                           int128_makes64(Vk->D(0)));
2622d5f950cSSong Gao }
2632d5f950cSSong Gao 
2642d5f950cSSong Gao DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD)
2652d5f950cSSong Gao DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD)
2662d5f950cSSong Gao DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD)
2672d5f950cSSong Gao 
2682d5f950cSSong Gao void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
2692d5f950cSSong Gao {
2702d5f950cSSong Gao     VReg *Vd = (VReg *)vd;
2712d5f950cSSong Gao     VReg *Vj = (VReg *)vj;
2722d5f950cSSong Gao     VReg *Vk = (VReg *)vk;
2732d5f950cSSong Gao 
2742d5f950cSSong Gao     Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
2752d5f950cSSong Gao                           int128_makes64(Vk->D(1)));
2762d5f950cSSong Gao }
2772d5f950cSSong Gao 
2782d5f950cSSong Gao DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
2792d5f950cSSong Gao DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
2802d5f950cSSong Gao DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
28139e9b0a7SSong Gao 
/*
 * Averages computed without forming a + b: each operand is halved first and
 * the bit dropped by the two shifts is added back.  DO_VAVG adds 1 only when
 * both operands have bit 0 set; DO_VAVGR adds 1 when either operand does.
 *
 * Arguments are parenthesized so that compound expressions expand with the
 * intended precedence.  They are evaluated more than once, so they must not
 * have side effects.
 */
#define DO_VAVG(a, b)  (((a) >> 1) + ((b) >> 1) + ((a) & (b) & 1))
#define DO_VAVGR(a, b) (((a) >> 1) + ((b) >> 1) + (((a) | (b)) & 1))
28439e9b0a7SSong Gao 
28539e9b0a7SSong Gao #define DO_3OP(NAME, BIT, E, DO_OP)                         \
28639e9b0a7SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
28739e9b0a7SSong Gao {                                                           \
28839e9b0a7SSong Gao     int i;                                                  \
28939e9b0a7SSong Gao     VReg *Vd = (VReg *)vd;                                  \
29039e9b0a7SSong Gao     VReg *Vj = (VReg *)vj;                                  \
29139e9b0a7SSong Gao     VReg *Vk = (VReg *)vk;                                  \
29239e9b0a7SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
29339e9b0a7SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i));               \
29439e9b0a7SSong Gao     }                                                       \
29539e9b0a7SSong Gao }
29639e9b0a7SSong Gao 
29739e9b0a7SSong Gao DO_3OP(vavg_b, 8, B, DO_VAVG)
29839e9b0a7SSong Gao DO_3OP(vavg_h, 16, H, DO_VAVG)
29939e9b0a7SSong Gao DO_3OP(vavg_w, 32, W, DO_VAVG)
30039e9b0a7SSong Gao DO_3OP(vavg_d, 64, D, DO_VAVG)
30139e9b0a7SSong Gao DO_3OP(vavgr_b, 8, B, DO_VAVGR)
30239e9b0a7SSong Gao DO_3OP(vavgr_h, 16, H, DO_VAVGR)
30339e9b0a7SSong Gao DO_3OP(vavgr_w, 32, W, DO_VAVGR)
30439e9b0a7SSong Gao DO_3OP(vavgr_d, 64, D, DO_VAVGR)
30539e9b0a7SSong Gao DO_3OP(vavg_bu, 8, UB, DO_VAVG)
30639e9b0a7SSong Gao DO_3OP(vavg_hu, 16, UH, DO_VAVG)
30739e9b0a7SSong Gao DO_3OP(vavg_wu, 32, UW, DO_VAVG)
30839e9b0a7SSong Gao DO_3OP(vavg_du, 64, UD, DO_VAVG)
30939e9b0a7SSong Gao DO_3OP(vavgr_bu, 8, UB, DO_VAVGR)
31039e9b0a7SSong Gao DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
31139e9b0a7SSong Gao DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
31239e9b0a7SSong Gao DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
31349725659SSong Gao 
/*
 * Absolute difference: subtract the smaller operand from the larger one.
 * Arguments are parenthesized so compound expressions expand correctly;
 * they are evaluated more than once and must not have side effects.
 */
#define DO_VABSD(a, b)  (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
31549725659SSong Gao 
31649725659SSong Gao DO_3OP(vabsd_b, 8, B, DO_VABSD)
31749725659SSong Gao DO_3OP(vabsd_h, 16, H, DO_VABSD)
31849725659SSong Gao DO_3OP(vabsd_w, 32, W, DO_VABSD)
31949725659SSong Gao DO_3OP(vabsd_d, 64, D, DO_VABSD)
32049725659SSong Gao DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
32149725659SSong Gao DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
32249725659SSong Gao DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
32349725659SSong Gao DO_3OP(vabsd_du, 64, UD, DO_VABSD)
324af448cb3SSong Gao 
/*
 * Absolute value of a single operand.  The argument is parenthesized so that
 * the negation applies to the whole expression (e.g. DO_VABS(x - y)); it is
 * evaluated more than once and must not have side effects.
 */
#define DO_VABS(a)  (((a) < 0) ? -(a) : (a))
326af448cb3SSong Gao 
327af448cb3SSong Gao #define DO_VADDA(NAME, BIT, E, DO_OP)                       \
328af448cb3SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
329af448cb3SSong Gao {                                                           \
330af448cb3SSong Gao     int i;                                                  \
331af448cb3SSong Gao     VReg *Vd = (VReg *)vd;                                  \
332af448cb3SSong Gao     VReg *Vj = (VReg *)vj;                                  \
333af448cb3SSong Gao     VReg *Vk = (VReg *)vk;                                  \
334af448cb3SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
335af448cb3SSong Gao         Vd->E(i) = DO_OP(Vj->E(i)) + DO_OP(Vk->E(i));       \
336af448cb3SSong Gao     }                                                       \
337af448cb3SSong Gao }
338af448cb3SSong Gao 
339af448cb3SSong Gao DO_VADDA(vadda_b, 8, B, DO_VABS)
340af448cb3SSong Gao DO_VADDA(vadda_h, 16, H, DO_VABS)
341af448cb3SSong Gao DO_VADDA(vadda_w, 32, W, DO_VABS)
342af448cb3SSong Gao DO_VADDA(vadda_d, 64, D, DO_VABS)
3439ab29520SSong Gao 
/*
 * Minimum / maximum of two operands.  Arguments are parenthesized so that
 * compound expressions (e.g. DO_MAX(x, a | b)) expand with the intended
 * precedence; they are evaluated more than once and must not have side
 * effects.
 */
#define DO_MIN(a, b) (((a) < (b)) ? (a) : (b))
#define DO_MAX(a, b) (((a) > (b)) ? (a) : (b))
3469ab29520SSong Gao 
3479ab29520SSong Gao #define VMINMAXI(NAME, BIT, E, DO_OP)                           \
3489ab29520SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
3499ab29520SSong Gao {                                                               \
3509ab29520SSong Gao     int i;                                                      \
3519ab29520SSong Gao     VReg *Vd = (VReg *)vd;                                      \
3529ab29520SSong Gao     VReg *Vj = (VReg *)vj;                                      \
3539ab29520SSong Gao     typedef __typeof(Vd->E(0)) TD;                              \
3549ab29520SSong Gao                                                                 \
3559ab29520SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
3569ab29520SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), (TD)imm);                    \
3579ab29520SSong Gao     }                                                           \
3589ab29520SSong Gao }
3599ab29520SSong Gao 
3609ab29520SSong Gao VMINMAXI(vmini_b, 8, B, DO_MIN)
3619ab29520SSong Gao VMINMAXI(vmini_h, 16, H, DO_MIN)
3629ab29520SSong Gao VMINMAXI(vmini_w, 32, W, DO_MIN)
3639ab29520SSong Gao VMINMAXI(vmini_d, 64, D, DO_MIN)
3649ab29520SSong Gao VMINMAXI(vmaxi_b, 8, B, DO_MAX)
3659ab29520SSong Gao VMINMAXI(vmaxi_h, 16, H, DO_MAX)
3669ab29520SSong Gao VMINMAXI(vmaxi_w, 32, W, DO_MAX)
3679ab29520SSong Gao VMINMAXI(vmaxi_d, 64, D, DO_MAX)
3689ab29520SSong Gao VMINMAXI(vmini_bu, 8, UB, DO_MIN)
3699ab29520SSong Gao VMINMAXI(vmini_hu, 16, UH, DO_MIN)
3709ab29520SSong Gao VMINMAXI(vmini_wu, 32, UW, DO_MIN)
3719ab29520SSong Gao VMINMAXI(vmini_du, 64, UD, DO_MIN)
3729ab29520SSong Gao VMINMAXI(vmaxi_bu, 8, UB, DO_MAX)
3739ab29520SSong Gao VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
3749ab29520SSong Gao VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
3759ab29520SSong Gao VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
376cd1c49adSSong Gao 
/*
 * DO_VMUH - define HELPER(NAME), a lane-wise "multiply, keep upper part".
 *
 * For each of the LSX_LEN / BIT lanes, the E2 elements of Vj and Vk are cast
 * to the lane type of E1, multiplied, and the product shifted right by BIT
 * before being stored back through E2 into Vd.
 *
 * DO_OP is accepted but never expanded by this macro.  The trailing
 * uint32_t argument is not used.
 */
#define DO_VMUH(NAME, BIT, E1, E2, DO_OP)                   \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
{                                                           \
    VReg *dst = vd;                                         \
    VReg *src_j = vj;                                       \
    VReg *src_k = vk;                                       \
    typedef __typeof(dst->E1(0)) WideT;                     \
    int lane;                                               \
                                                            \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {          \
        WideT lhs = (WideT)src_j->E2(lane);                 \
        WideT rhs = (WideT)src_k->E2(lane);                 \
                                                            \
        dst->E2(lane) = (lhs * rhs) >> BIT;                 \
    }                                                       \
}
390cd1c49adSSong Gao 
391cd1c49adSSong Gao void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t v)
392cd1c49adSSong Gao {
393cd1c49adSSong Gao     uint64_t l, h1, h2;
394cd1c49adSSong Gao     VReg *Vd = (VReg *)vd;
395cd1c49adSSong Gao     VReg *Vj = (VReg *)vj;
396cd1c49adSSong Gao     VReg *Vk = (VReg *)vk;
397cd1c49adSSong Gao 
398cd1c49adSSong Gao     muls64(&l, &h1, Vj->D(0), Vk->D(0));
399cd1c49adSSong Gao     muls64(&l, &h2, Vj->D(1), Vk->D(1));
400cd1c49adSSong Gao 
401cd1c49adSSong Gao     Vd->D(0) = h1;
402cd1c49adSSong Gao     Vd->D(1) = h2;
403cd1c49adSSong Gao }
404cd1c49adSSong Gao 
405cd1c49adSSong Gao DO_VMUH(vmuh_b, 8, H, B, DO_MUH)
406cd1c49adSSong Gao DO_VMUH(vmuh_h, 16, W, H, DO_MUH)
407cd1c49adSSong Gao DO_VMUH(vmuh_w, 32, D, W, DO_MUH)
408cd1c49adSSong Gao 
409cd1c49adSSong Gao void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t v)
410cd1c49adSSong Gao {
411cd1c49adSSong Gao     uint64_t l, h1, h2;
412cd1c49adSSong Gao     VReg *Vd = (VReg *)vd;
413cd1c49adSSong Gao     VReg *Vj = (VReg *)vj;
414cd1c49adSSong Gao     VReg *Vk = (VReg *)vk;
415cd1c49adSSong Gao 
416cd1c49adSSong Gao     mulu64(&l, &h1, Vj->D(0), Vk->D(0));
417cd1c49adSSong Gao     mulu64(&l, &h2, Vj->D(1), Vk->D(1));
418cd1c49adSSong Gao 
419cd1c49adSSong Gao     Vd->D(0) = h1;
420cd1c49adSSong Gao     Vd->D(1) = h2;
421cd1c49adSSong Gao }
422cd1c49adSSong Gao 
423cd1c49adSSong Gao DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
424cd1c49adSSong Gao DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH)
425cd1c49adSSong Gao DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH)
426cd1c49adSSong Gao 
/*
 * Element-wise multiply used as a DO_OP argument.  Arguments are
 * parenthesized so that DO_MUL(x + y, z) multiplies the whole sum.
 */
#define DO_MUL(a, b) ((a) * (b))
428cd1c49adSSong Gao 
429cd1c49adSSong Gao DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
430cd1c49adSSong Gao DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
431cd1c49adSSong Gao DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)
432cd1c49adSSong Gao 
433cd1c49adSSong Gao DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL)
434cd1c49adSSong Gao DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL)
435cd1c49adSSong Gao DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL)
436cd1c49adSSong Gao 
437cd1c49adSSong Gao DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL)
438cd1c49adSSong Gao DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL)
439cd1c49adSSong Gao DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL)
440cd1c49adSSong Gao 
441cd1c49adSSong Gao DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL)
442cd1c49adSSong Gao DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL)
443cd1c49adSSong Gao DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL)
444cd1c49adSSong Gao 
445cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
446cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
447cd1c49adSSong Gao DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
448cd1c49adSSong Gao 
449cd1c49adSSong Gao DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
450cd1c49adSSong Gao DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
451cd1c49adSSong Gao DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
452d3aec65bSSong Gao 
/*
 * Multiply-accumulate primitives: a plus (DO_MADD) or minus (DO_MSUB) the
 * product b * c.  Arguments are parenthesized so that compound expressions
 * (e.g. DO_MADD(a, x + y, z)) expand with the intended precedence.
 */
#define DO_MADD(a, b, c)  ((a) + (b) * (c))
#define DO_MSUB(a, b, c)  ((a) - (b) * (c))
455d3aec65bSSong Gao 
456d3aec65bSSong Gao #define VMADDSUB(NAME, BIT, E, DO_OP)                       \
457d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
458d3aec65bSSong Gao {                                                           \
459d3aec65bSSong Gao     int i;                                                  \
460d3aec65bSSong Gao     VReg *Vd = (VReg *)vd;                                  \
461d3aec65bSSong Gao     VReg *Vj = (VReg *)vj;                                  \
462d3aec65bSSong Gao     VReg *Vk = (VReg *)vk;                                  \
463d3aec65bSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
464d3aec65bSSong Gao         Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i));     \
465d3aec65bSSong Gao     }                                                       \
466d3aec65bSSong Gao }
467d3aec65bSSong Gao 
468d3aec65bSSong Gao VMADDSUB(vmadd_b, 8, B, DO_MADD)
469d3aec65bSSong Gao VMADDSUB(vmadd_h, 16, H, DO_MADD)
470d3aec65bSSong Gao VMADDSUB(vmadd_w, 32, W, DO_MADD)
471d3aec65bSSong Gao VMADDSUB(vmadd_d, 64, D, DO_MADD)
472d3aec65bSSong Gao VMADDSUB(vmsub_b, 8, B, DO_MSUB)
473d3aec65bSSong Gao VMADDSUB(vmsub_h, 16, H, DO_MSUB)
474d3aec65bSSong Gao VMADDSUB(vmsub_w, 32, W, DO_MSUB)
475d3aec65bSSong Gao VMADDSUB(vmsub_d, 64, D, DO_MSUB)
476d3aec65bSSong Gao 
477d3aec65bSSong Gao #define VMADDWEV(NAME, BIT, E1, E2, DO_OP)                        \
478d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v)       \
479d3aec65bSSong Gao {                                                                 \
480d3aec65bSSong Gao     int i;                                                        \
481d3aec65bSSong Gao     VReg *Vd = (VReg *)vd;                                        \
482d3aec65bSSong Gao     VReg *Vj = (VReg *)vj;                                        \
483d3aec65bSSong Gao     VReg *Vk = (VReg *)vk;                                        \
484d3aec65bSSong Gao     typedef __typeof(Vd->E1(0)) TD;                               \
485d3aec65bSSong Gao                                                                   \
486d3aec65bSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                           \
487d3aec65bSSong Gao         Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \
488d3aec65bSSong Gao     }                                                             \
489d3aec65bSSong Gao }
490d3aec65bSSong Gao 
491d3aec65bSSong Gao VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL)
492d3aec65bSSong Gao VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL)
493d3aec65bSSong Gao VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL)
494d3aec65bSSong Gao VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL)
495d3aec65bSSong Gao VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL)
496d3aec65bSSong Gao VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL)
497d3aec65bSSong Gao 
498d3aec65bSSong Gao #define VMADDWOD(NAME, BIT, E1, E2, DO_OP)                  \
499d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
500d3aec65bSSong Gao {                                                           \
501d3aec65bSSong Gao     int i;                                                  \
502d3aec65bSSong Gao     VReg *Vd = (VReg *)vd;                                  \
503d3aec65bSSong Gao     VReg *Vj = (VReg *)vj;                                  \
504d3aec65bSSong Gao     VReg *Vk = (VReg *)vk;                                  \
505d3aec65bSSong Gao     typedef __typeof(Vd->E1(0)) TD;                         \
506d3aec65bSSong Gao                                                             \
507d3aec65bSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
508d3aec65bSSong Gao         Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1),           \
509d3aec65bSSong Gao                            (TD)Vk->E2(2 * i + 1));          \
510d3aec65bSSong Gao     }                                                       \
511d3aec65bSSong Gao }
512d3aec65bSSong Gao 
513d3aec65bSSong Gao VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL)
514d3aec65bSSong Gao VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL)
515d3aec65bSSong Gao VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL)
516d3aec65bSSong Gao VMADDWOD(vmaddwod_h_bu, 16,  UH, UB, DO_MUL)
517d3aec65bSSong Gao VMADDWOD(vmaddwod_w_hu, 32,  UW, UH, DO_MUL)
518d3aec65bSSong Gao VMADDWOD(vmaddwod_d_wu, 64,  UD, UW, DO_MUL)
519d3aec65bSSong Gao 
520d3aec65bSSong Gao #define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)  \
521d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
522d3aec65bSSong Gao {                                                           \
523d3aec65bSSong Gao     int i;                                                  \
524d3aec65bSSong Gao     VReg *Vd = (VReg *)vd;                                  \
525d3aec65bSSong Gao     VReg *Vj = (VReg *)vj;                                  \
526d3aec65bSSong Gao     VReg *Vk = (VReg *)vk;                                  \
527d3aec65bSSong Gao     typedef __typeof(Vd->ES1(0)) TS1;                       \
528d3aec65bSSong Gao     typedef __typeof(Vd->EU1(0)) TU1;                       \
529d3aec65bSSong Gao                                                             \
530d3aec65bSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
531d3aec65bSSong Gao         Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i),            \
532d3aec65bSSong Gao                             (TS1)Vk->ES2(2 * i));           \
533d3aec65bSSong Gao     }                                                       \
534d3aec65bSSong Gao }
535d3aec65bSSong Gao 
536d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
537d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
538d3aec65bSSong Gao VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
539d3aec65bSSong Gao 
540d3aec65bSSong Gao #define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)  \
541d3aec65bSSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
542d3aec65bSSong Gao {                                                           \
543d3aec65bSSong Gao     int i;                                                  \
544d3aec65bSSong Gao     VReg *Vd = (VReg *)vd;                                  \
545d3aec65bSSong Gao     VReg *Vj = (VReg *)vj;                                  \
546d3aec65bSSong Gao     VReg *Vk = (VReg *)vk;                                  \
547d3aec65bSSong Gao     typedef __typeof(Vd->ES1(0)) TS1;                       \
548d3aec65bSSong Gao     typedef __typeof(Vd->EU1(0)) TU1;                       \
549d3aec65bSSong Gao                                                             \
550d3aec65bSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
551d3aec65bSSong Gao         Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1),         \
552d3aec65bSSong Gao                             (TS1)Vk->ES2(2 * i + 1));        \
553d3aec65bSSong Gao     }                                                       \
554d3aec65bSSong Gao }
555d3aec65bSSong Gao 
556d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
557d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
558d3aec65bSSong Gao VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
5594cc4c0f7SSong Gao 
/*
 * Element-wise division and remainder primitives.
 *
 * All four evaluate to 0 when the divisor M is 0.
 *
 * The signed forms additionally special-case M == -1 so that the host
 * never evaluates MIN / -1 or MIN % -1:
 *   - DO_DIV yields the negation of N, computed in unsigned arithmetic and
 *     converted back to N's type, so the minimum value maps to itself
 *     without any signed overflow;
 *   - DO_REM yields 0, which is the remainder of any value divided by -1.
 *
 * Every argument is parenthesized, so arbitrary expressions may be passed,
 * but N and M are evaluated more than once: operands must be free of side
 * effects.
 */
#define DO_DIVU(N, M) (unlikely((M) == 0) ? 0 : (N) / (M))
#define DO_REMU(N, M) (unlikely((M) == 0) ? 0 : (N) % (M))
#define DO_DIV(N, M)  (unlikely((M) == 0) ? 0 :                     \
        unlikely((M) == (__typeof(N))(-1)) ?                        \
        (__typeof(N))(0 - (uint64_t)(N)) : (N) / (M))
#define DO_REM(N, M)  (unlikely((M) == 0) ? 0 :                     \
        unlikely((M) == (__typeof(N))(-1)) ? 0 : (N) % (M))
5664cc4c0f7SSong Gao 
5674cc4c0f7SSong Gao #define VDIV(NAME, BIT, E, DO_OP)                           \
5684cc4c0f7SSong Gao void HELPER(NAME)(CPULoongArchState *env,                   \
5694cc4c0f7SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)    \
5704cc4c0f7SSong Gao {                                                           \
5714cc4c0f7SSong Gao     int i;                                                  \
5724cc4c0f7SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                        \
5734cc4c0f7SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                        \
5744cc4c0f7SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                        \
5754cc4c0f7SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
5764cc4c0f7SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i));               \
5774cc4c0f7SSong Gao     }                                                       \
5784cc4c0f7SSong Gao }
5794cc4c0f7SSong Gao 
5804cc4c0f7SSong Gao VDIV(vdiv_b, 8, B, DO_DIV)
5814cc4c0f7SSong Gao VDIV(vdiv_h, 16, H, DO_DIV)
5824cc4c0f7SSong Gao VDIV(vdiv_w, 32, W, DO_DIV)
5834cc4c0f7SSong Gao VDIV(vdiv_d, 64, D, DO_DIV)
5844cc4c0f7SSong Gao VDIV(vdiv_bu, 8, UB, DO_DIVU)
5854cc4c0f7SSong Gao VDIV(vdiv_hu, 16, UH, DO_DIVU)
5864cc4c0f7SSong Gao VDIV(vdiv_wu, 32, UW, DO_DIVU)
5874cc4c0f7SSong Gao VDIV(vdiv_du, 64, UD, DO_DIVU)
5884cc4c0f7SSong Gao VDIV(vmod_b, 8, B, DO_REM)
5894cc4c0f7SSong Gao VDIV(vmod_h, 16, H, DO_REM)
5904cc4c0f7SSong Gao VDIV(vmod_w, 32, W, DO_REM)
5914cc4c0f7SSong Gao VDIV(vmod_d, 64, D, DO_REM)
5924cc4c0f7SSong Gao VDIV(vmod_bu, 8, UB, DO_REMU)
5934cc4c0f7SSong Gao VDIV(vmod_hu, 16, UH, DO_REMU)
5944cc4c0f7SSong Gao VDIV(vmod_wu, 32, UW, DO_REMU)
5954cc4c0f7SSong Gao VDIV(vmod_du, 64, UD, DO_REMU)
596cbe44190SSong Gao 
597cbe44190SSong Gao #define VSAT_S(NAME, BIT, E)                                    \
598cbe44190SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
599cbe44190SSong Gao {                                                               \
600cbe44190SSong Gao     int i;                                                      \
601cbe44190SSong Gao     VReg *Vd = (VReg *)vd;                                      \
602cbe44190SSong Gao     VReg *Vj = (VReg *)vj;                                      \
603cbe44190SSong Gao     typedef __typeof(Vd->E(0)) TD;                              \
604cbe44190SSong Gao                                                                 \
605cbe44190SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
606cbe44190SSong Gao         Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max :               \
607cbe44190SSong Gao                    Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i);    \
608cbe44190SSong Gao     }                                                           \
609cbe44190SSong Gao }
610cbe44190SSong Gao 
611cbe44190SSong Gao VSAT_S(vsat_b, 8, B)
612cbe44190SSong Gao VSAT_S(vsat_h, 16, H)
613cbe44190SSong Gao VSAT_S(vsat_w, 32, W)
614cbe44190SSong Gao VSAT_S(vsat_d, 64, D)
615cbe44190SSong Gao 
616cbe44190SSong Gao #define VSAT_U(NAME, BIT, E)                                    \
617cbe44190SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
618cbe44190SSong Gao {                                                               \
619cbe44190SSong Gao     int i;                                                      \
620cbe44190SSong Gao     VReg *Vd = (VReg *)vd;                                      \
621cbe44190SSong Gao     VReg *Vj = (VReg *)vj;                                      \
622cbe44190SSong Gao     typedef __typeof(Vd->E(0)) TD;                              \
623cbe44190SSong Gao                                                                 \
624cbe44190SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
625cbe44190SSong Gao         Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i);     \
626cbe44190SSong Gao     }                                                           \
627cbe44190SSong Gao }
628cbe44190SSong Gao 
629cbe44190SSong Gao VSAT_U(vsat_bu, 8, UB)
630cbe44190SSong Gao VSAT_U(vsat_hu, 16, UH)
631cbe44190SSong Gao VSAT_U(vsat_wu, 32, UW)
632cbe44190SSong Gao VSAT_U(vsat_du, 64, UD)
6333734ad93SSong Gao 
/*
 * VEXTH: define a helper that widens source elements taken from index
 * LSX_LEN / BIT upwards: Vd->E1(i) = Vj->E2(i + LSX_LEN / BIT) for each of
 * the LSX_LEN / BIT destination elements.  The conversion between the two
 * element types is whatever the C assignment from E2's type to E1's type
 * performs.
 */
#define VEXTH(NAME, BIT, E1, E2)                                    \
void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
{                                                                   \
    int i;                                                          \
    VReg *Vd = &(env->fpr[vd].vreg);                                \
    VReg *Vj = &(env->fpr[vj].vreg);                                \
                                                                    \
    for (i = 0; i < LSX_LEN/BIT; i++) {                             \
        Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT);                        \
    }                                                               \
}
6453734ad93SSong Gao 
6463734ad93SSong Gao void HELPER(vexth_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
6473734ad93SSong Gao {
6483734ad93SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
6493734ad93SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
6503734ad93SSong Gao 
6513734ad93SSong Gao     Vd->Q(0) = int128_makes64(Vj->D(1));
6523734ad93SSong Gao }
6533734ad93SSong Gao 
6543734ad93SSong Gao void HELPER(vexth_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
6553734ad93SSong Gao {
6563734ad93SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
6573734ad93SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
6583734ad93SSong Gao 
6593734ad93SSong Gao     Vd->Q(0) = int128_make64((uint64_t)Vj->D(1));
6603734ad93SSong Gao }
6613734ad93SSong Gao 
6623734ad93SSong Gao VEXTH(vexth_h_b, 16, H, B)
6633734ad93SSong Gao VEXTH(vexth_w_h, 32, W, H)
6643734ad93SSong Gao VEXTH(vexth_d_w, 64, D, W)
6653734ad93SSong Gao VEXTH(vexth_hu_bu, 16, UH, UB)
6663734ad93SSong Gao VEXTH(vexth_wu_hu, 32, UW, UH)
6673734ad93SSong Gao VEXTH(vexth_du_wu, 64, UD, UW)
668f0e395dfSSong Gao 
/*
 * DO_SIGNCOV(a, b): 0 when a is 0, -b when a is negative, b otherwise.
 * Arguments are parenthesized so expressions can be passed safely; they
 * are evaluated more than once, so they must be free of side effects.
 */
#define DO_SIGNCOV(a, b)  ((a) == 0 ? 0 : (a) < 0 ? -(b) : (b))
670f0e395dfSSong Gao 
671f0e395dfSSong Gao DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
672f0e395dfSSong Gao DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
673f0e395dfSSong Gao DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
674f0e395dfSSong Gao DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
675789f4a4cSSong Gao 
/*
 * Gather the most significant bit of each of the 8 bytes of 'val' into an
 * 8-bit mask: bit k of the result is bit 7 of byte k (byte 0 being the
 * least significant byte).
 */
static uint64_t do_vmskltz_b(int64_t val)
{
    uint64_t m = 0x8080808080808080ULL;
    uint64_t c = val & m;

    /*
     * Each shift-and-or step doubles the number of copies of every sign
     * bit, 7 bit positions apart, so that after three steps bits 56..63
     * hold the sign bits of bytes 0..7 respectively.
     */
    c |= c << 7;
    c |= c << 14;
    c |= c << 28;
    return c >> 56;
}
685789f4a4cSSong Gao 
686789f4a4cSSong Gao void HELPER(vmskltz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
687789f4a4cSSong Gao {
688789f4a4cSSong Gao     uint16_t temp = 0;
689789f4a4cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
690789f4a4cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
691789f4a4cSSong Gao 
692789f4a4cSSong Gao     temp = do_vmskltz_b(Vj->D(0));
693789f4a4cSSong Gao     temp |= (do_vmskltz_b(Vj->D(1)) << 8);
694789f4a4cSSong Gao     Vd->D(0) = temp;
695789f4a4cSSong Gao     Vd->D(1) = 0;
696789f4a4cSSong Gao }
697789f4a4cSSong Gao 
/*
 * Gather the most significant bit of each of the 4 halfwords of 'val'
 * into a 4-bit mask: bit k of the result is bit 15 of halfword k
 * (halfword 0 being the least significant one).
 */
static uint64_t do_vmskltz_h(int64_t val)
{
    uint64_t m = 0x8000800080008000ULL;
    uint64_t c = val & m;

    /* Replicate the sign bits 15 positions apart until they fill 60..63. */
    c |= c << 15;
    c |= c << 30;
    return c >> 60;
}
706789f4a4cSSong Gao 
707789f4a4cSSong Gao void HELPER(vmskltz_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
708789f4a4cSSong Gao {
709789f4a4cSSong Gao     uint16_t temp = 0;
710789f4a4cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
711789f4a4cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
712789f4a4cSSong Gao 
713789f4a4cSSong Gao     temp = do_vmskltz_h(Vj->D(0));
714789f4a4cSSong Gao     temp |= (do_vmskltz_h(Vj->D(1)) << 4);
715789f4a4cSSong Gao     Vd->D(0) = temp;
716789f4a4cSSong Gao     Vd->D(1) = 0;
717789f4a4cSSong Gao }
718789f4a4cSSong Gao 
/*
 * Gather the most significant bit of each of the 2 words of 'val' into a
 * 2-bit mask: bit 0 is bit 31 of 'val', bit 1 is bit 63 of 'val'.
 */
static uint64_t do_vmskltz_w(int64_t val)
{
    uint64_t m = 0x8000000080000000ULL;
    uint64_t c = val & m;

    /* Move the low word's sign bit next to the high word's one. */
    c |= c << 31;
    return c >> 62;
}
726789f4a4cSSong Gao 
727789f4a4cSSong Gao void HELPER(vmskltz_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
728789f4a4cSSong Gao {
729789f4a4cSSong Gao     uint16_t temp = 0;
730789f4a4cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
731789f4a4cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
732789f4a4cSSong Gao 
733789f4a4cSSong Gao     temp = do_vmskltz_w(Vj->D(0));
734789f4a4cSSong Gao     temp |= (do_vmskltz_w(Vj->D(1)) << 2);
735789f4a4cSSong Gao     Vd->D(0) = temp;
736789f4a4cSSong Gao     Vd->D(1) = 0;
737789f4a4cSSong Gao }
738789f4a4cSSong Gao 
/* Return bit 63 of 'val': 1 when 'val' is negative, 0 otherwise. */
static uint64_t do_vmskltz_d(int64_t val)
{
    return (uint64_t)val >> 63;
}
743789f4a4cSSong Gao void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
744789f4a4cSSong Gao {
745789f4a4cSSong Gao     uint16_t temp = 0;
746789f4a4cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
747789f4a4cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
748789f4a4cSSong Gao 
749789f4a4cSSong Gao     temp = do_vmskltz_d(Vj->D(0));
750789f4a4cSSong Gao     temp |= (do_vmskltz_d(Vj->D(1)) << 1);
751789f4a4cSSong Gao     Vd->D(0) = temp;
752789f4a4cSSong Gao     Vd->D(1) = 0;
753789f4a4cSSong Gao }
754789f4a4cSSong Gao 
755789f4a4cSSong Gao void HELPER(vmskgez_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
756789f4a4cSSong Gao {
757789f4a4cSSong Gao     uint16_t temp = 0;
758789f4a4cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
759789f4a4cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
760789f4a4cSSong Gao 
761789f4a4cSSong Gao     temp =  do_vmskltz_b(Vj->D(0));
762789f4a4cSSong Gao     temp |= (do_vmskltz_b(Vj->D(1)) << 8);
763789f4a4cSSong Gao     Vd->D(0) = (uint16_t)(~temp);
764789f4a4cSSong Gao     Vd->D(1) = 0;
765789f4a4cSSong Gao }
766789f4a4cSSong Gao 
/*
 * Build an 8-bit mask of the zero bytes of 'a': bit k of the result is 1
 * exactly when byte k of 'a' is 0 (byte 0 being the least significant).
 */
static uint64_t do_vmskez_b(uint64_t a)
{
    uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
    /*
     * (a & m) + m sets bit 7 of a byte when any of its low 7 bits is set;
     * or-ing in 'a' also covers bit 7 itself, and or-ing in 'm' sets all
     * the low bits.  After inversion only bit 7 of each all-zero byte
     * remains set.
     */
    uint64_t c = ~(((a & m) + m) | a | m);

    /* Collect the per-byte flags into bits 56..63. */
    c |= c << 7;
    c |= c << 14;
    c |= c << 28;
    return c >> 56;
}
776789f4a4cSSong Gao 
777789f4a4cSSong Gao void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
778789f4a4cSSong Gao {
779789f4a4cSSong Gao     uint16_t temp = 0;
780789f4a4cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
781789f4a4cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
782789f4a4cSSong Gao 
783789f4a4cSSong Gao     temp = do_vmskez_b(Vj->D(0));
784789f4a4cSSong Gao     temp |= (do_vmskez_b(Vj->D(1)) << 8);
785789f4a4cSSong Gao     Vd->D(0) = (uint16_t)(~temp);
786789f4a4cSSong Gao     Vd->D(1) = 0;
787789f4a4cSSong Gao }
788f205a539SSong Gao 
789f205a539SSong Gao void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
790f205a539SSong Gao {
791f205a539SSong Gao     int i;
792f205a539SSong Gao     VReg *Vd = (VReg *)vd;
793f205a539SSong Gao     VReg *Vj = (VReg *)vj;
794f205a539SSong Gao 
795f205a539SSong Gao     for (i = 0; i < LSX_LEN/8; i++) {
796f205a539SSong Gao         Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
797f205a539SSong Gao     }
798f205a539SSong Gao }
7999b21a7a5SSong Gao 
/*
 * VSLLWIL: define a widening shift-left-by-immediate helper.
 *
 * For each of the LSX_LEN / BIT destination elements, source element
 * Vj->E2(i) is converted to the destination element type TD and shifted
 * left by imm % BIT.  The result is assembled in a temporary that is
 * copied to Vd at the end, so all source elements are read before the
 * destination is modified.
 */
#define VSLLWIL(NAME, BIT, E1, E2)                        \
void HELPER(NAME)(CPULoongArchState *env,                 \
                  uint32_t vd, uint32_t vj, uint32_t imm) \
{                                                         \
    int i;                                                \
    VReg temp;                                            \
    VReg *Vd = &(env->fpr[vd].vreg);                      \
    VReg *Vj = &(env->fpr[vj].vreg);                      \
    typedef __typeof(temp.E1(0)) TD;                      \
                                                          \
    temp.D(0) = 0;                                        \
    temp.D(1) = 0;                                        \
    for (i = 0; i < LSX_LEN/BIT; i++) {                   \
        temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT);        \
    }                                                     \
    *Vd = temp;                                           \
}
8179b21a7a5SSong Gao 
8189b21a7a5SSong Gao void HELPER(vextl_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
8199b21a7a5SSong Gao {
8209b21a7a5SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
8219b21a7a5SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
8229b21a7a5SSong Gao 
8239b21a7a5SSong Gao     Vd->Q(0) = int128_makes64(Vj->D(0));
8249b21a7a5SSong Gao }
8259b21a7a5SSong Gao 
8269b21a7a5SSong Gao void HELPER(vextl_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
8279b21a7a5SSong Gao {
8289b21a7a5SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
8299b21a7a5SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
8309b21a7a5SSong Gao 
8319b21a7a5SSong Gao     Vd->Q(0) = int128_make64(Vj->D(0));
8329b21a7a5SSong Gao }
8339b21a7a5SSong Gao 
8349b21a7a5SSong Gao VSLLWIL(vsllwil_h_b, 16, H, B)
8359b21a7a5SSong Gao VSLLWIL(vsllwil_w_h, 32, W, H)
8369b21a7a5SSong Gao VSLLWIL(vsllwil_d_w, 64, D, W)
8379b21a7a5SSong Gao VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
8389b21a7a5SSong Gao VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
8399b21a7a5SSong Gao VSLLWIL(vsllwil_du_wu, 64, UD, UW)
840ecb93716SSong Gao 
/*
 * do_vsrlr(E, T): define do_vsrlr_<E>(), a right shift with rounding on
 * values of type T.
 *
 * A shift count of 0 returns the input unchanged.  Otherwise the result is
 * s1 >> sh plus the last bit shifted out (bit sh - 1 of s1), i.e. the
 * value is rounded to nearest with halves rounded up.  The sum is
 * converted back to T on return.  Callers must pass 0 <= sh < width of T.
 */
#define do_vsrlr(E, T)                                  \
static T do_vsrlr_ ##E(T s1, int sh)                    \
{                                                       \
    if (sh == 0) {                                      \
        return s1;                                      \
    }                                                   \
    return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1);       \
}

do_vsrlr(B, uint8_t)
do_vsrlr(H, uint16_t)
do_vsrlr(W, uint32_t)
do_vsrlr(D, uint64_t)
855ecb93716SSong Gao 
856ecb93716SSong Gao #define VSRLR(NAME, BIT, T, E)                                  \
857ecb93716SSong Gao void HELPER(NAME)(CPULoongArchState *env,                       \
858ecb93716SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)        \
859ecb93716SSong Gao {                                                               \
860ecb93716SSong Gao     int i;                                                      \
861ecb93716SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                            \
862ecb93716SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                            \
863ecb93716SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                            \
864ecb93716SSong Gao                                                                 \
865ecb93716SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
866ecb93716SSong Gao         Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
867ecb93716SSong Gao     }                                                           \
868ecb93716SSong Gao }
869ecb93716SSong Gao 
870ecb93716SSong Gao VSRLR(vsrlr_b, 8,  uint8_t, B)
871ecb93716SSong Gao VSRLR(vsrlr_h, 16, uint16_t, H)
872ecb93716SSong Gao VSRLR(vsrlr_w, 32, uint32_t, W)
873ecb93716SSong Gao VSRLR(vsrlr_d, 64, uint64_t, D)
874ecb93716SSong Gao 
875ecb93716SSong Gao #define VSRLRI(NAME, BIT, E)                              \
876ecb93716SSong Gao void HELPER(NAME)(CPULoongArchState *env,                 \
877ecb93716SSong Gao                   uint32_t vd, uint32_t vj, uint32_t imm) \
878ecb93716SSong Gao {                                                         \
879ecb93716SSong Gao     int i;                                                \
880ecb93716SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                      \
881ecb93716SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                      \
882ecb93716SSong Gao                                                           \
883ecb93716SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                   \
884ecb93716SSong Gao         Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm);         \
885ecb93716SSong Gao     }                                                     \
886ecb93716SSong Gao }
887ecb93716SSong Gao 
888ecb93716SSong Gao VSRLRI(vsrlri_b, 8, B)
889ecb93716SSong Gao VSRLRI(vsrlri_h, 16, H)
890ecb93716SSong Gao VSRLRI(vsrlri_w, 32, W)
891ecb93716SSong Gao VSRLRI(vsrlri_d, 64, D)
892ecb93716SSong Gao 
/*
 * do_vsrar(E, T): define do_vsrar_<E>(), a right shift with rounding on
 * values of the signed type T.
 *
 * A shift count of 0 returns the input unchanged.  Otherwise the result is
 * s1 >> sh plus the last bit shifted out (bit sh - 1 of s1).  The shift of
 * a negative s1 relies on the compiler implementing >> on signed values as
 * an arithmetic shift.  Callers must pass 0 <= sh < width of T.
 */
#define do_vsrar(E, T)                                  \
static T do_vsrar_ ##E(T s1, int sh)                    \
{                                                       \
    if (sh == 0) {                                      \
        return s1;                                      \
    }                                                   \
    return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1);       \
}

do_vsrar(B, int8_t)
do_vsrar(H, int16_t)
do_vsrar(W, int32_t)
do_vsrar(D, int64_t)
907ecb93716SSong Gao 
908ecb93716SSong Gao #define VSRAR(NAME, BIT, T, E)                                  \
909ecb93716SSong Gao void HELPER(NAME)(CPULoongArchState *env,                       \
910ecb93716SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)        \
911ecb93716SSong Gao {                                                               \
912ecb93716SSong Gao     int i;                                                      \
913ecb93716SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                            \
914ecb93716SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                            \
915ecb93716SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                            \
916ecb93716SSong Gao                                                                 \
917ecb93716SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
918ecb93716SSong Gao         Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
919ecb93716SSong Gao     }                                                           \
920ecb93716SSong Gao }
921ecb93716SSong Gao 
922ecb93716SSong Gao VSRAR(vsrar_b, 8,  uint8_t, B)
923ecb93716SSong Gao VSRAR(vsrar_h, 16, uint16_t, H)
924ecb93716SSong Gao VSRAR(vsrar_w, 32, uint32_t, W)
925ecb93716SSong Gao VSRAR(vsrar_d, 64, uint64_t, D)
926ecb93716SSong Gao 
927ecb93716SSong Gao #define VSRARI(NAME, BIT, E)                              \
928ecb93716SSong Gao void HELPER(NAME)(CPULoongArchState *env,                 \
929ecb93716SSong Gao                   uint32_t vd, uint32_t vj, uint32_t imm) \
930ecb93716SSong Gao {                                                         \
931ecb93716SSong Gao     int i;                                                \
932ecb93716SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                      \
933ecb93716SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                      \
934ecb93716SSong Gao                                                           \
935ecb93716SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                   \
936ecb93716SSong Gao         Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm);         \
937ecb93716SSong Gao     }                                                     \
938ecb93716SSong Gao }
939ecb93716SSong Gao 
940ecb93716SSong Gao VSRARI(vsrari_b, 8, B)
941ecb93716SSong Gao VSRARI(vsrari_h, 16, H)
942ecb93716SSong Gao VSRARI(vsrari_w, 32, W)
943ecb93716SSong Gao VSRARI(vsrari_d, 64, D)
944d79fb8ddSSong Gao 
/*
 * R_SHIFT(a, b): a shifted right by b.  Whether the shift is logical or
 * arithmetic follows from the type of 'a'.  Both arguments are
 * parenthesized so that operators of lower precedence than >> (such as |
 * or &) inside an argument are not regrouped.
 */
#define R_SHIFT(a, b) ((a) >> (b))
946d79fb8ddSSong Gao 
947d79fb8ddSSong Gao #define VSRLN(NAME, BIT, T, E1, E2)                             \
948d79fb8ddSSong Gao void HELPER(NAME)(CPULoongArchState *env,                       \
949d79fb8ddSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)        \
950d79fb8ddSSong Gao {                                                               \
951d79fb8ddSSong Gao     int i;                                                      \
952d79fb8ddSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                            \
953d79fb8ddSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                            \
954d79fb8ddSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                            \
955d79fb8ddSSong Gao                                                                 \
956d79fb8ddSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
957d79fb8ddSSong Gao         Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \
958d79fb8ddSSong Gao     }                                                           \
959d79fb8ddSSong Gao     Vd->D(1) = 0;                                               \
960d79fb8ddSSong Gao }
961d79fb8ddSSong Gao 
962d79fb8ddSSong Gao VSRLN(vsrln_b_h, 16, uint16_t, B, H)
963d79fb8ddSSong Gao VSRLN(vsrln_h_w, 32, uint32_t, H, W)
964d79fb8ddSSong Gao VSRLN(vsrln_w_d, 64, uint64_t, W, D)
965d79fb8ddSSong Gao 
966d79fb8ddSSong Gao #define VSRAN(NAME, BIT, T, E1, E2)                           \
967d79fb8ddSSong Gao void HELPER(NAME)(CPULoongArchState *env,                     \
968d79fb8ddSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)      \
969d79fb8ddSSong Gao {                                                             \
970d79fb8ddSSong Gao     int i;                                                    \
971d79fb8ddSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                          \
972d79fb8ddSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                          \
973d79fb8ddSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                          \
974d79fb8ddSSong Gao                                                               \
975d79fb8ddSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                       \
976d79fb8ddSSong Gao         Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \
977d79fb8ddSSong Gao     }                                                         \
978d79fb8ddSSong Gao     Vd->D(1) = 0;                                             \
979d79fb8ddSSong Gao }
980d79fb8ddSSong Gao 
981d79fb8ddSSong Gao VSRAN(vsran_b_h, 16, uint16_t, B, H)
982d79fb8ddSSong Gao VSRAN(vsran_h_w, 32, uint32_t, H, W)
983d79fb8ddSSong Gao VSRAN(vsran_w_d, 64, uint64_t, W, D)
984d79fb8ddSSong Gao 
/*
 * VSRLNI: define a narrowing right-shift-by-immediate helper that packs
 * two sources into one destination.
 *
 * With max = LSX_LEN / BIT, narrow elements 0 .. max - 1 of the result
 * come from Vj->E2(i) and elements max .. 2 * max - 1 from the previous
 * contents of Vd->E2(i); each source element is converted to the unsigned
 * type T and shifted right by 'imm'.  The result is built in a temporary
 * because Vd is both an input and the output.
 *
 * 'imm' is used without reduction; presumably the decoder restricts it
 * to [0, BIT) -- confirm against the translator.
 */
#define VSRLNI(NAME, BIT, T, E1, E2)                         \
void HELPER(NAME)(CPULoongArchState *env,                    \
                  uint32_t vd, uint32_t vj, uint32_t imm)    \
{                                                            \
    int i, max;                                              \
    VReg temp;                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                         \
    VReg *Vj = &(env->fpr[vj].vreg);                         \
                                                             \
    temp.D(0) = 0;                                           \
    temp.D(1) = 0;                                           \
    max = LSX_LEN/BIT;                                       \
    for (i = 0; i < max; i++) {                              \
        temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm);             \
        temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm);       \
    }                                                        \
    *Vd = temp;                                              \
}
1003d79fb8ddSSong Gao 
1004d79fb8ddSSong Gao void HELPER(vsrlni_d_q)(CPULoongArchState *env,
1005d79fb8ddSSong Gao                         uint32_t vd, uint32_t vj, uint32_t imm)
1006d79fb8ddSSong Gao {
1007d79fb8ddSSong Gao     VReg temp;
1008d79fb8ddSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
1009d79fb8ddSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
1010d79fb8ddSSong Gao 
1011d79fb8ddSSong Gao     temp.D(0) = 0;
1012d79fb8ddSSong Gao     temp.D(1) = 0;
1013d79fb8ddSSong Gao     temp.D(0) = int128_getlo(int128_urshift(Vj->Q(0), imm % 128));
1014d79fb8ddSSong Gao     temp.D(1) = int128_getlo(int128_urshift(Vd->Q(0), imm % 128));
1015d79fb8ddSSong Gao     *Vd = temp;
1016d79fb8ddSSong Gao }
1017d79fb8ddSSong Gao 
1018d79fb8ddSSong Gao VSRLNI(vsrlni_b_h, 16, uint16_t, B, H)
1019d79fb8ddSSong Gao VSRLNI(vsrlni_h_w, 32, uint32_t, H, W)
1020d79fb8ddSSong Gao VSRLNI(vsrlni_w_d, 64, uint64_t, W, D)
1021d79fb8ddSSong Gao 
/*
 * VSRANI: define a narrowing right-shift-by-immediate helper that packs
 * two sources into one destination, shifting each source element in its
 * own type.
 *
 * With max = LSX_LEN / BIT, narrow elements 0 .. max - 1 of the result
 * come from Vj->E2(i) >> imm and elements max .. 2 * max - 1 from the
 * previous contents of Vd->E2(i) >> imm.  The result is built in a
 * temporary because Vd is both an input and the output.
 *
 * 'imm' is used without reduction; presumably the decoder restricts it
 * to [0, BIT) -- confirm against the translator.
 */
#define VSRANI(NAME, BIT, E1, E2)                         \
void HELPER(NAME)(CPULoongArchState *env,                 \
                  uint32_t vd, uint32_t vj, uint32_t imm) \
{                                                         \
    int i, max;                                           \
    VReg temp;                                            \
    VReg *Vd = &(env->fpr[vd].vreg);                      \
    VReg *Vj = &(env->fpr[vj].vreg);                      \
                                                          \
    temp.D(0) = 0;                                        \
    temp.D(1) = 0;                                        \
    max = LSX_LEN/BIT;                                    \
    for (i = 0; i < max; i++) {                           \
        temp.E1(i) = R_SHIFT(Vj->E2(i), imm);             \
        temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm);       \
    }                                                     \
    *Vd = temp;                                           \
}
1040d79fb8ddSSong Gao 
1041d79fb8ddSSong Gao void HELPER(vsrani_d_q)(CPULoongArchState *env,
1042d79fb8ddSSong Gao                         uint32_t vd, uint32_t vj, uint32_t imm)
1043d79fb8ddSSong Gao {
1044d79fb8ddSSong Gao     VReg temp;
1045d79fb8ddSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
1046d79fb8ddSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
1047d79fb8ddSSong Gao 
1048d79fb8ddSSong Gao     temp.D(0) = 0;
1049d79fb8ddSSong Gao     temp.D(1) = 0;
1050d79fb8ddSSong Gao     temp.D(0) = int128_getlo(int128_rshift(Vj->Q(0), imm % 128));
1051d79fb8ddSSong Gao     temp.D(1) = int128_getlo(int128_rshift(Vd->Q(0), imm % 128));
1052d79fb8ddSSong Gao     *Vd = temp;
1053d79fb8ddSSong Gao }
1054d79fb8ddSSong Gao 
1055d79fb8ddSSong Gao VSRANI(vsrani_b_h, 16, B, H)
1056d79fb8ddSSong Gao VSRANI(vsrani_h_w, 32, H, W)
1057d79fb8ddSSong Gao VSRANI(vsrani_w_d, 64, W, D)
1058a5200a17SSong Gao 
1059a5200a17SSong Gao #define VSRLRN(NAME, BIT, T, E1, E2)                                \
1060a5200a17SSong Gao void HELPER(NAME)(CPULoongArchState *env,                           \
1061a5200a17SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)            \
1062a5200a17SSong Gao {                                                                   \
1063a5200a17SSong Gao     int i;                                                          \
1064a5200a17SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                \
1065a5200a17SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
1066a5200a17SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                \
1067a5200a17SSong Gao                                                                     \
1068a5200a17SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                             \
1069a5200a17SSong Gao         Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
1070a5200a17SSong Gao     }                                                               \
1071a5200a17SSong Gao     Vd->D(1) = 0;                                                   \
1072a5200a17SSong Gao }
1073a5200a17SSong Gao 
1074a5200a17SSong Gao VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H)
1075a5200a17SSong Gao VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W)
1076a5200a17SSong Gao VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D)
1077a5200a17SSong Gao 
1078a5200a17SSong Gao #define VSRARN(NAME, BIT, T, E1, E2)                                \
1079a5200a17SSong Gao void HELPER(NAME)(CPULoongArchState *env,                           \
1080a5200a17SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)            \
1081a5200a17SSong Gao {                                                                   \
1082a5200a17SSong Gao     int i;                                                          \
1083a5200a17SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                \
1084a5200a17SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
1085a5200a17SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                \
1086a5200a17SSong Gao                                                                     \
1087a5200a17SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                             \
1088a5200a17SSong Gao         Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
1089a5200a17SSong Gao     }                                                               \
1090a5200a17SSong Gao     Vd->D(1) = 0;                                                   \
1091a5200a17SSong Gao }
1092a5200a17SSong Gao 
1093a5200a17SSong Gao VSRARN(vsrarn_b_h, 16, uint8_t,  B, H)
1094a5200a17SSong Gao VSRARN(vsrarn_h_w, 32, uint16_t, H, W)
1095a5200a17SSong Gao VSRARN(vsrarn_w_d, 64, uint32_t, W, D)
1096a5200a17SSong Gao 
/*
 * VSRLRNI: generate an immediate-shift narrowing helper.
 *
 * Each E2 lane of Vj and of the old Vd is passed to do_vsrlr_<E2>() with
 * the shift count imm.  Results derived from Vj fill the first LSX_LEN/BIT
 * E1 lanes of the destination, results derived from Vd fill the following
 * LSX_LEN/BIT lanes.  The value is assembled in a scratch register and
 * copied to Vd at the end, so Vd is only read before it is overwritten.
 */
#define VSRLRNI(NAME, BIT, E1, E2)                             \
void HELPER(NAME)(CPULoongArchState *env,                      \
                  uint32_t vd, uint32_t vj, uint32_t imm)      \
{                                                              \
    VReg *Vd = &(env->fpr[vd].vreg);                           \
    VReg *Vj = &(env->fpr[vj].vreg);                           \
    VReg res;                                                  \
    int half = LSX_LEN / BIT;                                  \
    int lane;                                                  \
                                                               \
    res.D(0) = 0;                                              \
    res.D(1) = 0;                                              \
    for (lane = 0; lane < half; lane++) {                      \
        res.E1(lane) = do_vsrlr_ ## E2(Vj->E2(lane), imm);     \
        res.E1(lane + half) = do_vsrlr_ ## E2(Vd->E2(lane), imm); \
    }                                                          \
    *Vd = res;                                                 \
}
1115a5200a17SSong Gao 
1116a5200a17SSong Gao void HELPER(vsrlrni_d_q)(CPULoongArchState *env,
1117a5200a17SSong Gao                          uint32_t vd, uint32_t vj, uint32_t imm)
1118a5200a17SSong Gao {
1119a5200a17SSong Gao     VReg temp;
1120a5200a17SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
1121a5200a17SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
1122a5200a17SSong Gao     Int128 r1, r2;
1123a5200a17SSong Gao 
1124a5200a17SSong Gao     if (imm == 0) {
1125a5200a17SSong Gao         temp.D(0) = int128_getlo(Vj->Q(0));
1126a5200a17SSong Gao         temp.D(1) = int128_getlo(Vd->Q(0));
1127a5200a17SSong Gao     } else {
1128a5200a17SSong Gao         r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one());
1129a5200a17SSong Gao         r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one());
1130a5200a17SSong Gao 
1131a5200a17SSong Gao        temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1));
1132a5200a17SSong Gao        temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2));
1133a5200a17SSong Gao     }
1134a5200a17SSong Gao     *Vd = temp;
1135a5200a17SSong Gao }
1136a5200a17SSong Gao 
1137a5200a17SSong Gao VSRLRNI(vsrlrni_b_h, 16, B, H)
1138a5200a17SSong Gao VSRLRNI(vsrlrni_h_w, 32, H, W)
1139a5200a17SSong Gao VSRLRNI(vsrlrni_w_d, 64, W, D)
1140a5200a17SSong Gao 
/*
 * VSRARNI: generate an immediate-shift narrowing helper.
 *
 * Each E2 lane of Vj and of the old Vd is passed to do_vsrar_<E2>() with
 * the shift count imm.  Results derived from Vj fill the first LSX_LEN/BIT
 * E1 lanes of the destination, results derived from Vd fill the following
 * LSX_LEN/BIT lanes.  The value is assembled in a scratch register and
 * copied to Vd at the end, so Vd is only read before it is overwritten.
 */
#define VSRARNI(NAME, BIT, E1, E2)                             \
void HELPER(NAME)(CPULoongArchState *env,                      \
                  uint32_t vd, uint32_t vj, uint32_t imm)      \
{                                                              \
    VReg *Vd = &(env->fpr[vd].vreg);                           \
    VReg *Vj = &(env->fpr[vj].vreg);                           \
    VReg res;                                                  \
    int half = LSX_LEN / BIT;                                  \
    int lane;                                                  \
                                                               \
    res.D(0) = 0;                                              \
    res.D(1) = 0;                                              \
    for (lane = 0; lane < half; lane++) {                      \
        res.E1(lane) = do_vsrar_ ## E2(Vj->E2(lane), imm);     \
        res.E1(lane + half) = do_vsrar_ ## E2(Vd->E2(lane), imm); \
    }                                                          \
    *Vd = res;                                                 \
}
1159a5200a17SSong Gao 
1160a5200a17SSong Gao void HELPER(vsrarni_d_q)(CPULoongArchState *env,
1161a5200a17SSong Gao                          uint32_t vd, uint32_t vj, uint32_t imm)
1162a5200a17SSong Gao {
1163a5200a17SSong Gao     VReg temp;
1164a5200a17SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
1165a5200a17SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
1166a5200a17SSong Gao     Int128 r1, r2;
1167a5200a17SSong Gao 
1168a5200a17SSong Gao     if (imm == 0) {
1169a5200a17SSong Gao         temp.D(0) = int128_getlo(Vj->Q(0));
1170a5200a17SSong Gao         temp.D(1) = int128_getlo(Vd->Q(0));
1171a5200a17SSong Gao     } else {
1172a5200a17SSong Gao         r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
1173a5200a17SSong Gao         r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
1174a5200a17SSong Gao 
1175a5200a17SSong Gao        temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1));
1176a5200a17SSong Gao        temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2));
1177a5200a17SSong Gao     }
1178a5200a17SSong Gao     *Vd = temp;
1179a5200a17SSong Gao }
1180a5200a17SSong Gao 
1181a5200a17SSong Gao VSRARNI(vsrarni_b_h, 16, B, H)
1182a5200a17SSong Gao VSRARNI(vsrarni_h_w, 32, H, W)
1183a5200a17SSong Gao VSRARNI(vsrarni_w_d, 64, W, D)
118483b3815dSSong Gao 
/*
 * SSRLNS: generate do_ssrlns_<NAME>(e2, sa, sh).
 *
 * The signed input e2 (type T2) is reinterpreted as the unsigned type T1
 * and shifted right by sa bits.  The result is then clamped to
 * (1 << sh) - 1, which is held in the narrower type T3.
 */
#define SSRLNS(NAME, T1, T2, T3)                        \
static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh)     \
{                                                       \
    const T3 limit = (1ull << sh) - 1;                  \
    T1 shifted = e2;                                    \
                                                        \
    if (sa != 0) {                                      \
        shifted = ((T1)e2) >> sa;                       \
    }                                                   \
    if (shifted > limit) {                              \
        return limit;                                   \
    }                                                   \
    return shifted;                                     \
}

SSRLNS(B, uint16_t, int16_t, uint8_t)
SSRLNS(H, uint32_t, int32_t, uint16_t)
SSRLNS(W, uint64_t, int64_t, uint32_t)
120683b3815dSSong Gao 
120783b3815dSSong Gao #define VSSRLN(NAME, BIT, T, E1, E2)                                          \
120883b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                     \
120983b3815dSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)                      \
121083b3815dSSong Gao {                                                                             \
121183b3815dSSong Gao     int i;                                                                    \
121283b3815dSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                          \
121383b3815dSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                          \
121483b3815dSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                          \
121583b3815dSSong Gao                                                                               \
121683b3815dSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
121783b3815dSSong Gao         Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
121883b3815dSSong Gao     }                                                                         \
121983b3815dSSong Gao     Vd->D(1) = 0;                                                             \
122083b3815dSSong Gao }
122183b3815dSSong Gao 
122283b3815dSSong Gao VSSRLN(vssrln_b_h, 16, uint16_t, B, H)
122383b3815dSSong Gao VSSRLN(vssrln_h_w, 32, uint32_t, H, W)
122483b3815dSSong Gao VSSRLN(vssrln_w_d, 64, uint64_t, W, D)
122583b3815dSSong Gao 
/*
 * SSRANS: generate do_ssrans_<E>(e2, sa, sh).
 *
 * The signed input e2 (type T1) is shifted right by sa bits and the result
 * is clamped to the signed range [-(1 << sh), (1 << sh) - 1].
 *
 * The bounds are derived in the wide type T1.  Computing the lower bound
 * as -(mask + 1) in the narrow type T2 overflows a signed int when mask is
 * INT32_MAX (the W variant), which is undefined in ISO C and only gives
 * the right answer when signed arithmetic wraps.
 */
#define SSRANS(E, T1, T2)                               \
static T1 do_ssrans_ ## E(T1 e2, int sa, int sh)        \
{                                                       \
    const T2 mask = (1ll << sh) - 1;                    \
    const T1 max = mask;                                \
    const T1 min = ~max;    /* == -(max + 1) */         \
    T1 shft_res = e2 >> sa;                             \
                                                        \
    if (shft_res > max) {                               \
        return max;                                     \
    } else if (shft_res < min) {                        \
        return min;                                     \
    } else {                                            \
        return shft_res;                                \
    }                                                   \
}

SSRANS(B, int16_t, int8_t)
SSRANS(H, int32_t, int16_t)
SSRANS(W, int64_t, int32_t)
124983b3815dSSong Gao 
125083b3815dSSong Gao #define VSSRAN(NAME, BIT, T, E1, E2)                                         \
125183b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                    \
125283b3815dSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)                     \
125383b3815dSSong Gao {                                                                            \
125483b3815dSSong Gao     int i;                                                                   \
125583b3815dSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                         \
125683b3815dSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                         \
125783b3815dSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                         \
125883b3815dSSong Gao                                                                              \
125983b3815dSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
126083b3815dSSong Gao         Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
126183b3815dSSong Gao     }                                                                        \
126283b3815dSSong Gao     Vd->D(1) = 0;                                                            \
126383b3815dSSong Gao }
126483b3815dSSong Gao 
126583b3815dSSong Gao VSSRAN(vssran_b_h, 16, uint16_t, B, H)
126683b3815dSSong Gao VSSRAN(vssran_h_w, 32, uint32_t, H, W)
126783b3815dSSong Gao VSSRAN(vssran_w_d, 64, uint64_t, W, D)
126883b3815dSSong Gao 
/*
 * SSRLNU: generate do_ssrlnu_<E>(e2, sa, sh).
 *
 * The signed input e2 (type T3) is reinterpreted as the unsigned type T1
 * and shifted right by sa bits.  The result is then clamped to
 * (1 << sh) - 1, which is held in the narrower unsigned type T2.
 */
#define SSRLNU(E, T1, T2, T3)                           \
static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh)        \
{                                                       \
    const T2 limit = (1ull << sh) - 1;                  \
    T1 shifted = e2;                                    \
                                                        \
    if (sa != 0) {                                      \
        shifted = ((T1)e2) >> sa;                       \
    }                                                   \
    if (shifted > limit) {                              \
        return limit;                                   \
    }                                                   \
    return shifted;                                     \
}

SSRLNU(B, uint16_t, uint8_t,  int16_t)
SSRLNU(H, uint32_t, uint16_t, int32_t)
SSRLNU(W, uint64_t, uint32_t, int64_t)
129083b3815dSSong Gao 
129183b3815dSSong Gao #define VSSRLNU(NAME, BIT, T, E1, E2)                                     \
129283b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                 \
129383b3815dSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)                  \
129483b3815dSSong Gao {                                                                         \
129583b3815dSSong Gao     int i;                                                                \
129683b3815dSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                      \
129783b3815dSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                      \
129883b3815dSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                      \
129983b3815dSSong Gao                                                                           \
130083b3815dSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                   \
130183b3815dSSong Gao         Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
130283b3815dSSong Gao     }                                                                     \
130383b3815dSSong Gao     Vd->D(1) = 0;                                                         \
130483b3815dSSong Gao }
130583b3815dSSong Gao 
130683b3815dSSong Gao VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H)
130783b3815dSSong Gao VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W)
130883b3815dSSong Gao VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D)
130983b3815dSSong Gao 
/*
 * SSRANU: generate do_ssranu_<E>(e2, sa, sh).
 *
 * A negative input e2 (signed type T3) yields 0.  Otherwise e2 is shifted
 * right by sa bits and the result, held in the unsigned type T1, is
 * clamped to (1 << sh) - 1, which is held in the narrower unsigned
 * type T2.
 */
#define SSRANU(E, T1, T2, T3)                           \
static T1 do_ssranu_ ## E(T3 e2, int sa, int sh)        \
{                                                       \
    const T2 limit = (1ull << sh) - 1;                  \
    T1 shifted;                                         \
                                                        \
    if (e2 < 0) {                                       \
        return 0;                                       \
    }                                                   \
    shifted = e2;                                       \
    if (sa != 0) {                                      \
        shifted = e2 >> sa;                             \
    }                                                   \
    if (shifted > limit) {                              \
        return limit;                                   \
    }                                                   \
    return shifted;                                     \
}

SSRANU(B, uint16_t, uint8_t,  int16_t)
SSRANU(H, uint32_t, uint16_t, int32_t)
SSRANU(W, uint64_t, uint32_t, int64_t)
133483b3815dSSong Gao 
133583b3815dSSong Gao #define VSSRANU(NAME, BIT, T, E1, E2)                                     \
133683b3815dSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                 \
133783b3815dSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)                  \
133883b3815dSSong Gao {                                                                         \
133983b3815dSSong Gao     int i;                                                                \
134083b3815dSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                      \
134183b3815dSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                      \
134283b3815dSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                      \
134383b3815dSSong Gao                                                                           \
134483b3815dSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                   \
134583b3815dSSong Gao         Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
134683b3815dSSong Gao     }                                                                     \
134783b3815dSSong Gao     Vd->D(1) = 0;                                                         \
134883b3815dSSong Gao }
134983b3815dSSong Gao 
135083b3815dSSong Gao VSSRANU(vssran_bu_h, 16, uint16_t, B, H)
135183b3815dSSong Gao VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
135283b3815dSSong Gao VSSRANU(vssran_wu_d, 64, uint64_t, W, D)
135383b3815dSSong Gao 
/*
 * VSSRLNI: generate an immediate-shift narrowing helper built on
 * do_ssrlns_<E1>().
 *
 * Every E2 lane of Vj and of the old Vd is shifted by imm and clamped
 * with sh = BIT/2 - 1.  Vj results fill the first LSX_LEN/BIT E1 lanes,
 * Vd results fill the following LSX_LEN/BIT lanes.  A scratch register
 * collects the lanes so Vd is only overwritten once all inputs are read.
 */
#define VSSRLNI(NAME, BIT, E1, E2)                                         \
void HELPER(NAME)(CPULoongArchState *env,                                  \
                  uint32_t vd, uint32_t vj, uint32_t imm)                  \
{                                                                          \
    VReg *Vd = &(env->fpr[vd].vreg);                                       \
    VReg *Vj = &(env->fpr[vj].vreg);                                       \
    VReg res;                                                              \
    int lane;                                                              \
                                                                           \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {                         \
        res.E1(lane) = do_ssrlns_ ## E1(Vj->E2(lane), imm, BIT / 2 - 1);   \
        res.E1(lane + LSX_LEN / BIT) = do_ssrlns_ ## E1(Vd->E2(lane), imm, \
                                                        BIT / 2 - 1);      \
    }                                                                      \
    *Vd = res;                                                             \
}
136983b3815dSSong Gao 
137083b3815dSSong Gao void HELPER(vssrlni_d_q)(CPULoongArchState *env,
137183b3815dSSong Gao                          uint32_t vd, uint32_t vj, uint32_t imm)
137283b3815dSSong Gao {
137383b3815dSSong Gao     Int128 shft_res1, shft_res2, mask;
137483b3815dSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
137583b3815dSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
137683b3815dSSong Gao 
137783b3815dSSong Gao     if (imm == 0) {
137883b3815dSSong Gao         shft_res1 = Vj->Q(0);
137983b3815dSSong Gao         shft_res2 = Vd->Q(0);
138083b3815dSSong Gao     } else {
138183b3815dSSong Gao         shft_res1 = int128_urshift(Vj->Q(0), imm);
138283b3815dSSong Gao         shft_res2 = int128_urshift(Vd->Q(0), imm);
138383b3815dSSong Gao     }
138483b3815dSSong Gao     mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
138583b3815dSSong Gao 
138683b3815dSSong Gao     if (int128_ult(mask, shft_res1)) {
138783b3815dSSong Gao         Vd->D(0) = int128_getlo(mask);
138883b3815dSSong Gao     }else {
138983b3815dSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
139083b3815dSSong Gao     }
139183b3815dSSong Gao 
139283b3815dSSong Gao     if (int128_ult(mask, shft_res2)) {
139383b3815dSSong Gao         Vd->D(1) = int128_getlo(mask);
139483b3815dSSong Gao     }else {
139583b3815dSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
139683b3815dSSong Gao     }
139783b3815dSSong Gao }
139883b3815dSSong Gao 
139983b3815dSSong Gao VSSRLNI(vssrlni_b_h, 16, B, H)
140083b3815dSSong Gao VSSRLNI(vssrlni_h_w, 32, H, W)
140183b3815dSSong Gao VSSRLNI(vssrlni_w_d, 64, W, D)
140283b3815dSSong Gao 
/*
 * VSSRANI: generate an immediate-shift narrowing helper built on
 * do_ssrans_<E1>().
 *
 * Every E2 lane of Vj and of the old Vd is shifted by imm and clamped
 * with sh = BIT/2 - 1.  Vj results fill the first LSX_LEN/BIT E1 lanes,
 * Vd results fill the following LSX_LEN/BIT lanes.  A scratch register
 * collects the lanes so Vd is only overwritten once all inputs are read.
 */
#define VSSRANI(NAME, BIT, E1, E2)                                         \
void HELPER(NAME)(CPULoongArchState *env,                                  \
                  uint32_t vd, uint32_t vj, uint32_t imm)                  \
{                                                                          \
    VReg *Vd = &(env->fpr[vd].vreg);                                       \
    VReg *Vj = &(env->fpr[vj].vreg);                                       \
    VReg res;                                                              \
    int lane;                                                              \
                                                                           \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {                         \
        res.E1(lane) = do_ssrans_ ## E1(Vj->E2(lane), imm, BIT / 2 - 1);   \
        res.E1(lane + LSX_LEN / BIT) = do_ssrans_ ## E1(Vd->E2(lane), imm, \
                                                        BIT / 2 - 1);      \
    }                                                                      \
    *Vd = res;                                                             \
}
141883b3815dSSong Gao 
141983b3815dSSong Gao void HELPER(vssrani_d_q)(CPULoongArchState *env,
142083b3815dSSong Gao                          uint32_t vd, uint32_t vj, uint32_t imm)
142183b3815dSSong Gao {
142283b3815dSSong Gao     Int128 shft_res1, shft_res2, mask, min;
142383b3815dSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
142483b3815dSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
142583b3815dSSong Gao 
142683b3815dSSong Gao     if (imm == 0) {
142783b3815dSSong Gao         shft_res1 = Vj->Q(0);
142883b3815dSSong Gao         shft_res2 = Vd->Q(0);
142983b3815dSSong Gao     } else {
143083b3815dSSong Gao         shft_res1 = int128_rshift(Vj->Q(0), imm);
143183b3815dSSong Gao         shft_res2 = int128_rshift(Vd->Q(0), imm);
143283b3815dSSong Gao     }
143383b3815dSSong Gao     mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
143483b3815dSSong Gao     min  = int128_lshift(int128_one(), 63);
143583b3815dSSong Gao 
143683b3815dSSong Gao     if (int128_gt(shft_res1,  mask)) {
143783b3815dSSong Gao         Vd->D(0) = int128_getlo(mask);
143883b3815dSSong Gao     } else if (int128_lt(shft_res1, int128_neg(min))) {
143983b3815dSSong Gao         Vd->D(0) = int128_getlo(min);
144083b3815dSSong Gao     } else {
144183b3815dSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
144283b3815dSSong Gao     }
144383b3815dSSong Gao 
144483b3815dSSong Gao     if (int128_gt(shft_res2, mask)) {
144583b3815dSSong Gao         Vd->D(1) = int128_getlo(mask);
144683b3815dSSong Gao     } else if (int128_lt(shft_res2, int128_neg(min))) {
144783b3815dSSong Gao         Vd->D(1) = int128_getlo(min);
144883b3815dSSong Gao     } else {
144983b3815dSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
145083b3815dSSong Gao     }
145183b3815dSSong Gao }
145283b3815dSSong Gao 
145383b3815dSSong Gao VSSRANI(vssrani_b_h, 16, B, H)
145483b3815dSSong Gao VSSRANI(vssrani_h_w, 32, H, W)
145583b3815dSSong Gao VSSRANI(vssrani_w_d, 64, W, D)
145683b3815dSSong Gao 
/*
 * VSSRLNUI: generate an immediate-shift narrowing helper built on
 * do_ssrlnu_<E1>().
 *
 * Every E2 lane of Vj and of the old Vd is shifted by imm and clamped
 * with sh = BIT/2.  Vj results fill the first LSX_LEN/BIT E1 lanes, Vd
 * results fill the following LSX_LEN/BIT lanes.  A scratch register
 * collects the lanes so Vd is only overwritten once all inputs are read.
 */
#define VSSRLNUI(NAME, BIT, E1, E2)                                        \
void HELPER(NAME)(CPULoongArchState *env,                                  \
                  uint32_t vd, uint32_t vj, uint32_t imm)                  \
{                                                                          \
    VReg *Vd = &(env->fpr[vd].vreg);                                       \
    VReg *Vj = &(env->fpr[vj].vreg);                                       \
    VReg res;                                                              \
    int lane;                                                              \
                                                                           \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {                         \
        res.E1(lane) = do_ssrlnu_ ## E1(Vj->E2(lane), imm, BIT / 2);       \
        res.E1(lane + LSX_LEN / BIT) = do_ssrlnu_ ## E1(Vd->E2(lane), imm, \
                                                        BIT / 2);          \
    }                                                                      \
    *Vd = res;                                                             \
}
147283b3815dSSong Gao 
147383b3815dSSong Gao void HELPER(vssrlni_du_q)(CPULoongArchState *env,
147483b3815dSSong Gao                          uint32_t vd, uint32_t vj, uint32_t imm)
147583b3815dSSong Gao {
147683b3815dSSong Gao     Int128 shft_res1, shft_res2, mask;
147783b3815dSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
147883b3815dSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
147983b3815dSSong Gao 
148083b3815dSSong Gao     if (imm == 0) {
148183b3815dSSong Gao         shft_res1 = Vj->Q(0);
148283b3815dSSong Gao         shft_res2 = Vd->Q(0);
148383b3815dSSong Gao     } else {
148483b3815dSSong Gao         shft_res1 = int128_urshift(Vj->Q(0), imm);
148583b3815dSSong Gao         shft_res2 = int128_urshift(Vd->Q(0), imm);
148683b3815dSSong Gao     }
148783b3815dSSong Gao     mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
148883b3815dSSong Gao 
148983b3815dSSong Gao     if (int128_ult(mask, shft_res1)) {
149083b3815dSSong Gao         Vd->D(0) = int128_getlo(mask);
149183b3815dSSong Gao     }else {
149283b3815dSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
149383b3815dSSong Gao     }
149483b3815dSSong Gao 
149583b3815dSSong Gao     if (int128_ult(mask, shft_res2)) {
149683b3815dSSong Gao         Vd->D(1) = int128_getlo(mask);
149783b3815dSSong Gao     }else {
149883b3815dSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
149983b3815dSSong Gao     }
150083b3815dSSong Gao }
150183b3815dSSong Gao 
150283b3815dSSong Gao VSSRLNUI(vssrlni_bu_h, 16, B, H)
150383b3815dSSong Gao VSSRLNUI(vssrlni_hu_w, 32, H, W)
150483b3815dSSong Gao VSSRLNUI(vssrlni_wu_d, 64, W, D)
150583b3815dSSong Gao 
/*
 * VSSRANUI: generate an immediate-shift narrowing helper built on
 * do_ssranu_<E1>().
 *
 * Every E2 lane of Vj and of the old Vd is shifted by imm and clamped
 * with sh = BIT/2.  Vj results fill the first LSX_LEN/BIT E1 lanes, Vd
 * results fill the following LSX_LEN/BIT lanes.  A scratch register
 * collects the lanes so Vd is only overwritten once all inputs are read.
 */
#define VSSRANUI(NAME, BIT, E1, E2)                                        \
void HELPER(NAME)(CPULoongArchState *env,                                  \
                  uint32_t vd, uint32_t vj, uint32_t imm)                  \
{                                                                          \
    VReg *Vd = &(env->fpr[vd].vreg);                                       \
    VReg *Vj = &(env->fpr[vj].vreg);                                       \
    VReg res;                                                              \
    int lane;                                                              \
                                                                           \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {                         \
        res.E1(lane) = do_ssranu_ ## E1(Vj->E2(lane), imm, BIT / 2);       \
        res.E1(lane + LSX_LEN / BIT) = do_ssranu_ ## E1(Vd->E2(lane), imm, \
                                                        BIT / 2);          \
    }                                                                      \
    *Vd = res;                                                             \
}
152183b3815dSSong Gao 
152283b3815dSSong Gao void HELPER(vssrani_du_q)(CPULoongArchState *env,
152383b3815dSSong Gao                          uint32_t vd, uint32_t vj, uint32_t imm)
152483b3815dSSong Gao {
152583b3815dSSong Gao     Int128 shft_res1, shft_res2, mask;
152683b3815dSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
152783b3815dSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
152883b3815dSSong Gao 
152983b3815dSSong Gao     if (imm == 0) {
153083b3815dSSong Gao         shft_res1 = Vj->Q(0);
153183b3815dSSong Gao         shft_res2 = Vd->Q(0);
153283b3815dSSong Gao     } else {
153383b3815dSSong Gao         shft_res1 = int128_rshift(Vj->Q(0), imm);
153483b3815dSSong Gao         shft_res2 = int128_rshift(Vd->Q(0), imm);
153583b3815dSSong Gao     }
153683b3815dSSong Gao 
153783b3815dSSong Gao     if (int128_lt(Vj->Q(0), int128_zero())) {
153883b3815dSSong Gao         shft_res1 = int128_zero();
153983b3815dSSong Gao     }
154083b3815dSSong Gao 
154183b3815dSSong Gao     if (int128_lt(Vd->Q(0), int128_zero())) {
154283b3815dSSong Gao         shft_res2 = int128_zero();
154383b3815dSSong Gao     }
154483b3815dSSong Gao 
154583b3815dSSong Gao     mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
154683b3815dSSong Gao 
154783b3815dSSong Gao     if (int128_ult(mask, shft_res1)) {
154883b3815dSSong Gao         Vd->D(0) = int128_getlo(mask);
154983b3815dSSong Gao     }else {
155083b3815dSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
155183b3815dSSong Gao     }
155283b3815dSSong Gao 
155383b3815dSSong Gao     if (int128_ult(mask, shft_res2)) {
155483b3815dSSong Gao         Vd->D(1) = int128_getlo(mask);
155583b3815dSSong Gao     }else {
155683b3815dSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
155783b3815dSSong Gao     }
155883b3815dSSong Gao }
155983b3815dSSong Gao 
156083b3815dSSong Gao VSSRANUI(vssrani_bu_h, 16, B, H)
156183b3815dSSong Gao VSSRANUI(vssrani_hu_w, 32, H, W)
156283b3815dSSong Gao VSSRANUI(vssrani_wu_d, 64, W, D)
1563162cd32cSSong Gao 
/*
 * SSRLRNS(E1, E2, T1, T2, T3) defines do_ssrlrns_<E1>(e2, sa, sh):
 * call do_vsrlr_<E2>(e2, sa) and cap the outcome at (1 << sh) - 1.
 * T1 is the return and working type, T2 the type of the e2 argument.
 * T3 is accepted but not referenced by the expansion.
 */
#define SSRLRNS(E1, E2, T1, T2, T3)                \
static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
{                                                  \
    T1 limit = (1ull << sh) - 1;                   \
    T1 val = do_vsrlr_ ## E2(e2, sa);              \
                                                   \
    return val > limit ? limit : val;              \
}

SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
1582162cd32cSSong Gao 
1583162cd32cSSong Gao #define VSSRLRN(NAME, BIT, T, E1, E2)                                         \
1584162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                     \
1585162cd32cSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)                      \
1586162cd32cSSong Gao {                                                                             \
1587162cd32cSSong Gao     int i;                                                                    \
1588162cd32cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                          \
1589162cd32cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                          \
1590162cd32cSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                          \
1591162cd32cSSong Gao                                                                               \
1592162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
1593162cd32cSSong Gao         Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
1594162cd32cSSong Gao     }                                                                         \
1595162cd32cSSong Gao     Vd->D(1) = 0;                                                             \
1596162cd32cSSong Gao }
1597162cd32cSSong Gao 
1598162cd32cSSong Gao VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H)
1599162cd32cSSong Gao VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W)
1600162cd32cSSong Gao VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D)
1601162cd32cSSong Gao 
/*
 * SSRARNS(E1, E2, T1, T2) defines do_ssrarns_<E1>(e2, sa, sh):
 * call do_vsrar_<E2>(e2, sa) and clamp the outcome to the signed range
 * [-(1 << sh), (1 << sh) - 1].
 *
 * T1 is the wide (argument/return) type, T2 the narrow type holding the
 * upper bound "mask".  The lower bound is written as ~mask rather than
 * -(mask + 1): both denote the same value, but when T2 is int32_t and
 * mask is INT32_MAX, "mask + 1" is evaluated in int and overflows, which
 * is undefined behaviour in ISO C.  ~mask never overflows.
 */
#define SSRARNS(E1, E2, T1, T2)                    \
static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
{                                                  \
    T1 shft_res;                                   \
    T2 mask;                                       \
                                                   \
    shft_res = do_vsrar_ ## E2(e2, sa);            \
    mask = (1ll << sh) - 1;                        \
    if (shft_res > mask) {                         \
        return mask;                               \
    } else if (shft_res < ~mask) {                 \
        return ~mask;                              \
    } else {                                       \
        return shft_res;                           \
    }                                              \
}

SSRARNS(B, H, int16_t, int8_t)
SSRARNS(H, W, int32_t, int16_t)
SSRARNS(W, D, int64_t, int32_t)
1622162cd32cSSong Gao 
1623162cd32cSSong Gao #define VSSRARN(NAME, BIT, T, E1, E2)                                         \
1624162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                     \
1625162cd32cSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)                      \
1626162cd32cSSong Gao {                                                                             \
1627162cd32cSSong Gao     int i;                                                                    \
1628162cd32cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                          \
1629162cd32cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                          \
1630162cd32cSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                          \
1631162cd32cSSong Gao                                                                               \
1632162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                       \
1633162cd32cSSong Gao         Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
1634162cd32cSSong Gao     }                                                                         \
1635162cd32cSSong Gao     Vd->D(1) = 0;                                                             \
1636162cd32cSSong Gao }
1637162cd32cSSong Gao 
1638162cd32cSSong Gao VSSRARN(vssrarn_b_h, 16, uint16_t, B, H)
1639162cd32cSSong Gao VSSRARN(vssrarn_h_w, 32, uint32_t, H, W)
1640162cd32cSSong Gao VSSRARN(vssrarn_w_d, 64, uint64_t, W, D)
1641162cd32cSSong Gao 
/*
 * SSRLRNU(E1, E2, T1, T2, T3) defines do_ssrlrnu_<E1>(e2, sa, sh):
 * call do_vsrlr_<E2>(e2, sa) and cap the outcome at (1 << sh) - 1.
 * T1 is the return and working type, T2 the type holding the cap, and
 * T3 the type of the e2 argument.
 */
#define SSRLRNU(E1, E2, T1, T2, T3)                \
static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
{                                                  \
    T2 limit = (1ull << sh) - 1;                   \
    T1 val = do_vsrlr_ ## E2(e2, sa);              \
                                                   \
    if (val <= limit) {                            \
        return val;                                \
    }                                              \
    return limit;                                  \
}

SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
1661162cd32cSSong Gao 
1662162cd32cSSong Gao #define VSSRLRNU(NAME, BIT, T, E1, E2)                                     \
1663162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                  \
1664162cd32cSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)                   \
1665162cd32cSSong Gao {                                                                          \
1666162cd32cSSong Gao     int i;                                                                 \
1667162cd32cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                       \
1668162cd32cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                       \
1669162cd32cSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                       \
1670162cd32cSSong Gao                                                                            \
1671162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                    \
1672162cd32cSSong Gao         Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
1673162cd32cSSong Gao     }                                                                      \
1674162cd32cSSong Gao     Vd->D(1) = 0;                                                          \
1675162cd32cSSong Gao }
1676162cd32cSSong Gao 
1677162cd32cSSong Gao VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H)
1678162cd32cSSong Gao VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W)
1679162cd32cSSong Gao VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D)
1680162cd32cSSong Gao 
/*
 * SSRARNU(E1, E2, T1, T2, T3) defines do_ssrarnu_<E1>(e2, sa, sh):
 * a negative e2 yields 0; otherwise do_vsrar_<E2>(e2, sa) is called.
 * The value is then capped at (1 << sh) - 1.
 * T1 is the return and working type, T2 the type holding the cap, and
 * T3 the type of the e2 argument.
 */
#define SSRARNU(E1, E2, T1, T2, T3)                \
static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
{                                                  \
    T2 limit = (1ull << sh) - 1;                   \
    T1 val = 0;                                    \
                                                   \
    if (e2 >= 0) {                                 \
        val = do_vsrar_ ## E2(e2, sa);             \
    }                                              \
    if (val <= limit) {                            \
        return val;                                \
    }                                              \
    return limit;                                  \
}

SSRARNU(B, H, uint16_t, uint8_t, int16_t)
SSRARNU(H, W, uint32_t, uint16_t, int32_t)
SSRARNU(W, D, uint64_t, uint32_t, int64_t)
1703162cd32cSSong Gao 
1704162cd32cSSong Gao #define VSSRARNU(NAME, BIT, T, E1, E2)                                     \
1705162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                  \
1706162cd32cSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)                   \
1707162cd32cSSong Gao {                                                                          \
1708162cd32cSSong Gao     int i;                                                                 \
1709162cd32cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                       \
1710162cd32cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                       \
1711162cd32cSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                       \
1712162cd32cSSong Gao                                                                            \
1713162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                    \
1714162cd32cSSong Gao         Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
1715162cd32cSSong Gao     }                                                                      \
1716162cd32cSSong Gao     Vd->D(1) = 0;                                                          \
1717162cd32cSSong Gao }
1718162cd32cSSong Gao 
1719162cd32cSSong Gao VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H)
1720162cd32cSSong Gao VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W)
1721162cd32cSSong Gao VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D)
1722162cd32cSSong Gao 
/*
 * VSSRLRNI(NAME, BIT, E1, E2) defines the immediate-shift helper NAME.
 *
 * The LSX_LEN/BIT E2 elements of Vj feed do_ssrlrns_<E1>() (shift amount
 * imm, sh = BIT/2 - 1) and fill the E1 elements [0, LSX_LEN/BIT) of the
 * result; the E2 elements of the old Vd fill the E1 elements
 * [LSX_LEN/BIT, 2 * LSX_LEN/BIT).  The result is assembled in a scratch
 * register and copied to Vd only at the end, because Vd is also an input.
 */
#define VSSRLRNI(NAME, BIT, E1, E2)                                  \
void HELPER(NAME)(CPULoongArchState *env,                            \
                  uint32_t vd, uint32_t vj, uint32_t imm)            \
{                                                                    \
    VReg *Vd = &(env->fpr[vd].vreg);                                 \
    VReg *Vj = &(env->fpr[vj].vreg);                                 \
    VReg result;                                                     \
    int n;                                                           \
                                                                     \
    for (n = 0; n < LSX_LEN/BIT; n++) {                              \
        result.E1(n) = do_ssrlrns_ ## E1(Vj->E2(n), imm, BIT/2 - 1); \
    }                                                                \
    for (n = 0; n < LSX_LEN/BIT; n++) {                              \
        result.E1(n + LSX_LEN/BIT) =                                 \
            do_ssrlrns_ ## E1(Vd->E2(n), imm, BIT/2 - 1);            \
    }                                                                \
    *Vd = result;                                                    \
}
1738162cd32cSSong Gao 
1739162cd32cSSong Gao #define VSSRLRNI_Q(NAME, sh)                                               \
1740162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                  \
1741162cd32cSSong Gao                           uint32_t vd, uint32_t vj, uint32_t imm)          \
1742162cd32cSSong Gao {                                                                          \
1743162cd32cSSong Gao     Int128 shft_res1, shft_res2, mask, r1, r2;                             \
1744162cd32cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                       \
1745162cd32cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                       \
1746162cd32cSSong Gao                                                                            \
1747162cd32cSSong Gao     if (imm == 0) {                                                        \
1748162cd32cSSong Gao         shft_res1 = Vj->Q(0);                                              \
1749162cd32cSSong Gao         shft_res2 = Vd->Q(0);                                              \
1750162cd32cSSong Gao     } else {                                                               \
1751162cd32cSSong Gao         r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \
1752162cd32cSSong Gao         r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \
1753162cd32cSSong Gao                                                                            \
1754162cd32cSSong Gao         shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1));       \
1755162cd32cSSong Gao         shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2));       \
1756162cd32cSSong Gao     }                                                                      \
1757162cd32cSSong Gao                                                                            \
1758162cd32cSSong Gao     mask = int128_sub(int128_lshift(int128_one(), sh), int128_one());      \
1759162cd32cSSong Gao                                                                            \
1760162cd32cSSong Gao     if (int128_ult(mask, shft_res1)) {                                     \
1761162cd32cSSong Gao         Vd->D(0) = int128_getlo(mask);                                     \
1762162cd32cSSong Gao     }else {                                                                \
1763162cd32cSSong Gao         Vd->D(0) = int128_getlo(shft_res1);                                \
1764162cd32cSSong Gao     }                                                                      \
1765162cd32cSSong Gao                                                                            \
1766162cd32cSSong Gao     if (int128_ult(mask, shft_res2)) {                                     \
1767162cd32cSSong Gao         Vd->D(1) = int128_getlo(mask);                                     \
1768162cd32cSSong Gao     }else {                                                                \
1769162cd32cSSong Gao         Vd->D(1) = int128_getlo(shft_res2);                                \
1770162cd32cSSong Gao     }                                                                      \
1771162cd32cSSong Gao }
1772162cd32cSSong Gao 
1773162cd32cSSong Gao VSSRLRNI(vssrlrni_b_h, 16, B, H)
1774162cd32cSSong Gao VSSRLRNI(vssrlrni_h_w, 32, H, W)
1775162cd32cSSong Gao VSSRLRNI(vssrlrni_w_d, 64, W, D)
1776162cd32cSSong Gao VSSRLRNI_Q(vssrlrni_d_q, 63)
1777162cd32cSSong Gao 
/*
 * VSSRARNI(NAME, BIT, E1, E2) defines the immediate-shift helper NAME.
 *
 * The LSX_LEN/BIT E2 elements of Vj feed do_ssrarns_<E1>() (shift amount
 * imm, sh = BIT/2 - 1) and fill the E1 elements [0, LSX_LEN/BIT) of the
 * result; the E2 elements of the old Vd fill the E1 elements
 * [LSX_LEN/BIT, 2 * LSX_LEN/BIT).  The result is assembled in a scratch
 * register and copied to Vd only at the end, because Vd is also an input.
 */
#define VSSRARNI(NAME, BIT, E1, E2)                                  \
void HELPER(NAME)(CPULoongArchState *env,                            \
                  uint32_t vd, uint32_t vj, uint32_t imm)            \
{                                                                    \
    VReg *Vd = &(env->fpr[vd].vreg);                                 \
    VReg *Vj = &(env->fpr[vj].vreg);                                 \
    VReg result;                                                     \
    int n;                                                           \
                                                                     \
    for (n = 0; n < LSX_LEN/BIT; n++) {                              \
        result.E1(n) = do_ssrarns_ ## E1(Vj->E2(n), imm, BIT/2 - 1); \
    }                                                                \
    for (n = 0; n < LSX_LEN/BIT; n++) {                              \
        result.E1(n + LSX_LEN/BIT) =                                 \
            do_ssrarns_ ## E1(Vd->E2(n), imm, BIT/2 - 1);            \
    }                                                                \
    *Vd = result;                                                    \
}
1793162cd32cSSong Gao 
1794162cd32cSSong Gao void HELPER(vssrarni_d_q)(CPULoongArchState *env,
1795162cd32cSSong Gao                           uint32_t vd, uint32_t vj, uint32_t imm)
1796162cd32cSSong Gao {
1797162cd32cSSong Gao     Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
1798162cd32cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
1799162cd32cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
1800162cd32cSSong Gao 
1801162cd32cSSong Gao     if (imm == 0) {
1802162cd32cSSong Gao         shft_res1 = Vj->Q(0);
1803162cd32cSSong Gao         shft_res2 = Vd->Q(0);
1804162cd32cSSong Gao     } else {
1805162cd32cSSong Gao         r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
1806162cd32cSSong Gao         r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
1807162cd32cSSong Gao 
1808162cd32cSSong Gao         shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
1809162cd32cSSong Gao         shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
1810162cd32cSSong Gao     }
1811162cd32cSSong Gao 
1812162cd32cSSong Gao     mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
1813162cd32cSSong Gao     mask2  = int128_lshift(int128_one(), 63);
1814162cd32cSSong Gao 
1815162cd32cSSong Gao     if (int128_gt(shft_res1,  mask1)) {
1816162cd32cSSong Gao         Vd->D(0) = int128_getlo(mask1);
1817162cd32cSSong Gao     } else if (int128_lt(shft_res1, int128_neg(mask2))) {
1818162cd32cSSong Gao         Vd->D(0) = int128_getlo(mask2);
1819162cd32cSSong Gao     } else {
1820162cd32cSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
1821162cd32cSSong Gao     }
1822162cd32cSSong Gao 
1823162cd32cSSong Gao     if (int128_gt(shft_res2, mask1)) {
1824162cd32cSSong Gao         Vd->D(1) = int128_getlo(mask1);
1825162cd32cSSong Gao     } else if (int128_lt(shft_res2, int128_neg(mask2))) {
1826162cd32cSSong Gao         Vd->D(1) = int128_getlo(mask2);
1827162cd32cSSong Gao     } else {
1828162cd32cSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
1829162cd32cSSong Gao     }
1830162cd32cSSong Gao }
1831162cd32cSSong Gao 
1832162cd32cSSong Gao VSSRARNI(vssrarni_b_h, 16, B, H)
1833162cd32cSSong Gao VSSRARNI(vssrarni_h_w, 32, H, W)
1834162cd32cSSong Gao VSSRARNI(vssrarni_w_d, 64, W, D)
1835162cd32cSSong Gao 
1836162cd32cSSong Gao #define VSSRLRNUI(NAME, BIT, E1, E2)                                         \
1837162cd32cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                                    \
1838162cd32cSSong Gao                   uint32_t vd, uint32_t vj, uint32_t imm)                    \
1839162cd32cSSong Gao {                                                                            \
1840162cd32cSSong Gao     int i;                                                                   \
1841162cd32cSSong Gao     VReg temp;                                                               \
1842162cd32cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                         \
1843162cd32cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                         \
1844162cd32cSSong Gao                                                                              \
1845162cd32cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
1846162cd32cSSong Gao         temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2);               \
1847162cd32cSSong Gao         temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \
1848162cd32cSSong Gao     }                                                                        \
1849162cd32cSSong Gao     *Vd = temp;                                                              \
1850162cd32cSSong Gao }
1851162cd32cSSong Gao 
1852162cd32cSSong Gao VSSRLRNUI(vssrlrni_bu_h, 16, B, H)
1853162cd32cSSong Gao VSSRLRNUI(vssrlrni_hu_w, 32, H, W)
1854162cd32cSSong Gao VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
1855162cd32cSSong Gao VSSRLRNI_Q(vssrlrni_du_q, 64)
1856162cd32cSSong Gao 
/*
 * VSSRARNUI(NAME, BIT, E1, E2) defines the immediate-shift helper NAME.
 *
 * The LSX_LEN/BIT E2 elements of Vj feed do_ssrarnu_<E1>() (shift amount
 * imm, sh = BIT/2) and fill the E1 elements [0, LSX_LEN/BIT) of the
 * result; the E2 elements of the old Vd fill the E1 elements
 * [LSX_LEN/BIT, 2 * LSX_LEN/BIT).  The result is assembled in a scratch
 * register and copied to Vd only at the end, because Vd is also an input.
 */
#define VSSRARNUI(NAME, BIT, E1, E2)                                 \
void HELPER(NAME)(CPULoongArchState *env,                            \
                  uint32_t vd, uint32_t vj, uint32_t imm)            \
{                                                                    \
    VReg *Vd = &(env->fpr[vd].vreg);                                 \
    VReg *Vj = &(env->fpr[vj].vreg);                                 \
    VReg result;                                                     \
    int n;                                                           \
                                                                     \
    for (n = 0; n < LSX_LEN/BIT; n++) {                              \
        result.E1(n) = do_ssrarnu_ ## E1(Vj->E2(n), imm, BIT/2);     \
    }                                                                \
    for (n = 0; n < LSX_LEN/BIT; n++) {                              \
        result.E1(n + LSX_LEN/BIT) =                                 \
            do_ssrarnu_ ## E1(Vd->E2(n), imm, BIT/2);                \
    }                                                                \
    *Vd = result;                                                    \
}
1872162cd32cSSong Gao 
1873162cd32cSSong Gao void HELPER(vssrarni_du_q)(CPULoongArchState *env,
1874162cd32cSSong Gao                            uint32_t vd, uint32_t vj, uint32_t imm)
1875162cd32cSSong Gao {
1876162cd32cSSong Gao     Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
1877162cd32cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
1878162cd32cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
1879162cd32cSSong Gao 
1880162cd32cSSong Gao     if (imm == 0) {
1881162cd32cSSong Gao         shft_res1 = Vj->Q(0);
1882162cd32cSSong Gao         shft_res2 = Vd->Q(0);
1883162cd32cSSong Gao     } else {
1884162cd32cSSong Gao         r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
1885162cd32cSSong Gao         r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
1886162cd32cSSong Gao 
1887162cd32cSSong Gao         shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
1888162cd32cSSong Gao         shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
1889162cd32cSSong Gao     }
1890162cd32cSSong Gao 
1891162cd32cSSong Gao     if (int128_lt(Vj->Q(0), int128_zero())) {
1892162cd32cSSong Gao         shft_res1 = int128_zero();
1893162cd32cSSong Gao     }
1894162cd32cSSong Gao     if (int128_lt(Vd->Q(0), int128_zero())) {
1895162cd32cSSong Gao         shft_res2 = int128_zero();
1896162cd32cSSong Gao     }
1897162cd32cSSong Gao 
1898162cd32cSSong Gao     mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
1899162cd32cSSong Gao     mask2  = int128_lshift(int128_one(), 64);
1900162cd32cSSong Gao 
1901162cd32cSSong Gao     if (int128_gt(shft_res1,  mask1)) {
1902162cd32cSSong Gao         Vd->D(0) = int128_getlo(mask1);
1903162cd32cSSong Gao     } else if (int128_lt(shft_res1, int128_neg(mask2))) {
1904162cd32cSSong Gao         Vd->D(0) = int128_getlo(mask2);
1905162cd32cSSong Gao     } else {
1906162cd32cSSong Gao         Vd->D(0) = int128_getlo(shft_res1);
1907162cd32cSSong Gao     }
1908162cd32cSSong Gao 
1909162cd32cSSong Gao     if (int128_gt(shft_res2, mask1)) {
1910162cd32cSSong Gao         Vd->D(1) = int128_getlo(mask1);
1911162cd32cSSong Gao     } else if (int128_lt(shft_res2, int128_neg(mask2))) {
1912162cd32cSSong Gao         Vd->D(1) = int128_getlo(mask2);
1913162cd32cSSong Gao     } else {
1914162cd32cSSong Gao         Vd->D(1) = int128_getlo(shft_res2);
1915162cd32cSSong Gao     }
1916162cd32cSSong Gao }
1917162cd32cSSong Gao 
1918162cd32cSSong Gao VSSRARNUI(vssrarni_bu_h, 16, B, H)
1919162cd32cSSong Gao VSSRARNUI(vssrarni_hu_w, 32, H, W)
1920162cd32cSSong Gao VSSRARNUI(vssrarni_wu_d, 64, W, D)
19212e105e12SSong Gao 
/*
 * DO_2OP(NAME, BIT, E, DO_OP) defines helper NAME, which applies the
 * one-argument operation DO_OP to each of the LSX_LEN/BIT E elements of
 * Vj and stores the result in the E element of Vd with the same index.
 */
#define DO_2OP(NAME, BIT, E, DO_OP)                                 \
void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
{                                                                   \
    VReg *Vd = &(env->fpr[vd].vreg);                                \
    VReg *Vj = &(env->fpr[vj].vreg);                                \
    int n;                                                          \
                                                                    \
    for (n = 0; n < LSX_LEN/BIT; n++) {                             \
        Vd->E(n) = DO_OP(Vj->E(n));                                 \
    }                                                               \
}
19342e105e12SSong Gao 
19352e105e12SSong Gao #define DO_CLO_B(N)  (clz32(~N & 0xff) - 24)
19362e105e12SSong Gao #define DO_CLO_H(N)  (clz32(~N & 0xffff) - 16)
19372e105e12SSong Gao #define DO_CLO_W(N)  (clz32(~N))
19382e105e12SSong Gao #define DO_CLO_D(N)  (clz64(~N))
19392e105e12SSong Gao #define DO_CLZ_B(N)  (clz32(N) - 24)
19402e105e12SSong Gao #define DO_CLZ_H(N)  (clz32(N) - 16)
19412e105e12SSong Gao #define DO_CLZ_W(N)  (clz32(N))
19422e105e12SSong Gao #define DO_CLZ_D(N)  (clz64(N))
19432e105e12SSong Gao 
19442e105e12SSong Gao DO_2OP(vclo_b, 8, UB, DO_CLO_B)
19452e105e12SSong Gao DO_2OP(vclo_h, 16, UH, DO_CLO_H)
19462e105e12SSong Gao DO_2OP(vclo_w, 32, UW, DO_CLO_W)
19472e105e12SSong Gao DO_2OP(vclo_d, 64, UD, DO_CLO_D)
19482e105e12SSong Gao DO_2OP(vclz_b, 8, UB, DO_CLZ_B)
19492e105e12SSong Gao DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
19502e105e12SSong Gao DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
19512e105e12SSong Gao DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
1952bb22ee57SSong Gao 
1953bb22ee57SSong Gao #define VPCNT(NAME, BIT, E, FN)                                     \
1954bb22ee57SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
1955bb22ee57SSong Gao {                                                                   \
1956bb22ee57SSong Gao     int i;                                                          \
1957bb22ee57SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                \
1958bb22ee57SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
1959bb22ee57SSong Gao                                                                     \
1960bb22ee57SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++)                               \
1961bb22ee57SSong Gao     {                                                               \
1962bb22ee57SSong Gao         Vd->E(i) = FN(Vj->E(i));                                    \
1963bb22ee57SSong Gao     }                                                               \
1964bb22ee57SSong Gao }
1965bb22ee57SSong Gao 
1966bb22ee57SSong Gao VPCNT(vpcnt_b, 8, UB, ctpop8)
1967bb22ee57SSong Gao VPCNT(vpcnt_h, 16, UH, ctpop16)
1968bb22ee57SSong Gao VPCNT(vpcnt_w, 32, UW, ctpop32)
1969bb22ee57SSong Gao VPCNT(vpcnt_d, 64, UD, ctpop64)
19700b1e6705SSong Gao 
/*
 * Single-bit operations on value a at bit position bit:
 * clear, set and toggle.  The bit mask is built as an unsigned long long.
 * Both arguments are parenthesized so that compound expressions
 * (e.g. "x | y" or "n % 8") are evaluated as a unit.
 */
#define DO_BITCLR(a, bit) ((a) & ~(1ull << (bit)))
#define DO_BITSET(a, bit) ((a) | (1ull << (bit)))
#define DO_BITREV(a, bit) ((a) ^ (1ull << (bit)))
19740b1e6705SSong Gao 
19750b1e6705SSong Gao #define DO_BIT(NAME, BIT, E, DO_OP)                         \
19760b1e6705SSong Gao void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
19770b1e6705SSong Gao {                                                           \
19780b1e6705SSong Gao     int i;                                                  \
19790b1e6705SSong Gao     VReg *Vd = (VReg *)vd;                                  \
19800b1e6705SSong Gao     VReg *Vj = (VReg *)vj;                                  \
19810b1e6705SSong Gao     VReg *Vk = (VReg *)vk;                                  \
19820b1e6705SSong Gao                                                             \
19830b1e6705SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
19840b1e6705SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT);           \
19850b1e6705SSong Gao     }                                                       \
19860b1e6705SSong Gao }
19870b1e6705SSong Gao 
19880b1e6705SSong Gao DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
19890b1e6705SSong Gao DO_BIT(vbitclr_h, 16, UH, DO_BITCLR)
19900b1e6705SSong Gao DO_BIT(vbitclr_w, 32, UW, DO_BITCLR)
19910b1e6705SSong Gao DO_BIT(vbitclr_d, 64, UD, DO_BITCLR)
19920b1e6705SSong Gao DO_BIT(vbitset_b, 8, UB, DO_BITSET)
19930b1e6705SSong Gao DO_BIT(vbitset_h, 16, UH, DO_BITSET)
19940b1e6705SSong Gao DO_BIT(vbitset_w, 32, UW, DO_BITSET)
19950b1e6705SSong Gao DO_BIT(vbitset_d, 64, UD, DO_BITSET)
19960b1e6705SSong Gao DO_BIT(vbitrev_b, 8, UB, DO_BITREV)
19970b1e6705SSong Gao DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
19980b1e6705SSong Gao DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
19990b1e6705SSong Gao DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
20000b1e6705SSong Gao 
20010b1e6705SSong Gao #define DO_BITI(NAME, BIT, E, DO_OP)                            \
20020b1e6705SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
20030b1e6705SSong Gao {                                                               \
20040b1e6705SSong Gao     int i;                                                      \
20050b1e6705SSong Gao     VReg *Vd = (VReg *)vd;                                      \
20060b1e6705SSong Gao     VReg *Vj = (VReg *)vj;                                      \
20070b1e6705SSong Gao                                                                 \
20080b1e6705SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
20090b1e6705SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), imm);                        \
20100b1e6705SSong Gao     }                                                           \
20110b1e6705SSong Gao }
20120b1e6705SSong Gao 
20130b1e6705SSong Gao DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
20140b1e6705SSong Gao DO_BITI(vbitclri_h, 16, UH, DO_BITCLR)
20150b1e6705SSong Gao DO_BITI(vbitclri_w, 32, UW, DO_BITCLR)
20160b1e6705SSong Gao DO_BITI(vbitclri_d, 64, UD, DO_BITCLR)
20170b1e6705SSong Gao DO_BITI(vbitseti_b, 8, UB, DO_BITSET)
20180b1e6705SSong Gao DO_BITI(vbitseti_h, 16, UH, DO_BITSET)
20190b1e6705SSong Gao DO_BITI(vbitseti_w, 32, UW, DO_BITSET)
20200b1e6705SSong Gao DO_BITI(vbitseti_d, 64, UD, DO_BITSET)
20210b1e6705SSong Gao DO_BITI(vbitrevi_b, 8, UB, DO_BITREV)
20220b1e6705SSong Gao DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
20230b1e6705SSong Gao DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
20240b1e6705SSong Gao DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
2025ac95a0b9SSong Gao 
2026ac95a0b9SSong Gao #define VFRSTP(NAME, BIT, MASK, E)                       \
2027ac95a0b9SSong Gao void HELPER(NAME)(CPULoongArchState *env,                \
2028ac95a0b9SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk) \
2029ac95a0b9SSong Gao {                                                        \
2030ac95a0b9SSong Gao     int i, m;                                            \
2031ac95a0b9SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                     \
2032ac95a0b9SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                     \
2033ac95a0b9SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                     \
2034ac95a0b9SSong Gao                                                          \
2035ac95a0b9SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                  \
2036ac95a0b9SSong Gao         if (Vj->E(i) < 0) {                              \
2037ac95a0b9SSong Gao             break;                                       \
2038ac95a0b9SSong Gao         }                                                \
2039ac95a0b9SSong Gao     }                                                    \
2040ac95a0b9SSong Gao     m = Vk->E(0) & MASK;                                 \
2041ac95a0b9SSong Gao     Vd->E(m) = i;                                        \
2042ac95a0b9SSong Gao }
2043ac95a0b9SSong Gao 
2044ac95a0b9SSong Gao VFRSTP(vfrstp_b, 8, 0xf, B)
2045ac95a0b9SSong Gao VFRSTP(vfrstp_h, 16, 0x7, H)
2046ac95a0b9SSong Gao 
2047ac95a0b9SSong Gao #define VFRSTPI(NAME, BIT, E)                             \
2048ac95a0b9SSong Gao void HELPER(NAME)(CPULoongArchState *env,                 \
2049ac95a0b9SSong Gao                   uint32_t vd, uint32_t vj, uint32_t imm) \
2050ac95a0b9SSong Gao {                                                         \
2051ac95a0b9SSong Gao     int i, m;                                             \
2052ac95a0b9SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                      \
2053ac95a0b9SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                      \
2054ac95a0b9SSong Gao                                                           \
2055ac95a0b9SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                   \
2056ac95a0b9SSong Gao         if (Vj->E(i) < 0) {                               \
2057ac95a0b9SSong Gao             break;                                        \
2058ac95a0b9SSong Gao         }                                                 \
2059ac95a0b9SSong Gao     }                                                     \
2060ac95a0b9SSong Gao     m = imm % (LSX_LEN/BIT);                              \
2061ac95a0b9SSong Gao     Vd->E(m) = i;                                         \
2062ac95a0b9SSong Gao }
2063ac95a0b9SSong Gao 
2064ac95a0b9SSong Gao VFRSTPI(vfrstpi_b, 8,  B)
2065ac95a0b9SSong Gao VFRSTPI(vfrstpi_h, 16, H)
2066aca67472SSong Gao 
2067aca67472SSong Gao static void vec_update_fcsr0_mask(CPULoongArchState *env,
2068aca67472SSong Gao                                   uintptr_t pc, int mask)
2069aca67472SSong Gao {
2070aca67472SSong Gao     int flags = get_float_exception_flags(&env->fp_status);
2071aca67472SSong Gao 
2072aca67472SSong Gao     set_float_exception_flags(0, &env->fp_status);
2073aca67472SSong Gao 
2074aca67472SSong Gao     flags &= ~mask;
2075aca67472SSong Gao 
2076aca67472SSong Gao     if (flags) {
2077aca67472SSong Gao         flags = ieee_ex_to_loongarch(flags);
2078aca67472SSong Gao         UPDATE_FP_CAUSE(env->fcsr0, flags);
2079aca67472SSong Gao     }
2080aca67472SSong Gao 
2081aca67472SSong Gao     if (GET_FP_ENABLES(env->fcsr0) & flags) {
2082aca67472SSong Gao         do_raise_exception(env, EXCCODE_FPE, pc);
2083aca67472SSong Gao     } else {
2084aca67472SSong Gao         UPDATE_FP_FLAGS(env->fcsr0, flags);
2085aca67472SSong Gao     }
2086aca67472SSong Gao }
2087aca67472SSong Gao 
2088aca67472SSong Gao static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc)
2089aca67472SSong Gao {
2090aca67472SSong Gao     vec_update_fcsr0_mask(env, pc, 0);
2091aca67472SSong Gao }
2092aca67472SSong Gao 
2093aca67472SSong Gao static inline void vec_clear_cause(CPULoongArchState *env)
2094aca67472SSong Gao {
2095aca67472SSong Gao     SET_FP_CAUSE(env->fcsr0, 0);
2096aca67472SSong Gao }
2097aca67472SSong Gao 
2098aca67472SSong Gao #define DO_3OP_F(NAME, BIT, E, FN)                          \
2099aca67472SSong Gao void HELPER(NAME)(CPULoongArchState *env,                   \
2100aca67472SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk)    \
2101aca67472SSong Gao {                                                           \
2102aca67472SSong Gao     int i;                                                  \
2103aca67472SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                        \
2104aca67472SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                        \
2105aca67472SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                        \
2106aca67472SSong Gao                                                             \
2107aca67472SSong Gao     vec_clear_cause(env);                                   \
2108aca67472SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                     \
2109aca67472SSong Gao         Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
2110aca67472SSong Gao         vec_update_fcsr0(env, GETPC());                     \
2111aca67472SSong Gao     }                                                       \
2112aca67472SSong Gao }
2113aca67472SSong Gao 
2114aca67472SSong Gao DO_3OP_F(vfadd_s, 32, UW, float32_add)
2115aca67472SSong Gao DO_3OP_F(vfadd_d, 64, UD, float64_add)
2116aca67472SSong Gao DO_3OP_F(vfsub_s, 32, UW, float32_sub)
2117aca67472SSong Gao DO_3OP_F(vfsub_d, 64, UD, float64_sub)
2118aca67472SSong Gao DO_3OP_F(vfmul_s, 32, UW, float32_mul)
2119aca67472SSong Gao DO_3OP_F(vfmul_d, 64, UD, float64_mul)
2120aca67472SSong Gao DO_3OP_F(vfdiv_s, 32, UW, float32_div)
2121aca67472SSong Gao DO_3OP_F(vfdiv_d, 64, UD, float64_div)
2122aca67472SSong Gao DO_3OP_F(vfmax_s, 32, UW, float32_maxnum)
2123aca67472SSong Gao DO_3OP_F(vfmax_d, 64, UD, float64_maxnum)
2124aca67472SSong Gao DO_3OP_F(vfmin_s, 32, UW, float32_minnum)
2125aca67472SSong Gao DO_3OP_F(vfmin_d, 64, UD, float64_minnum)
2126aca67472SSong Gao DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag)
2127aca67472SSong Gao DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag)
2128aca67472SSong Gao DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
2129aca67472SSong Gao DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
2130aca67472SSong Gao 
2131aca67472SSong Gao #define DO_4OP_F(NAME, BIT, E, FN, flags)                                    \
2132aca67472SSong Gao void HELPER(NAME)(CPULoongArchState *env,                                    \
2133aca67472SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)        \
2134aca67472SSong Gao {                                                                            \
2135aca67472SSong Gao     int i;                                                                   \
2136aca67472SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                         \
2137aca67472SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                         \
2138aca67472SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                         \
2139aca67472SSong Gao     VReg *Va = &(env->fpr[va].vreg);                                         \
2140aca67472SSong Gao                                                                              \
2141aca67472SSong Gao     vec_clear_cause(env);                                                    \
2142aca67472SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                      \
2143aca67472SSong Gao         Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
2144aca67472SSong Gao         vec_update_fcsr0(env, GETPC());                                      \
2145aca67472SSong Gao     }                                                                        \
2146aca67472SSong Gao }
2147aca67472SSong Gao 
2148aca67472SSong Gao DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
2149aca67472SSong Gao DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0)
2150aca67472SSong Gao DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c)
2151aca67472SSong Gao DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c)
2152aca67472SSong Gao DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result)
2153aca67472SSong Gao DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result)
2154aca67472SSong Gao DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
2155aca67472SSong Gao          float_muladd_negate_c | float_muladd_negate_result)
2156aca67472SSong Gao DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
2157aca67472SSong Gao          float_muladd_negate_c | float_muladd_negate_result)
2158aca67472SSong Gao 
/*
 * Define a lane-wise one-operand floating-point helper: for each BIT-wide
 * lane, Vd = FN(env, Vj).  Only the cause field is cleared here; this macro
 * does not update fcsr0 itself, so any exception bookkeeping is left to FN.
 */
#define DO_2OP_F(NAME, BIT, E, FN)                                    \
void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj)   \
{                                                                     \
    VReg *Vd = &(env->fpr[vd].vreg);                                  \
    VReg *Vj = &(env->fpr[vj].vreg);                                  \
    int lane;                                                         \
                                                                      \
    vec_clear_cause(env);                                             \
    for (lane = 0; lane < LSX_LEN / BIT; lane++) {                    \
        Vd->E(lane) = FN(env, Vj->E(lane));                           \
    }                                                                 \
}
2171aca67472SSong Gao 
2172aca67472SSong Gao #define FLOGB(BIT, T)                                            \
2173aca67472SSong Gao static T do_flogb_## BIT(CPULoongArchState *env, T fj)           \
2174aca67472SSong Gao {                                                                \
2175aca67472SSong Gao     T fp, fd;                                                    \
2176aca67472SSong Gao     float_status *status = &env->fp_status;                      \
2177aca67472SSong Gao     FloatRoundMode old_mode = get_float_rounding_mode(status);   \
2178aca67472SSong Gao                                                                  \
2179aca67472SSong Gao     set_float_rounding_mode(float_round_down, status);           \
2180aca67472SSong Gao     fp = float ## BIT ##_log2(fj, status);                       \
2181aca67472SSong Gao     fd = float ## BIT ##_round_to_int(fp, status);               \
2182aca67472SSong Gao     set_float_rounding_mode(old_mode, status);                   \
2183aca67472SSong Gao     vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact);     \
2184aca67472SSong Gao     return fd;                                                   \
2185aca67472SSong Gao }
2186aca67472SSong Gao 
2187aca67472SSong Gao FLOGB(32, uint32_t)
2188aca67472SSong Gao FLOGB(64, uint64_t)
2189aca67472SSong Gao 
2190aca67472SSong Gao #define FCLASS(NAME, BIT, E, FN)                                    \
2191aca67472SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
2192aca67472SSong Gao {                                                                   \
2193aca67472SSong Gao     int i;                                                          \
2194aca67472SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                \
2195aca67472SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
2196aca67472SSong Gao                                                                     \
2197aca67472SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                             \
2198aca67472SSong Gao         Vd->E(i) = FN(env, Vj->E(i));                               \
2199aca67472SSong Gao     }                                                               \
2200aca67472SSong Gao }
2201aca67472SSong Gao 
2202aca67472SSong Gao FCLASS(vfclass_s, 32, UW, helper_fclass_s)
2203aca67472SSong Gao FCLASS(vfclass_d, 64, UD, helper_fclass_d)
2204aca67472SSong Gao 
2205aca67472SSong Gao #define FSQRT(BIT, T)                                  \
2206aca67472SSong Gao static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \
2207aca67472SSong Gao {                                                      \
2208aca67472SSong Gao     T fd;                                              \
2209aca67472SSong Gao     fd = float ## BIT ##_sqrt(fj, &env->fp_status);    \
2210aca67472SSong Gao     vec_update_fcsr0(env, GETPC());                    \
2211aca67472SSong Gao     return fd;                                         \
2212aca67472SSong Gao }
2213aca67472SSong Gao 
2214aca67472SSong Gao FSQRT(32, uint32_t)
2215aca67472SSong Gao FSQRT(64, uint64_t)
2216aca67472SSong Gao 
2217aca67472SSong Gao #define FRECIP(BIT, T)                                                  \
2218aca67472SSong Gao static T do_frecip_## BIT(CPULoongArchState *env, T fj)                 \
2219aca67472SSong Gao {                                                                       \
2220aca67472SSong Gao     T fd;                                                               \
2221aca67472SSong Gao     fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \
2222aca67472SSong Gao     vec_update_fcsr0(env, GETPC());                                     \
2223aca67472SSong Gao     return fd;                                                          \
2224aca67472SSong Gao }
2225aca67472SSong Gao 
2226aca67472SSong Gao FRECIP(32, uint32_t)
2227aca67472SSong Gao FRECIP(64, uint64_t)
2228aca67472SSong Gao 
2229aca67472SSong Gao #define FRSQRT(BIT, T)                                                  \
2230aca67472SSong Gao static T do_frsqrt_## BIT(CPULoongArchState *env, T fj)                 \
2231aca67472SSong Gao {                                                                       \
2232aca67472SSong Gao     T fd, fp;                                                           \
2233aca67472SSong Gao     fp = float ## BIT ##_sqrt(fj, &env->fp_status);                     \
2234aca67472SSong Gao     fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \
2235aca67472SSong Gao     vec_update_fcsr0(env, GETPC());                                     \
2236aca67472SSong Gao     return fd;                                                          \
2237aca67472SSong Gao }
2238aca67472SSong Gao 
2239aca67472SSong Gao FRSQRT(32, uint32_t)
2240aca67472SSong Gao FRSQRT(64, uint64_t)
2241aca67472SSong Gao 
2242aca67472SSong Gao DO_2OP_F(vflogb_s, 32, UW, do_flogb_32)
2243aca67472SSong Gao DO_2OP_F(vflogb_d, 64, UD, do_flogb_64)
2244aca67472SSong Gao DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32)
2245aca67472SSong Gao DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64)
2246aca67472SSong Gao DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32)
2247aca67472SSong Gao DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64)
2248aca67472SSong Gao DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32)
2249aca67472SSong Gao DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64)
2250399665d2SSong Gao 
2251399665d2SSong Gao static uint32_t float16_cvt_float32(uint16_t h, float_status *status)
2252399665d2SSong Gao {
2253399665d2SSong Gao     return float16_to_float32(h, true, status);
2254399665d2SSong Gao }
2255399665d2SSong Gao static uint64_t float32_cvt_float64(uint32_t s, float_status *status)
2256399665d2SSong Gao {
2257399665d2SSong Gao     return float32_to_float64(s, status);
2258399665d2SSong Gao }
2259399665d2SSong Gao 
2260399665d2SSong Gao static uint16_t float32_cvt_float16(uint32_t s, float_status *status)
2261399665d2SSong Gao {
2262399665d2SSong Gao     return float32_to_float16(s, true, status);
2263399665d2SSong Gao }
2264399665d2SSong Gao static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
2265399665d2SSong Gao {
2266399665d2SSong Gao     return float64_to_float32(d, status);
2267399665d2SSong Gao }
2268399665d2SSong Gao 
2269399665d2SSong Gao void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
2270399665d2SSong Gao {
2271399665d2SSong Gao     int i;
2272399665d2SSong Gao     VReg temp;
2273399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2274399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2275399665d2SSong Gao 
2276399665d2SSong Gao     vec_clear_cause(env);
2277399665d2SSong Gao     for (i = 0; i < LSX_LEN/32; i++) {
2278399665d2SSong Gao         temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status);
2279399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2280399665d2SSong Gao     }
2281399665d2SSong Gao     *Vd = temp;
2282399665d2SSong Gao }
2283399665d2SSong Gao 
2284399665d2SSong Gao void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
2285399665d2SSong Gao {
2286399665d2SSong Gao     int i;
2287399665d2SSong Gao     VReg temp;
2288399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2289399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2290399665d2SSong Gao 
2291399665d2SSong Gao     vec_clear_cause(env);
2292399665d2SSong Gao     for (i = 0; i < LSX_LEN/64; i++) {
2293399665d2SSong Gao         temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status);
2294399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2295399665d2SSong Gao     }
2296399665d2SSong Gao     *Vd = temp;
2297399665d2SSong Gao }
2298399665d2SSong Gao 
2299399665d2SSong Gao void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
2300399665d2SSong Gao {
2301399665d2SSong Gao     int i;
2302399665d2SSong Gao     VReg temp;
2303399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2304399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2305399665d2SSong Gao 
2306399665d2SSong Gao     vec_clear_cause(env);
2307399665d2SSong Gao     for (i = 0; i < LSX_LEN/32; i++) {
2308399665d2SSong Gao         temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status);
2309399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2310399665d2SSong Gao     }
2311399665d2SSong Gao     *Vd = temp;
2312399665d2SSong Gao }
2313399665d2SSong Gao 
2314399665d2SSong Gao void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
2315399665d2SSong Gao {
2316399665d2SSong Gao     int i;
2317399665d2SSong Gao     VReg temp;
2318399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2319399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2320399665d2SSong Gao 
2321399665d2SSong Gao     vec_clear_cause(env);
2322399665d2SSong Gao     for (i = 0; i < LSX_LEN/64; i++) {
2323399665d2SSong Gao         temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status);
2324399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2325399665d2SSong Gao     }
2326399665d2SSong Gao     *Vd = temp;
2327399665d2SSong Gao }
2328399665d2SSong Gao 
2329399665d2SSong Gao void HELPER(vfcvt_h_s)(CPULoongArchState *env,
2330399665d2SSong Gao                        uint32_t vd, uint32_t vj, uint32_t vk)
2331399665d2SSong Gao {
2332399665d2SSong Gao     int i;
2333399665d2SSong Gao     VReg temp;
2334399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2335399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2336399665d2SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);
2337399665d2SSong Gao 
2338399665d2SSong Gao     vec_clear_cause(env);
2339399665d2SSong Gao     for(i = 0; i < LSX_LEN/32; i++) {
2340399665d2SSong Gao         temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status);
2341399665d2SSong Gao         temp.UH(i)  = float32_cvt_float16(Vk->UW(i), &env->fp_status);
2342399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2343399665d2SSong Gao     }
2344399665d2SSong Gao     *Vd = temp;
2345399665d2SSong Gao }
2346399665d2SSong Gao 
2347399665d2SSong Gao void HELPER(vfcvt_s_d)(CPULoongArchState *env,
2348399665d2SSong Gao                        uint32_t vd, uint32_t vj, uint32_t vk)
2349399665d2SSong Gao {
2350399665d2SSong Gao     int i;
2351399665d2SSong Gao     VReg temp;
2352399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2353399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2354399665d2SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);
2355399665d2SSong Gao 
2356399665d2SSong Gao     vec_clear_cause(env);
2357399665d2SSong Gao     for(i = 0; i < LSX_LEN/64; i++) {
2358399665d2SSong Gao         temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status);
2359399665d2SSong Gao         temp.UW(i)  = float64_cvt_float32(Vk->UD(i), &env->fp_status);
2360399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2361399665d2SSong Gao     }
2362399665d2SSong Gao     *Vd = temp;
2363399665d2SSong Gao }
2364399665d2SSong Gao 
2365399665d2SSong Gao void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
2366399665d2SSong Gao {
2367399665d2SSong Gao     int i;
2368399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2369399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2370399665d2SSong Gao 
2371399665d2SSong Gao     vec_clear_cause(env);
2372399665d2SSong Gao     for (i = 0; i < 4; i++) {
2373399665d2SSong Gao         Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status);
2374399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2375399665d2SSong Gao     }
2376399665d2SSong Gao }
2377399665d2SSong Gao 
2378399665d2SSong Gao void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
2379399665d2SSong Gao {
2380399665d2SSong Gao     int i;
2381399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2382399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2383399665d2SSong Gao 
2384399665d2SSong Gao     vec_clear_cause(env);
2385399665d2SSong Gao     for (i = 0; i < 2; i++) {
2386399665d2SSong Gao         Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status);
2387399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2388399665d2SSong Gao     }
2389399665d2SSong Gao }
2390399665d2SSong Gao 
2391399665d2SSong Gao #define FCVT_2OP(NAME, BIT, E, MODE)                                        \
2392399665d2SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj)         \
2393399665d2SSong Gao {                                                                           \
2394399665d2SSong Gao     int i;                                                                  \
2395399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                        \
2396399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                        \
2397399665d2SSong Gao                                                                             \
2398399665d2SSong Gao     vec_clear_cause(env);                                                   \
2399399665d2SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                                     \
2400399665d2SSong Gao         FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
2401399665d2SSong Gao         set_float_rounding_mode(MODE, &env->fp_status);                     \
2402399665d2SSong Gao         Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
2403399665d2SSong Gao         set_float_rounding_mode(old_mode, &env->fp_status);                 \
2404399665d2SSong Gao         vec_update_fcsr0(env, GETPC());                                     \
2405399665d2SSong Gao     }                                                                       \
2406399665d2SSong Gao }
2407399665d2SSong Gao 
2408399665d2SSong Gao FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even)
2409399665d2SSong Gao FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even)
2410399665d2SSong Gao FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero)
2411399665d2SSong Gao FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero)
2412399665d2SSong Gao FCVT_2OP(vfrintrp_s, 32, UW, float_round_up)
2413399665d2SSong Gao FCVT_2OP(vfrintrp_d, 64, UD, float_round_up)
2414399665d2SSong Gao FCVT_2OP(vfrintrm_s, 32, UW, float_round_down)
2415399665d2SSong Gao FCVT_2OP(vfrintrm_d, 64, UD, float_round_down)
2416399665d2SSong Gao 
/*
 * Define do_ftint<NAME>(): run the do_<FMT1>_to_<FMT2>() conversion with
 * the rounding mode temporarily forced to MODE, restoring the previous
 * mode afterwards.
 *
 * NOTE(review): the inner conversion routine updates fcsr0 itself; if that
 * update raises an exception and does not return, the saved rounding mode
 * would not be restored -- confirm whether that matters.
 */
#define FTINT(NAME, FMT1, FMT2, T1, T2,  MODE)                          \
static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj)               \
{                                                                       \
    float_status *fst = &env->fp_status;                                \
    FloatRoundMode saved = get_float_rounding_mode(fst);                \
    T2 res;                                                             \
                                                                        \
    set_float_rounding_mode(MODE, fst);                                 \
    res = do_## FMT1 ##_to_## FMT2(env, fj);                            \
    set_float_rounding_mode(saved, fst);                                \
    return res;                                                         \
}
2428399665d2SSong Gao 
2429399665d2SSong Gao #define DO_FTINT(FMT1, FMT2, T1, T2)                                         \
2430399665d2SSong Gao static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj)            \
2431399665d2SSong Gao {                                                                            \
2432399665d2SSong Gao     T2 fd;                                                                   \
2433399665d2SSong Gao                                                                              \
2434399665d2SSong Gao     fd = FMT1 ##_to_## FMT2(fj, &env->fp_status);                            \
2435399665d2SSong Gao     if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \
2436399665d2SSong Gao         if (FMT1 ##_is_any_nan(fj)) {                                        \
2437399665d2SSong Gao             fd = 0;                                                          \
2438399665d2SSong Gao         }                                                                    \
2439399665d2SSong Gao     }                                                                        \
2440399665d2SSong Gao     vec_update_fcsr0(env, GETPC());                                          \
2441399665d2SSong Gao     return fd;                                                               \
2442399665d2SSong Gao }
2443399665d2SSong Gao 
2444399665d2SSong Gao DO_FTINT(float32, int32, uint32_t, uint32_t)
2445399665d2SSong Gao DO_FTINT(float64, int64, uint64_t, uint64_t)
2446399665d2SSong Gao DO_FTINT(float32, uint32, uint32_t, uint32_t)
2447399665d2SSong Gao DO_FTINT(float64, uint64, uint64_t, uint64_t)
2448399665d2SSong Gao DO_FTINT(float64, int32, uint64_t, uint32_t)
2449399665d2SSong Gao DO_FTINT(float32, int64, uint32_t, uint64_t)
2450399665d2SSong Gao 
2451399665d2SSong Gao FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even)
2452399665d2SSong Gao FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even)
2453399665d2SSong Gao FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up)
2454399665d2SSong Gao FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up)
2455399665d2SSong Gao FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero)
2456399665d2SSong Gao FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero)
2457399665d2SSong Gao FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down)
2458399665d2SSong Gao FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down)
2459399665d2SSong Gao 
2460399665d2SSong Gao DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s)
2461399665d2SSong Gao DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d)
2462399665d2SSong Gao DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s)
2463399665d2SSong Gao DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d)
2464399665d2SSong Gao DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s)
2465399665d2SSong Gao DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d)
2466399665d2SSong Gao DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s)
2467399665d2SSong Gao DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d)
2468399665d2SSong Gao DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32)
2469399665d2SSong Gao DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64)
2470399665d2SSong Gao 
2471399665d2SSong Gao FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero)
2472399665d2SSong Gao FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero)
2473399665d2SSong Gao 
2474399665d2SSong Gao DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s)
2475399665d2SSong Gao DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d)
2476399665d2SSong Gao DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32)
2477399665d2SSong Gao DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64)
2478399665d2SSong Gao 
2479399665d2SSong Gao FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down)
2480399665d2SSong Gao FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
2481399665d2SSong Gao FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
2482399665d2SSong Gao FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
2483399665d2SSong Gao 
2484399665d2SSong Gao #define FTINT_W_D(NAME, FN)                              \
2485399665d2SSong Gao void HELPER(NAME)(CPULoongArchState *env,                \
2486399665d2SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk) \
2487399665d2SSong Gao {                                                        \
2488399665d2SSong Gao     int i;                                               \
2489399665d2SSong Gao     VReg temp;                                           \
2490399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                     \
2491399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                     \
2492399665d2SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                     \
2493399665d2SSong Gao                                                          \
2494399665d2SSong Gao     vec_clear_cause(env);                                \
2495399665d2SSong Gao     for (i = 0; i < 2; i++) {                            \
2496399665d2SSong Gao         temp.W(i + 2) = FN(env, Vj->UD(i));              \
2497399665d2SSong Gao         temp.W(i) = FN(env, Vk->UD(i));                  \
2498399665d2SSong Gao     }                                                    \
2499399665d2SSong Gao     *Vd = temp;                                          \
2500399665d2SSong Gao }
2501399665d2SSong Gao 
2502399665d2SSong Gao FTINT_W_D(vftint_w_d, do_float64_to_int32)
2503399665d2SSong Gao FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d)
2504399665d2SSong Gao FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d)
2505399665d2SSong Gao FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d)
2506399665d2SSong Gao FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d)
2507399665d2SSong Gao 
2508399665d2SSong Gao FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
2509399665d2SSong Gao FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
2510399665d2SSong Gao FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
2511399665d2SSong Gao FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
2512399665d2SSong Gao FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
2513399665d2SSong Gao FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
2514399665d2SSong Gao FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
2515399665d2SSong Gao FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
2516399665d2SSong Gao 
2517399665d2SSong Gao #define FTINTL_L_S(NAME, FN)                                        \
2518399665d2SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
2519399665d2SSong Gao {                                                                   \
2520399665d2SSong Gao     int i;                                                          \
2521399665d2SSong Gao     VReg temp;                                                      \
2522399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                \
2523399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
2524399665d2SSong Gao                                                                     \
2525399665d2SSong Gao     vec_clear_cause(env);                                           \
2526399665d2SSong Gao     for (i = 0; i < 2; i++) {                                       \
2527399665d2SSong Gao         temp.D(i) = FN(env, Vj->UW(i));                             \
2528399665d2SSong Gao     }                                                               \
2529399665d2SSong Gao     *Vd = temp;                                                     \
2530399665d2SSong Gao }
2531399665d2SSong Gao 
2532399665d2SSong Gao FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
2533399665d2SSong Gao FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s)
2534399665d2SSong Gao FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
2535399665d2SSong Gao FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
2536399665d2SSong Gao FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
2537399665d2SSong Gao 
2538399665d2SSong Gao #define FTINTH_L_S(NAME, FN)                                        \
2539399665d2SSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
2540399665d2SSong Gao {                                                                   \
2541399665d2SSong Gao     int i;                                                          \
2542399665d2SSong Gao     VReg temp;                                                      \
2543399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                \
2544399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
2545399665d2SSong Gao                                                                     \
2546399665d2SSong Gao     vec_clear_cause(env);                                           \
2547399665d2SSong Gao     for (i = 0; i < 2; i++) {                                       \
2548399665d2SSong Gao         temp.D(i) = FN(env, Vj->UW(i + 2));                         \
2549399665d2SSong Gao     }                                                               \
2550399665d2SSong Gao     *Vd = temp;                                                     \
2551399665d2SSong Gao }
2552399665d2SSong Gao 
2553399665d2SSong Gao FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
2554399665d2SSong Gao FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s)
2555399665d2SSong Gao FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s)
2556399665d2SSong Gao FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s)
2557399665d2SSong Gao FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s)
2558399665d2SSong Gao 
2559399665d2SSong Gao #define FFINT(NAME, FMT1, FMT2, T1, T2)                    \
2560399665d2SSong Gao static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \
2561399665d2SSong Gao {                                                          \
2562399665d2SSong Gao     T2 fd;                                                 \
2563399665d2SSong Gao                                                            \
2564399665d2SSong Gao     fd = FMT1 ##_to_## FMT2(fj, &env->fp_status);          \
2565399665d2SSong Gao     vec_update_fcsr0(env, GETPC());                        \
2566399665d2SSong Gao     return fd;                                             \
2567399665d2SSong Gao }
2568399665d2SSong Gao 
2569399665d2SSong Gao FFINT(s_w, int32, float32, int32_t, uint32_t)
2570399665d2SSong Gao FFINT(d_l, int64, float64, int64_t, uint64_t)
2571399665d2SSong Gao FFINT(s_wu, uint32, float32, uint32_t, uint32_t)
2572399665d2SSong Gao FFINT(d_lu, uint64, float64, uint64_t, uint64_t)
2573399665d2SSong Gao 
2574399665d2SSong Gao DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w)
2575399665d2SSong Gao DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l)
2576399665d2SSong Gao DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu)
2577399665d2SSong Gao DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
2578399665d2SSong Gao 
2579399665d2SSong Gao void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
2580399665d2SSong Gao {
2581399665d2SSong Gao     int i;
2582399665d2SSong Gao     VReg temp;
2583399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2584399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2585399665d2SSong Gao 
2586399665d2SSong Gao     vec_clear_cause(env);
2587399665d2SSong Gao     for (i = 0; i < 2; i++) {
2588399665d2SSong Gao         temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status);
2589399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2590399665d2SSong Gao     }
2591399665d2SSong Gao     *Vd = temp;
2592399665d2SSong Gao }
2593399665d2SSong Gao 
2594399665d2SSong Gao void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
2595399665d2SSong Gao {
2596399665d2SSong Gao     int i;
2597399665d2SSong Gao     VReg temp;
2598399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2599399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2600399665d2SSong Gao 
2601399665d2SSong Gao     vec_clear_cause(env);
2602399665d2SSong Gao     for (i = 0; i < 2; i++) {
2603399665d2SSong Gao         temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status);
2604399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2605399665d2SSong Gao     }
2606399665d2SSong Gao     *Vd = temp;
2607399665d2SSong Gao }
2608399665d2SSong Gao 
2609399665d2SSong Gao void HELPER(vffint_s_l)(CPULoongArchState *env,
2610399665d2SSong Gao                         uint32_t vd, uint32_t vj, uint32_t vk)
2611399665d2SSong Gao {
2612399665d2SSong Gao     int i;
2613399665d2SSong Gao     VReg temp;
2614399665d2SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2615399665d2SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2616399665d2SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);
2617399665d2SSong Gao 
2618399665d2SSong Gao     vec_clear_cause(env);
2619399665d2SSong Gao     for (i = 0; i < 2; i++) {
2620399665d2SSong Gao         temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status);
2621399665d2SSong Gao         temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status);
2622399665d2SSong Gao         vec_update_fcsr0(env, GETPC());
2623399665d2SSong Gao     }
2624399665d2SSong Gao     *Vd = temp;
2625399665d2SSong Gao }
2626f435e1e5SSong Gao 
/*
 * Element comparison primitives: evaluate to -1 (all bits set once stored
 * into an integer element) when the relation holds, 0 otherwise.
 *
 * Arguments are parenthesized so that an argument containing an operator
 * of lower precedence than the comparison (e.g. "x & mask") is compared as
 * a whole instead of being split by the comparison operator.
 */
#define VSEQ(a, b) ((a) == (b) ? -1 : 0)
#define VSLE(a, b) ((a) <= (b) ? -1 : 0)
#define VSLT(a, b) ((a) < (b) ? -1 : 0)
2630f435e1e5SSong Gao 
2631f435e1e5SSong Gao #define VCMPI(NAME, BIT, E, DO_OP)                              \
2632f435e1e5SSong Gao void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
2633f435e1e5SSong Gao {                                                               \
2634f435e1e5SSong Gao     int i;                                                      \
2635f435e1e5SSong Gao     VReg *Vd = (VReg *)vd;                                      \
2636f435e1e5SSong Gao     VReg *Vj = (VReg *)vj;                                      \
2637f435e1e5SSong Gao     typedef __typeof(Vd->E(0)) TD;                              \
2638f435e1e5SSong Gao                                                                 \
2639f435e1e5SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                         \
2640f435e1e5SSong Gao         Vd->E(i) = DO_OP(Vj->E(i), (TD)imm);                    \
2641f435e1e5SSong Gao     }                                                           \
2642f435e1e5SSong Gao }
2643f435e1e5SSong Gao 
2644f435e1e5SSong Gao VCMPI(vseqi_b, 8, B, VSEQ)
2645f435e1e5SSong Gao VCMPI(vseqi_h, 16, H, VSEQ)
2646f435e1e5SSong Gao VCMPI(vseqi_w, 32, W, VSEQ)
2647f435e1e5SSong Gao VCMPI(vseqi_d, 64, D, VSEQ)
2648f435e1e5SSong Gao VCMPI(vslei_b, 8, B, VSLE)
2649f435e1e5SSong Gao VCMPI(vslei_h, 16, H, VSLE)
2650f435e1e5SSong Gao VCMPI(vslei_w, 32, W, VSLE)
2651f435e1e5SSong Gao VCMPI(vslei_d, 64, D, VSLE)
2652f435e1e5SSong Gao VCMPI(vslei_bu, 8, UB, VSLE)
2653f435e1e5SSong Gao VCMPI(vslei_hu, 16, UH, VSLE)
2654f435e1e5SSong Gao VCMPI(vslei_wu, 32, UW, VSLE)
2655f435e1e5SSong Gao VCMPI(vslei_du, 64, UD, VSLE)
2656f435e1e5SSong Gao VCMPI(vslti_b, 8, B, VSLT)
2657f435e1e5SSong Gao VCMPI(vslti_h, 16, H, VSLT)
2658f435e1e5SSong Gao VCMPI(vslti_w, 32, W, VSLT)
2659f435e1e5SSong Gao VCMPI(vslti_d, 64, D, VSLT)
2660f435e1e5SSong Gao VCMPI(vslti_bu, 8, UB, VSLT)
2661f435e1e5SSong Gao VCMPI(vslti_hu, 16, UH, VSLT)
2662f435e1e5SSong Gao VCMPI(vslti_wu, 32, UW, VSLT)
2663f435e1e5SSong Gao VCMPI(vslti_du, 64, UD, VSLT)
2664386c4e86SSong Gao 
2665386c4e86SSong Gao static uint64_t vfcmp_common(CPULoongArchState *env,
2666386c4e86SSong Gao                              FloatRelation cmp, uint32_t flags)
2667386c4e86SSong Gao {
2668386c4e86SSong Gao     uint64_t ret = 0;
2669386c4e86SSong Gao 
2670386c4e86SSong Gao     switch (cmp) {
2671386c4e86SSong Gao     case float_relation_less:
2672386c4e86SSong Gao         ret = (flags & FCMP_LT);
2673386c4e86SSong Gao         break;
2674386c4e86SSong Gao     case float_relation_equal:
2675386c4e86SSong Gao         ret = (flags & FCMP_EQ);
2676386c4e86SSong Gao         break;
2677386c4e86SSong Gao     case float_relation_greater:
2678386c4e86SSong Gao         ret = (flags & FCMP_GT);
2679386c4e86SSong Gao         break;
2680386c4e86SSong Gao     case float_relation_unordered:
2681386c4e86SSong Gao         ret = (flags & FCMP_UN);
2682386c4e86SSong Gao         break;
2683386c4e86SSong Gao     default:
2684386c4e86SSong Gao         g_assert_not_reached();
2685386c4e86SSong Gao     }
2686386c4e86SSong Gao 
2687386c4e86SSong Gao     if (ret) {
2688386c4e86SSong Gao         ret = -1;
2689386c4e86SSong Gao     }
2690386c4e86SSong Gao 
2691386c4e86SSong Gao     return ret;
2692386c4e86SSong Gao }
2693386c4e86SSong Gao 
2694386c4e86SSong Gao #define VFCMP(NAME, BIT, E, FN)                                          \
2695386c4e86SSong Gao void HELPER(NAME)(CPULoongArchState *env,                                \
2696386c4e86SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \
2697386c4e86SSong Gao {                                                                        \
2698386c4e86SSong Gao     int i;                                                               \
2699386c4e86SSong Gao     VReg t;                                                              \
2700386c4e86SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                                     \
2701386c4e86SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                     \
2702386c4e86SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                                     \
2703386c4e86SSong Gao                                                                          \
2704386c4e86SSong Gao     vec_clear_cause(env);                                                \
2705386c4e86SSong Gao     for (i = 0; i < LSX_LEN/BIT ; i++) {                                 \
2706386c4e86SSong Gao         FloatRelation cmp;                                               \
2707386c4e86SSong Gao         cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status);                   \
2708386c4e86SSong Gao         t.E(i) = vfcmp_common(env, cmp, flags);                          \
2709386c4e86SSong Gao         vec_update_fcsr0(env, GETPC());                                  \
2710386c4e86SSong Gao     }                                                                    \
2711386c4e86SSong Gao     *Vd = t;                                                             \
2712386c4e86SSong Gao }
2713386c4e86SSong Gao 
2714386c4e86SSong Gao VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet)
2715386c4e86SSong Gao VFCMP(vfcmp_s_s, 32, UW, float32_compare)
2716386c4e86SSong Gao VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
2717386c4e86SSong Gao VFCMP(vfcmp_s_d, 64, UD, float64_compare)
2718d0dfa19aSSong Gao 
2719d0dfa19aSSong Gao void HELPER(vbitseli_b)(void *vd, void *vj,  uint64_t imm, uint32_t v)
2720d0dfa19aSSong Gao {
2721d0dfa19aSSong Gao     int i;
2722d0dfa19aSSong Gao     VReg *Vd = (VReg *)vd;
2723d0dfa19aSSong Gao     VReg *Vj = (VReg *)vj;
2724d0dfa19aSSong Gao 
2725d0dfa19aSSong Gao     for (i = 0; i < 16; i++) {
2726d0dfa19aSSong Gao         Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm);
2727d0dfa19aSSong Gao     }
2728d0dfa19aSSong Gao }
2729d0dfa19aSSong Gao 
2730d0dfa19aSSong Gao /* Copy from target/arm/tcg/sve_helper.c */
2731d0dfa19aSSong Gao static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
2732d0dfa19aSSong Gao {
2733d0dfa19aSSong Gao     uint64_t bits = 8 << esz;
2734d0dfa19aSSong Gao     uint64_t ones = dup_const(esz, 1);
2735d0dfa19aSSong Gao     uint64_t signs = ones << (bits - 1);
2736d0dfa19aSSong Gao     uint64_t cmp0, cmp1;
2737d0dfa19aSSong Gao 
2738d0dfa19aSSong Gao     cmp1 = dup_const(esz, n);
2739d0dfa19aSSong Gao     cmp0 = cmp1 ^ m0;
2740d0dfa19aSSong Gao     cmp1 = cmp1 ^ m1;
2741d0dfa19aSSong Gao     cmp0 = (cmp0 - ones) & ~cmp0;
2742d0dfa19aSSong Gao     cmp1 = (cmp1 - ones) & ~cmp1;
2743d0dfa19aSSong Gao     return (cmp0 | cmp1) & signs;
2744d0dfa19aSSong Gao }
2745d0dfa19aSSong Gao 
2746d0dfa19aSSong Gao #define SETANYEQZ(NAME, MO)                                         \
2747d0dfa19aSSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
2748d0dfa19aSSong Gao {                                                                   \
2749d0dfa19aSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
2750d0dfa19aSSong Gao                                                                     \
2751d0dfa19aSSong Gao     env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO);       \
2752d0dfa19aSSong Gao }
2753d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_b, MO_8)
2754d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_h, MO_16)
2755d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_w, MO_32)
2756d0dfa19aSSong Gao SETANYEQZ(vsetanyeqz_d, MO_64)
2757d0dfa19aSSong Gao 
2758d0dfa19aSSong Gao #define SETALLNEZ(NAME, MO)                                         \
2759d0dfa19aSSong Gao void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
2760d0dfa19aSSong Gao {                                                                   \
2761d0dfa19aSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                                \
2762d0dfa19aSSong Gao                                                                     \
2763d0dfa19aSSong Gao     env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO);       \
2764d0dfa19aSSong Gao }
2765d0dfa19aSSong Gao SETALLNEZ(vsetallnez_b, MO_8)
2766d0dfa19aSSong Gao SETALLNEZ(vsetallnez_h, MO_16)
2767d0dfa19aSSong Gao SETALLNEZ(vsetallnez_w, MO_32)
2768d0dfa19aSSong Gao SETALLNEZ(vsetallnez_d, MO_64)
2769d5e5563cSSong Gao 
2770d5e5563cSSong Gao #define VPACKEV(NAME, BIT, E)                            \
2771d5e5563cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                \
2772d5e5563cSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk) \
2773d5e5563cSSong Gao {                                                        \
2774d5e5563cSSong Gao     int i;                                               \
2775d5e5563cSSong Gao     VReg temp;                                           \
2776d5e5563cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                     \
2777d5e5563cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                     \
2778d5e5563cSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                     \
2779d5e5563cSSong Gao                                                          \
2780d5e5563cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                  \
2781d5e5563cSSong Gao         temp.E(2 * i + 1) = Vj->E(2 * i);                \
2782d5e5563cSSong Gao         temp.E(2 *i) = Vk->E(2 * i);                     \
2783d5e5563cSSong Gao     }                                                    \
2784d5e5563cSSong Gao     *Vd = temp;                                          \
2785d5e5563cSSong Gao }
2786d5e5563cSSong Gao 
2787d5e5563cSSong Gao VPACKEV(vpackev_b, 16, B)
2788d5e5563cSSong Gao VPACKEV(vpackev_h, 32, H)
2789d5e5563cSSong Gao VPACKEV(vpackev_w, 64, W)
2790d5e5563cSSong Gao VPACKEV(vpackev_d, 128, D)
2791d5e5563cSSong Gao 
2792d5e5563cSSong Gao #define VPACKOD(NAME, BIT, E)                            \
2793d5e5563cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                \
2794d5e5563cSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk) \
2795d5e5563cSSong Gao {                                                        \
2796d5e5563cSSong Gao     int i;                                               \
2797d5e5563cSSong Gao     VReg temp;                                           \
2798d5e5563cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                     \
2799d5e5563cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                     \
2800d5e5563cSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                     \
2801d5e5563cSSong Gao                                                          \
2802d5e5563cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                  \
2803d5e5563cSSong Gao         temp.E(2 * i + 1) = Vj->E(2 * i + 1);            \
2804d5e5563cSSong Gao         temp.E(2 * i) = Vk->E(2 * i + 1);                \
2805d5e5563cSSong Gao     }                                                    \
2806d5e5563cSSong Gao     *Vd = temp;                                          \
2807d5e5563cSSong Gao }
2808d5e5563cSSong Gao 
2809d5e5563cSSong Gao VPACKOD(vpackod_b, 16, B)
2810d5e5563cSSong Gao VPACKOD(vpackod_h, 32, H)
2811d5e5563cSSong Gao VPACKOD(vpackod_w, 64, W)
2812d5e5563cSSong Gao VPACKOD(vpackod_d, 128, D)
2813d5e5563cSSong Gao 
2814d5e5563cSSong Gao #define VPICKEV(NAME, BIT, E)                            \
2815d5e5563cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                \
2816d5e5563cSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk) \
2817d5e5563cSSong Gao {                                                        \
2818d5e5563cSSong Gao     int i;                                               \
2819d5e5563cSSong Gao     VReg temp;                                           \
2820d5e5563cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                     \
2821d5e5563cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                     \
2822d5e5563cSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                     \
2823d5e5563cSSong Gao                                                          \
2824d5e5563cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                  \
2825d5e5563cSSong Gao         temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i);          \
2826d5e5563cSSong Gao         temp.E(i) = Vk->E(2 * i);                        \
2827d5e5563cSSong Gao     }                                                    \
2828d5e5563cSSong Gao     *Vd = temp;                                          \
2829d5e5563cSSong Gao }
2830d5e5563cSSong Gao 
2831d5e5563cSSong Gao VPICKEV(vpickev_b, 16, B)
2832d5e5563cSSong Gao VPICKEV(vpickev_h, 32, H)
2833d5e5563cSSong Gao VPICKEV(vpickev_w, 64, W)
2834d5e5563cSSong Gao VPICKEV(vpickev_d, 128, D)
2835d5e5563cSSong Gao 
2836d5e5563cSSong Gao #define VPICKOD(NAME, BIT, E)                            \
2837d5e5563cSSong Gao void HELPER(NAME)(CPULoongArchState *env,                \
2838d5e5563cSSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk) \
2839d5e5563cSSong Gao {                                                        \
2840d5e5563cSSong Gao     int i;                                               \
2841d5e5563cSSong Gao     VReg temp;                                           \
2842d5e5563cSSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                     \
2843d5e5563cSSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                     \
2844d5e5563cSSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                     \
2845d5e5563cSSong Gao                                                          \
2846d5e5563cSSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                  \
2847d5e5563cSSong Gao         temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1);      \
2848d5e5563cSSong Gao         temp.E(i) = Vk->E(2 * i + 1);                    \
2849d5e5563cSSong Gao     }                                                    \
2850d5e5563cSSong Gao     *Vd = temp;                                          \
2851d5e5563cSSong Gao }
2852d5e5563cSSong Gao 
2853d5e5563cSSong Gao VPICKOD(vpickod_b, 16, B)
2854d5e5563cSSong Gao VPICKOD(vpickod_h, 32, H)
2855d5e5563cSSong Gao VPICKOD(vpickod_w, 64, W)
2856d5e5563cSSong Gao VPICKOD(vpickod_d, 128, D)
2857e93dd431SSong Gao 
2858e93dd431SSong Gao #define VILVL(NAME, BIT, E)                              \
2859e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env,                \
2860e93dd431SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk) \
2861e93dd431SSong Gao {                                                        \
2862e93dd431SSong Gao     int i;                                               \
2863e93dd431SSong Gao     VReg temp;                                           \
2864e93dd431SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                     \
2865e93dd431SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                     \
2866e93dd431SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                     \
2867e93dd431SSong Gao                                                          \
2868e93dd431SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                  \
2869e93dd431SSong Gao         temp.E(2 * i + 1) = Vj->E(i);                    \
2870e93dd431SSong Gao         temp.E(2 * i) = Vk->E(i);                        \
2871e93dd431SSong Gao     }                                                    \
2872e93dd431SSong Gao     *Vd = temp;                                          \
2873e93dd431SSong Gao }
2874e93dd431SSong Gao 
2875e93dd431SSong Gao VILVL(vilvl_b, 16, B)
2876e93dd431SSong Gao VILVL(vilvl_h, 32, H)
2877e93dd431SSong Gao VILVL(vilvl_w, 64, W)
2878e93dd431SSong Gao VILVL(vilvl_d, 128, D)
2879e93dd431SSong Gao 
2880e93dd431SSong Gao #define VILVH(NAME, BIT, E)                              \
2881e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env,                \
2882e93dd431SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk) \
2883e93dd431SSong Gao {                                                        \
2884e93dd431SSong Gao     int i;                                               \
2885e93dd431SSong Gao     VReg temp;                                           \
2886e93dd431SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                     \
2887e93dd431SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                     \
2888e93dd431SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                     \
2889e93dd431SSong Gao                                                          \
2890e93dd431SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                  \
2891e93dd431SSong Gao         temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT);      \
2892e93dd431SSong Gao         temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT);          \
2893e93dd431SSong Gao     }                                                    \
2894e93dd431SSong Gao     *Vd = temp;                                          \
2895e93dd431SSong Gao }
2896e93dd431SSong Gao 
2897e93dd431SSong Gao VILVH(vilvh_b, 16, B)
2898e93dd431SSong Gao VILVH(vilvh_h, 32, H)
2899e93dd431SSong Gao VILVH(vilvh_w, 64, W)
2900e93dd431SSong Gao VILVH(vilvh_d, 128, D)
2901e93dd431SSong Gao 
2902e93dd431SSong Gao void HELPER(vshuf_b)(CPULoongArchState *env,
2903e93dd431SSong Gao                      uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)
2904e93dd431SSong Gao {
2905e93dd431SSong Gao     int i, m;
2906e93dd431SSong Gao     VReg temp;
2907e93dd431SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2908e93dd431SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2909e93dd431SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);
2910e93dd431SSong Gao     VReg *Va = &(env->fpr[va].vreg);
2911e93dd431SSong Gao 
2912e93dd431SSong Gao     m = LSX_LEN/8;
2913e93dd431SSong Gao     for (i = 0; i < m ; i++) {
2914e93dd431SSong Gao         uint64_t k = (uint8_t)Va->B(i) % (2 * m);
2915e93dd431SSong Gao         temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m);
2916e93dd431SSong Gao     }
2917e93dd431SSong Gao     *Vd = temp;
2918e93dd431SSong Gao }
2919e93dd431SSong Gao 
2920e93dd431SSong Gao #define VSHUF(NAME, BIT, E)                              \
2921e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env,                \
2922e93dd431SSong Gao                   uint32_t vd, uint32_t vj, uint32_t vk) \
2923e93dd431SSong Gao {                                                        \
2924e93dd431SSong Gao     int i, m;                                            \
2925e93dd431SSong Gao     VReg temp;                                           \
2926e93dd431SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                     \
2927e93dd431SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                     \
2928e93dd431SSong Gao     VReg *Vk = &(env->fpr[vk].vreg);                     \
2929e93dd431SSong Gao                                                          \
2930e93dd431SSong Gao     m = LSX_LEN/BIT;                                     \
2931e93dd431SSong Gao     for (i = 0; i < m; i++) {                            \
2932e93dd431SSong Gao         uint64_t k  = ((uint8_t) Vd->E(i)) % (2 * m);    \
2933e93dd431SSong Gao         temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m);     \
2934e93dd431SSong Gao     }                                                    \
2935e93dd431SSong Gao     *Vd = temp;                                          \
2936e93dd431SSong Gao }
2937e93dd431SSong Gao 
2938e93dd431SSong Gao VSHUF(vshuf_h, 16, H)
2939e93dd431SSong Gao VSHUF(vshuf_w, 32, W)
2940e93dd431SSong Gao VSHUF(vshuf_d, 64, D)
2941e93dd431SSong Gao 
2942e93dd431SSong Gao #define VSHUF4I(NAME, BIT, E)                             \
2943e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env,                 \
2944e93dd431SSong Gao                   uint32_t vd, uint32_t vj, uint32_t imm) \
2945e93dd431SSong Gao {                                                         \
2946e93dd431SSong Gao     int i;                                                \
2947e93dd431SSong Gao     VReg temp;                                            \
2948e93dd431SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                      \
2949e93dd431SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                      \
2950e93dd431SSong Gao                                                           \
2951e93dd431SSong Gao     for (i = 0; i < LSX_LEN/BIT; i++) {                   \
2952e93dd431SSong Gao          temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >>      \
2953e93dd431SSong Gao                            (2 * ((i) & 0x03))) & 0x03));  \
2954e93dd431SSong Gao     }                                                     \
2955e93dd431SSong Gao     *Vd = temp;                                           \
2956e93dd431SSong Gao }
2957e93dd431SSong Gao 
2958e93dd431SSong Gao VSHUF4I(vshuf4i_b, 8, B)
2959e93dd431SSong Gao VSHUF4I(vshuf4i_h, 16, H)
2960e93dd431SSong Gao VSHUF4I(vshuf4i_w, 32, W)
2961e93dd431SSong Gao 
2962e93dd431SSong Gao void HELPER(vshuf4i_d)(CPULoongArchState *env,
2963e93dd431SSong Gao                        uint32_t vd, uint32_t vj, uint32_t imm)
2964e93dd431SSong Gao {
2965e93dd431SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2966e93dd431SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2967e93dd431SSong Gao 
2968e93dd431SSong Gao     VReg temp;
2969e93dd431SSong Gao     temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1);
2970e93dd431SSong Gao     temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1);
2971e93dd431SSong Gao     *Vd = temp;
2972e93dd431SSong Gao }
2973e93dd431SSong Gao 
2974e93dd431SSong Gao void HELPER(vpermi_w)(CPULoongArchState *env,
2975e93dd431SSong Gao                       uint32_t vd, uint32_t vj, uint32_t imm)
2976e93dd431SSong Gao {
2977e93dd431SSong Gao     VReg temp;
2978e93dd431SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);
2979e93dd431SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);
2980e93dd431SSong Gao 
2981e93dd431SSong Gao     temp.W(0) = Vj->W(imm & 0x3);
2982e93dd431SSong Gao     temp.W(1) = Vj->W((imm >> 2) & 0x3);
2983e93dd431SSong Gao     temp.W(2) = Vd->W((imm >> 4) & 0x3);
2984e93dd431SSong Gao     temp.W(3) = Vd->W((imm >> 6) & 0x3);
2985e93dd431SSong Gao     *Vd = temp;
2986e93dd431SSong Gao }
2987e93dd431SSong Gao 
2988e93dd431SSong Gao #define VEXTRINS(NAME, BIT, E, MASK)                      \
2989e93dd431SSong Gao void HELPER(NAME)(CPULoongArchState *env,                 \
2990e93dd431SSong Gao                   uint32_t vd, uint32_t vj, uint32_t imm) \
2991e93dd431SSong Gao {                                                         \
2992e93dd431SSong Gao     int ins, extr;                                        \
2993e93dd431SSong Gao     VReg *Vd = &(env->fpr[vd].vreg);                      \
2994e93dd431SSong Gao     VReg *Vj = &(env->fpr[vj].vreg);                      \
2995e93dd431SSong Gao                                                           \
2996e93dd431SSong Gao     ins = (imm >> 4) & MASK;                              \
2997e93dd431SSong Gao     extr = imm & MASK;                                    \
2998e93dd431SSong Gao     Vd->E(ins) = Vj->E(extr);                             \
2999e93dd431SSong Gao }
3000e93dd431SSong Gao 
3001e93dd431SSong Gao VEXTRINS(vextrins_b, 8, B, 0xf)
3002e93dd431SSong Gao VEXTRINS(vextrins_h, 16, H, 0x7)
3003e93dd431SSong Gao VEXTRINS(vextrins_w, 32, W, 0x3)
3004e93dd431SSong Gao VEXTRINS(vextrins_d, 64, D, 0x1)
3005