Lines Matching +full:- +full:j
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
10 #include "exec/helper-proto.h"
15 #include "tcg/tcg-gvec-desc.h"
24 typedef __typeof(Vd->E1(0)) TD; \
28 Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \
45 Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)), in HELPER()
46 int128_makes64(Vk->D(2 * i))); in HELPER()
63 Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), in HELPER()
64 int128_makes64(Vk->D(2 * i))); in HELPER()
81 Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), in HELPER()
82 int128_make64(Vk->UD(2 * i))); in HELPER()
99 Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), in HELPER()
100 int128_make64(Vk->UD(2 * i))); in HELPER()
111 typedef __typeof(Vd->E1(0)) TD; \
115 Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \
126 typedef __typeof(Vd->E1(0)) TD; \
130 Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \
143 Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)), in HELPER()
144 int128_makes64(Vk->D(2 * i))); in HELPER()
161 Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)), in HELPER()
162 int128_makes64(Vk->D(2 * i +1))); in HELPER()
179 Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)), in HELPER()
180 int128_makes64(Vk->D(2 * i))); in HELPER()
197 Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), in HELPER()
198 int128_makes64(Vk->D(2 * i + 1))); in HELPER()
215 Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), in HELPER()
216 int128_make64(Vk->UD(2 * i))); in HELPER()
233 Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), in HELPER()
234 int128_make64(Vk->UD(2 * i + 1))); in HELPER()
251 Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)), in HELPER()
252 int128_make64(Vk->UD(2 * i))); in HELPER()
269 Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), in HELPER()
270 int128_make64(Vk->UD(2 * i + 1))); in HELPER()
285 typedef __typeof(Vd->ES1(0)) TDS; \
286 typedef __typeof(Vd->EU1(0)) TDU; \
290 Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \
301 typedef __typeof(Vd->ES1(0)) TDS; \
302 typedef __typeof(Vd->EU1(0)) TDU; \
306 Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \
319 Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), in HELPER()
320 int128_makes64(Vk->D(2 * i))); in HELPER()
337 Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), in HELPER()
338 int128_makes64(Vk->D(2 * i + 1))); in HELPER()
356 Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
396 Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \
411 typedef __typeof(Vd->E(0)) TD; \
415 Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
443 typedef __typeof(Vd->E1(0)) T; \
447 Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \
461 muls64(&l, &h, Vj->D(i), Vk->D(i)); in HELPER()
462 Vd->D(i) = h; in HELPER()
480 mulu64(&l, &h, Vj->D(i), Vk->D(i)); in HELPER()
481 Vd->D(i) = h; in HELPER()
523 Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \
543 typedef __typeof(Vd->E1(0)) TD; \
547 Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \
565 typedef __typeof(Vd->E1(0)) TD; \
569 Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \
570 (TD)Vk->E2(2 * i + 1)); \
588 typedef __typeof(Vd->ES1(0)) TS1; \
589 typedef __typeof(Vd->EU1(0)) TU1; \
593 Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \
594 (TS1)Vk->ES2(2 * i)); \
609 typedef __typeof(Vd->ES1(0)) TS1; \
610 typedef __typeof(Vd->EU1(0)) TU1; \
614 Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \
615 (TS1)Vk->ES2(2 * i + 1)); \
633 Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
660 typedef __typeof(Vd->E(0)) TD; \
664 Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \
665 Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \
680 typedef __typeof(Vd->E(0)) TD; \
684 Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \
696 int i, j, ofs; \
703 for (j = 0; j < ofs; j++) { \
704 Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \
717 Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1)); in HELPER()
729 Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1)); in HELPER()
750 temp.E1(i) = Vj->E2(i); \
793 temp = do_vmskltz_b(Vj->D(2 * i)); in HELPER()
794 temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); in HELPER()
795 Vd->D(2 * i) = temp; in HELPER()
796 Vd->D(2 * i + 1) = 0; in HELPER()
819 temp = do_vmskltz_h(Vj->D(2 * i)); in HELPER()
820 temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4); in HELPER()
821 Vd->D(2 * i) = temp; in HELPER()
822 Vd->D(2 * i + 1) = 0; in HELPER()
844 temp = do_vmskltz_w(Vj->D(2 * i)); in HELPER()
845 temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2); in HELPER()
846 Vd->D(2 * i) = temp; in HELPER()
847 Vd->D(2 * i + 1) = 0; in HELPER()
865 temp = do_vmskltz_d(Vj->D(2 * i)); in HELPER()
866 temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1); in HELPER()
867 Vd->D(2 * i) = temp; in HELPER()
868 Vd->D(2 * i + 1) = 0; in HELPER()
882 temp = do_vmskltz_b(Vj->D(2 * i)); in HELPER()
883 temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); in HELPER()
884 Vd->D(2 * i) = (uint16_t)(~temp); in HELPER()
885 Vd->D(2 * i + 1) = 0; in HELPER()
909 temp = do_vmskez_b(Vj->D(2 * i)); in HELPER()
910 temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8); in HELPER()
911 Vd->D(2 * i) = (uint16_t)(~temp); in HELPER()
912 Vd->D(2 * i + 1) = 0; in HELPER()
923 Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm); in HELPER()
930 int i, j, ofs; \
939 for (j = 0; j < ofs; j++) { \
940 temp.E1(j + ofs * i) = (TD)Vj->E2(j + ofs * 2 * i) << (imm % BIT); \
955 Vd->Q(i) = int128_makes64(Vj->D(2 * i)); in HELPER()
967 Vd->Q(i) = int128_make64(Vj->UD(2 * i)); in HELPER()
984 return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \
1003 Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ in do_vsrlr()
1021 Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
1036 return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \
1055 Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
1073 Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
1085 int i, j, ofs; \
1093 for (j = 0; j < ofs; j++) { \
1094 Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
1095 Vk->E2(j + ofs * i) % BIT); \
1097 Vd->D(2 * i + 1) = 0; \
1108 int i, j, ofs; \
1116 for (j = 0; j < ofs; j++) { \
1117 Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
1118 Vk->E3(j + ofs * i) % BIT); \
1120 Vd->D(2 * i + 1) = 0; \
1131 int i, j, ofs; \
1139 for (j = 0; j < ofs; j++) { \
1140 temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
1141 temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
1156 temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128));
1157 temp.D(2 * i +1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128));
1169 int i, j, ofs; \
1177 for (j = 0; j < ofs; j++) { \
1178 temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
1179 temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
1194 temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128)); in HELPER()
1195 temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128)); in HELPER()
1207 int i, j, ofs; \
1215 for (j = 0; j < ofs; j++) { \
1216 Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ##E2(Vj->E2(j + ofs * i), \
1217 Vk->E3(j + ofs * i) % BIT); \
1219 Vd->D(2 * i + 1) = 0; \
1230 int i, j, ofs; \
1238 for (j = 0; j < ofs; j++) { \
1239 Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), \
1240 Vk->E3(j + ofs * i) % BIT); \
1242 Vd->D(2 * i + 1) = 0; \
1253 int i, j, ofs; \
1261 for (j = 0; j < ofs; j++) { \
1262 temp.E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), imm); \
1263 temp.E1(j + ofs * (2 * i + 1)) = do_vsrlr_ ## E2(Vd->E2(j + ofs * i), \
1281 temp.D(2 * i) = int128_getlo(Vj->Q(i)); in HELPER()
1282 temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); in HELPER()
1284 r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)), in HELPER()
1286 r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)), in HELPER()
1288 temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i), in HELPER()
1290 temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i), in HELPER()
1304 int i, j, ofs; \
1312 for (j = 0; j < ofs; j++) { \
1313 temp.E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), imm); \
1314 temp.E1(j + ofs * (2 * i + 1)) = do_vsrar_ ## E2(Vd->E2(j + ofs * i), \
1332 temp.D(2 * i) = int128_getlo(Vj->Q(i)); in HELPER()
1333 temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); in HELPER()
1335 r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)), in HELPER()
1337 r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)), in HELPER()
1339 temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i), in HELPER()
1341 temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i), in HELPER()
1362 mask = (1ull << sh) -1; \
1377 int i, j, ofs; \ in SSRLNS()
1385 for (j = 0; j < ofs; j++) { \ in SSRLNS()
1386 Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \ in SSRLNS()
1387 Vk->E3(j + ofs * i) % BIT, \ in SSRLNS()
1388 BIT / 2 - 1); \ in SSRLNS()
1390 Vd->D(2 * i + 1) = 0; \ in SSRLNS()
1408 mask = (1ll << sh) - 1; \
1411 } else if (shft_res < -(mask + 1)) { \
1425 int i, j, ofs; \
1433 for (j = 0; j < ofs; j++) { \
1434 Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
1435 Vk->E3(j + ofs * i) % BIT, \
1436 BIT / 2 - 1); \
1438 Vd->D(2 * i + 1) = 0; \
1456 mask = (1ull << sh) - 1; \
1471 int i, j, ofs; \
1479 for (j = 0; j < ofs; j++) { \
1480 Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
1481 Vk->E3(j + ofs * i) % BIT, \
1484 Vd->D(2 * i + 1) = 0; \
1505 mask = (1ull << sh) - 1; \
1520 int i, j, ofs; \
1528 for (j = 0; j < ofs; j++) { \
1529 Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
1530 Vk->E3(j + ofs * i) % BIT, \
1533 Vd->D(2 * i + 1) = 0; \
1544 int i, j, ofs; \
1552 for (j = 0; j < ofs; j++) { \
1553 temp.E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
1554 imm, BIT / 2 - 1); \
1555 temp.E1(j + ofs * (2 * i + 1)) = do_ssrlns_ ## E1(Vd->E2(j + ofs * i), \
1556 imm, BIT / 2 - 1); \
1568 shft_res1 = Vj->Q(idx);
1569 shft_res2 = Vd->Q(idx);
1571 shft_res1 = int128_urshift(Vj->Q(idx), imm);
1572 shft_res2 = int128_urshift(Vd->Q(idx), imm);
1576 Vd->D(idx * 2) = int128_getlo(mask);
1578 Vd->D(idx * 2) = int128_getlo(shft_res1);
1582 Vd->D(idx * 2 + 1) = int128_getlo(mask);
1584 Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
1610 int i, j, ofs; \
1618 for (j = 0; j < ofs; j++) { \
1619 temp.E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
1620 imm, BIT / 2 - 1); \
1621 temp.E1(j + ofs * (2 * i + 1)) = do_ssrans_ ## E1(Vd->E2(j + ofs * i), \
1622 imm, BIT / 2 - 1); \
1634 shft_res1 = Vj->Q(idx); in do_vssrani_d_q()
1635 shft_res2 = Vd->Q(idx); in do_vssrani_d_q()
1637 shft_res1 = int128_rshift(Vj->Q(idx), imm); in do_vssrani_d_q()
1638 shft_res2 = int128_rshift(Vd->Q(idx), imm); in do_vssrani_d_q()
1642 Vd->D(idx * 2) = int128_getlo(mask); in do_vssrani_d_q()
1644 Vd->D(idx * 2) = int128_getlo(min); in do_vssrani_d_q()
1646 Vd->D(idx * 2) = int128_getlo(shft_res1); in do_vssrani_d_q()
1650 Vd->D(idx * 2 + 1) = int128_getlo(mask); in do_vssrani_d_q()
1652 Vd->D(idx * 2 + 1) = int128_getlo(min); in do_vssrani_d_q()
1654 Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); in do_vssrani_d_q()
1682 int i, j, ofs; \
1690 for (j = 0; j < ofs; j++) { \
1691 temp.E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
1693 temp.E1(j + ofs * (2 * i + 1)) = do_ssrlnu_ ## E1(Vd->E2(j + ofs * i), \
1722 int i, j, ofs; \
1730 for (j = 0; j < ofs; j++) { \
1731 temp.E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
1733 temp.E1(j + ofs * (2 * i + 1)) = do_ssranu_ ## E1(Vd->E2(j + ofs * i), \
1746 shft_res1 = Vj->Q(idx); in do_vssrani_du_q()
1747 shft_res2 = Vd->Q(idx); in do_vssrani_du_q()
1749 shft_res1 = int128_rshift(Vj->Q(idx), imm); in do_vssrani_du_q()
1750 shft_res2 = int128_rshift(Vd->Q(idx), imm); in do_vssrani_du_q()
1753 if (int128_lt(Vj->Q(idx), int128_zero())) { in do_vssrani_du_q()
1757 if (int128_lt(Vd->Q(idx), int128_zero())) { in do_vssrani_du_q()
1761 Vd->D(idx * 2) = int128_getlo(mask); in do_vssrani_du_q()
1763 Vd->D(idx * 2) = int128_getlo(shft_res1); in do_vssrani_du_q()
1767 Vd->D(idx * 2 + 1) = int128_getlo(mask); in do_vssrani_du_q()
1769 Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); in do_vssrani_du_q()
1800 mask = (1ull << sh) - 1; \
1815 int i, j, ofs; \ in SSRLRNS()
1823 for (j = 0; j < ofs; j++) { \ in SSRLRNS()
1824 Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \ in SSRLRNS()
1825 Vk->E3(j + ofs * i) % BIT, \ in SSRLRNS()
1826 BIT / 2 - 1); \ in SSRLRNS()
1828 Vd->D(2 * i + 1) = 0; \ in SSRLRNS()
1843 mask = (1ll << sh) - 1; \
1846 } else if (shft_res < -(mask +1)) { \
1860 int i, j, ofs; \
1868 for (j = 0; j < ofs; j++) { \
1869 Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
1870 Vk->E3(j + ofs * i) % BIT, \
1871 BIT/ 2 - 1); \
1873 Vd->D(2 * i + 1) = 0; \
1889 mask = (1ull << sh) - 1; \
1904 int i, j, ofs; \
1912 for (j = 0; j < ofs; j++) { \
1913 Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
1914 Vk->E3(j + ofs * i) % BIT, \
1917 Vd->D(2 * i + 1) = 0; \
1936 mask = (1ull << sh) - 1; \
1951 int i, j, ofs; \
1959 for (j = 0; j < ofs; j++) { \
1960 Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
1961 Vk->E3(j + ofs * i) % BIT, \
1964 Vd->D(2 * i + 1) = 0; \
1975 int i, j, ofs; \
1983 for (j = 0; j < ofs; j++) { \
1984 temp.E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \
1985 imm, BIT / 2 - 1); \
1986 temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrns_ ## E1(Vd->E2(j + ofs * i), \
1987 imm, BIT / 2 - 1); \
1998 shft_res1 = Vj->Q(idx);
1999 shft_res2 = Vd->Q(idx);
2001 r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), int128_one());
2002 r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one());
2003 shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1));
2004 shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2));
2008 Vd->D(idx * 2) = int128_getlo(mask);
2010 Vd->D(idx * 2) = int128_getlo(shft_res1);
2014 Vd->D(idx * 2 + 1) = int128_getlo(mask);
2016 Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
2042 int i, j, ofs; \
2050 for (j = 0; j < ofs; j++) { \
2051 temp.E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
2052 imm, BIT / 2 - 1); \
2053 temp.E1(j + ofs * (2 * i + 1)) = do_ssrarns_ ## E1(Vd->E2(j + ofs * i), \
2054 imm, BIT / 2 - 1); \
2066 shft_res1 = Vj->Q(idx); in do_vssrarni_d_q()
2067 shft_res2 = Vd->Q(idx); in do_vssrarni_d_q()
2069 r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); in do_vssrarni_d_q()
2070 r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); in do_vssrarni_d_q()
2071 shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); in do_vssrarni_d_q()
2072 shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); in do_vssrarni_d_q()
2075 Vd->D(idx * 2) = int128_getlo(mask1); in do_vssrarni_d_q()
2077 Vd->D(idx * 2) = int128_getlo(mask2); in do_vssrarni_d_q()
2079 Vd->D(idx * 2) = int128_getlo(shft_res1); in do_vssrarni_d_q()
2083 Vd->D(idx * 2 + 1) = int128_getlo(mask1); in do_vssrarni_d_q()
2085 Vd->D(idx * 2 + 1) = int128_getlo(mask2); in do_vssrarni_d_q()
2087 Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); in do_vssrarni_d_q()
2114 int i, j, ofs; \
2122 for (j = 0; j < ofs; j++) { \
2123 temp.E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
2125 temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrnu_ ## E1(Vd->E2(j + ofs * i), \
2154 int i, j, ofs; \
2162 for (j = 0; j < ofs; j++) { \
2163 temp.E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
2165 temp.E1(j + ofs * (2 * i + 1)) = do_ssrarnu_ ## E1(Vd->E2(j + ofs * i), \
2178 shft_res1 = Vj->Q(idx); in do_vssrarni_du_q()
2179 shft_res2 = Vd->Q(idx); in do_vssrarni_du_q()
2181 r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); in do_vssrarni_du_q()
2182 r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); in do_vssrarni_du_q()
2183 shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); in do_vssrarni_du_q()
2184 shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); in do_vssrarni_du_q()
2187 if (int128_lt(Vj->Q(idx), int128_zero())) { in do_vssrarni_du_q()
2190 if (int128_lt(Vd->Q(idx), int128_zero())) { in do_vssrarni_du_q()
2195 Vd->D(idx * 2) = int128_getlo(mask1); in do_vssrarni_du_q()
2197 Vd->D(idx * 2) = int128_getlo(mask2); in do_vssrarni_du_q()
2199 Vd->D(idx * 2) = int128_getlo(shft_res1); in do_vssrarni_du_q()
2203 Vd->D(idx * 2 + 1) = int128_getlo(mask1); in do_vssrarni_du_q()
2205 Vd->D(idx * 2 + 1) = int128_getlo(mask2); in do_vssrarni_du_q()
2207 Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); in do_vssrarni_du_q()
2241 Vd->E(i) = DO_OP(Vj->E(i)); \
2264 Vd->E(i) = FN(Vj->E(i)); \
2283 Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \
2309 Vd->E(i) = DO_OP(Vj->E(i), imm); \
2329 int i, j, m, ofs; \
2337 m = Vk->E(i * ofs) & MASK; \
2338 for (j = 0; j < ofs; j++) { \
2339 if (Vj->E(j + ofs * i) < 0) { \
2343 Vd->E(m + i * ofs) = j; \
2353 int i, j, m, ofs; \
2361 for (j = 0; j < ofs; j++) { \
2362 if (Vj->E(j + ofs * i) < 0) { \
2366 Vd->E(m + i * ofs) = j; \
2376 int flags = get_float_exception_flags(&env->fp_status); in vec_update_fcsr0_mask()
2378 set_float_exception_flags(0, &env->fp_status); in vec_update_fcsr0_mask()
2384 UPDATE_FP_CAUSE(env->fcsr0, flags); in vec_update_fcsr0_mask()
2387 if (GET_FP_ENABLES(env->fcsr0) & flags) { in vec_update_fcsr0_mask()
2390 UPDATE_FP_FLAGS(env->fcsr0, flags); in vec_update_fcsr0_mask()
2401 SET_FP_CAUSE(env->fcsr0, 0); in vec_clear_cause()
2416 Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
2451 Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
2478 Vd->E(i) = FN(env, Vj->E(i)); \
2486 float_status *status = &env->fp_status; \
2510 Vd->E(i) = FN(env, Vj->E(i)); \
2521 fd = float ## BIT ##_sqrt(fj, &env->fp_status); \
2533 fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \
2545 fp = float ## BIT ##_sqrt(fj, &env->fp_status); \
2546 fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \
2584 int i, j, ofs; in HELPER() local
2593 for (j = 0; j < ofs; j++) { in HELPER()
2594 temp.UW(j + ofs * i) =float16_cvt_float32(Vj->UH(j + ofs * 2 * i), in HELPER()
2595 &env->fp_status); in HELPER()
2605 int i, j, ofs; in HELPER() local
2614 for (j = 0; j < ofs; j++) { in HELPER()
2615 temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i), in HELPER()
2616 &env->fp_status); in HELPER()
2626 int i, j, ofs; in HELPER() local
2635 for (j = 0; j < ofs; j++) { in HELPER()
2636 temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)), in HELPER()
2637 &env->fp_status); in HELPER()
2647 int i, j, ofs; in HELPER() local
2656 for (j = 0; j < ofs; j++) { in HELPER()
2657 temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)), in HELPER()
2658 &env->fp_status); in HELPER()
2668 int i, j, ofs; in HELPER() local
2678 for (j = 0; j < ofs; j++) { in HELPER()
2679 temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i), in HELPER()
2680 &env->fp_status); in HELPER()
2681 temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i), in HELPER()
2682 &env->fp_status); in HELPER()
2692 int i, j, ofs; in HELPER() local
2702 for (j = 0; j < ofs; j++) { in HELPER()
2703 temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i), in HELPER()
2704 &env->fp_status); in HELPER()
2705 temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i), in HELPER()
2706 &env->fp_status); in HELPER()
2723 Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status); in HELPER()
2738 Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status); in HELPER()
2754 FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
2755 set_float_rounding_mode(MODE, &env->fp_status); \
2756 Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
2757 set_float_rounding_mode(old_mode, &env->fp_status); \
2775 FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
2777 set_float_rounding_mode(MODE, &env->fp_status); \
2779 set_float_rounding_mode(old_mode, &env->fp_status); \
2788 fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \
2789 if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \
2842 int i, j, ofs; \ in DO_FTINT()
2852 for (j = 0; j < ofs; j++) { \ in DO_FTINT()
2853 temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \ in DO_FTINT()
2854 temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i)); \ in DO_FTINT()
2879 int i, j, ofs; \
2888 for (j = 0; j < ofs; j++) { \
2889 temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \
2905 int i, j, ofs; \
2914 for (j = 0; j < ofs; j++) { \
2915 temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \
2932 fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \
2950 int i, j, ofs; local
2959 for (j = 0; j < ofs; j++) {
2960 temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i),
2961 &env->fp_status);
2971 int i, j, ofs; in HELPER() local
2980 for (j = 0; j < ofs; j++) { in HELPER()
2981 temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)), in HELPER()
2982 &env->fp_status); in HELPER()
2992 int i, j, ofs; in HELPER() local
3002 for (j = 0; j < ofs; j++) { in HELPER()
3003 temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i), in HELPER()
3004 &env->fp_status); in HELPER()
3005 temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i), in HELPER()
3006 &env->fp_status); in HELPER()
3019 typedef __typeof(Vd->E(0)) TD; \
3023 Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
3071 ret = -1; in vfcmp_common()
3083 VReg *Vd = &(env->fpr[vd].vreg); \
3084 VReg *Vj = &(env->fpr[vj].vreg); \
3085 VReg *Vk = &(env->fpr[vk].vreg); \
3090 cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
3109 Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm); in HELPER()
3118 uint64_t signs = ones << (bits - 1); in do_match2()
3124 cmp0 = (cmp0 - ones) & ~cmp0; in do_match2()
3125 cmp1 = (cmp1 - ones) & ~cmp1; in do_match2()
3133 VReg *Vj = &(env->fpr[vj].vreg); \
3135 env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \
3137 env->cf[cd & 0x7] = env->cf[cd & 0x7] || \
3138 do_match2(0, Vj->D(2), Vj->D(3), MO); \
3151 VReg *Vj = &(env->fpr[vj].vreg); \ in SETANYEQZ()
3153 env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \ in SETANYEQZ()
3155 env->cf[cd & 0x7] = env->cf[cd & 0x7] && \ in SETANYEQZ()
3156 !do_match2(0, Vj->D(2), Vj->D(3), MO); \ in SETANYEQZ()
3170 Vd->E(imm & MASK) = Vj->E(0); \
3184 Vd->E(0) = Vj->E(imm & MASK); \
3186 Vd->E(i) = 0; \
3204 temp.E(2 * i + 1) = Vj->E(2 * i); \
3205 temp.E(2 *i) = Vk->E(2 * i); \
3226 temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
3227 temp.E(2 * i) = Vk->E(2 * i + 1); \
3240 int i, j, ofs; \
3249 for (j = 0; j < ofs; j++) { \
3250 temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \
3251 temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i)); \
3265 int i, j, ofs; \
3274 for (j = 0; j < ofs; j++) { \
3275 temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \
3276 temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1); \
3290 int i, j, ofs; \
3299 for (j = 0; j < ofs; j++) { \
3300 temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \
3301 temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i); \
3315 int i, j, ofs; \
3324 for (j = 0; j < ofs; j++) { \
3325 temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \
3326 temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1)); \
3339 int i, j, m; local
3349 j = i < m ? 0 : 1;
3350 uint64_t k = (uint8_t)Va->B(i) % (2 * m);
3351 temp.B(i) = k < m ? Vk->B(k + j * m): Vj->B(k + (j - 1) * m);
3359 int i, j, m; \
3368 j = i < m ? 0 : 1; \
3369 uint64_t k = ((uint8_t)Vd->E(i)) % (2 * m); \
3370 temp.E(i) = k < m ? Vk->E(k + j * m) : Vj->E(k + (j - 1) * m); \
3382 int i, j, max; \
3390 j = i < max ? 1 : 2; \
3391 temp.E(i) = Vj->E(SHF_POS(i - ((j -1)* max), imm) + (j - 1) * max); \
3409 temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i); in HELPER()
3410 temp.D(2 * i + 1) = (imm & 8 ? Vj : Vd)->D(((imm >> 2) & 1) + 2 * i); in HELPER()
3425 uint64_t k = (uint8_t)Vk->W(i) % 8; in HELPER()
3426 temp.W(i) = Vj->W(k); in HELPER()
3440 temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i); in HELPER()
3441 temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i); in HELPER()
3442 temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i); in HELPER()
3443 temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i); in HELPER()
3454 temp.D(0) = Vj->D(imm & 0x3); in HELPER()
3455 temp.D(1) = Vj->D((imm >> 2) & 0x3); in HELPER()
3456 temp.D(2) = Vj->D((imm >> 4) & 0x3); in HELPER()
3457 temp.D(3) = Vj->D((imm >> 6) & 0x3); in HELPER()
3469 temp.Q(i) = (imm & 2 ? Vd: Vj)->Q(imm & 1); in HELPER()
3486 Vd->E(ins + i * max) = Vj->E(extr + i * max); \