Lines Matching +full:- +full:e

2  * M-profile MVE Operations
24 #include "exec/helper-proto.h"
25 #include "accel/tcg/cpu-ldst.h"
39 if ((env->condexec_bits & 0xf) != 0) { in mve_eci_mask()
43 eci = env->condexec_bits >> 4; in mve_eci_mask()
66 * (3) low-overhead-branch tail predication will mask out part in mve_element_mask()
70 * We combine all these into a 16-bit result with the same semantics in mve_element_mask()
72 * 8-bit vector ops will look at all bits of the result; in mve_element_mask()
73 * 16-bit ops will look at bits 0, 2, 4, ...; in mve_element_mask()
74 * 32-bit ops will look at bits 0, 4, 8 and 12. in mve_element_mask()
76 * the 4-bit slice of the mask corresponding to a single beat. in mve_element_mask()
78 uint16_t mask = FIELD_EX32(env->v7m.vpr, V7M_VPR, P0); in mve_element_mask()
80 if (!(env->v7m.vpr & R_V7M_VPR_MASK01_MASK)) { in mve_element_mask()
83 if (!(env->v7m.vpr & R_V7M_VPR_MASK23_MASK)) { in mve_element_mask()
87 if (env->v7m.ltpsize < 4 && in mve_element_mask()
88 env->regs[14] <= (1 << (4 - env->v7m.ltpsize))) { in mve_element_mask()
95 int masklen = env->regs[14] << env->v7m.ltpsize; in mve_element_mask()
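
The comment lines above (file lines 66-78) describe how predication sources are folded into one 16-bit, byte-granular mask. A minimal standalone sketch of that convention follows; the function name element_active and the sample mask value are illustrative, not code from the file: an element of ESIZE bytes at index e is governed by bit (e * ESIZE) of the mask.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Element e of size esize bytes is active when bit (e * esize) is set. */
    static bool element_active(uint16_t mask, unsigned esize, unsigned e)
    {
        return (mask >> (e * esize)) & 1;
    }

    int main(void)
    {
        uint16_t mask = 0x00ff;   /* first two beats (8 byte lanes) predicated true */
        /* Viewed as 32-bit elements: indices 0 and 1 active, 2 and 3 not. */
        for (unsigned e = 0; e < 4; e++) {
            printf("e=%u active=%d\n", e, element_active(mask, 4, e));
        }
        return 0;
    }
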
112 uint32_t vpr = env->v7m.vpr; in mve_advance_vpt()
117 if ((env->condexec_bits & 0xf) == 0) { in mve_advance_vpt()
118 env->condexec_bits = (env->condexec_bits == (ECI_A0A1A2B0 << 4)) ? in mve_advance_vpt()
147 env->v7m.vpr = vpr; in mve_advance_vpt()
157 unsigned b, e; \
163 for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
165 d[H##ESIZE(e)] = (mask & (1 << b)) ? \
178 unsigned b, e; \
179 for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
181 cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
225 unsigned e; \
227 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \
231 addr = ADDRFN(base, m[H##ESIZE(e)]); \
232 d[H##ESIZE(e)] = (mask & 1) ? \
235 m[H##ESIZE(e)] = addr; \
250 unsigned e; \
252 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \
256 addr = ADDRFN(base, m[H##ESIZE(e)]); \
258 cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
261 m[H##ESIZE(e)] = addr; \
268 * 64-bit accesses are slightly different: they are done as two 32-bit
270 * and with a single 32-bit offset in the first of the two Qm elements.
273 * stored in the even-beat element.
283 unsigned e; \
285 for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \
289 addr = ADDRFN(base, m[H4(e & ~1)]); \
290 addr += 4 * (e & 1); \
291 d[H4(e)] = (mask & 1) ? cpu_ldl_data_ra(env, addr, GETPC()) : 0; \
292 if (WB && (e & 1)) { \
293 m[H4(e & ~1)] = addr - 4; \
307 unsigned e; \
309 for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \
313 addr = ADDRFN(base, m[H4(e & ~1)]); \
314 addr += 4 * (e & 1); \
316 cpu_stl_data_ra(env, addr, d[H4(e)], GETPC()); \
318 if (WB && (e & 1)) { \
319 m[H4(e & ~1)] = addr - 4; \
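
The 64-bit gather/scatter comment (file lines 268-273) and the `addr = ADDRFN(base, m[H4(e & ~1)]); addr += 4 * (e & 1);` lines above describe the address pattern: each pair of 32-bit beats forms one 64-bit access, only the even Qm element supplies the offset, and the odd beat adds 4. A standalone sketch, assuming the simplest address function (base plus offset); sg64_addr and qm_offsets are illustrative names:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t sg64_addr(uint32_t base, const uint32_t *qm_offsets, unsigned e)
    {
        uint32_t addr = base + qm_offsets[e & ~1u];  /* offset taken from the even element */
        return addr + 4 * (e & 1u);                  /* odd beat accesses the high word */
    }

    int main(void)
    {
        uint32_t offs[4] = { 0x10, 0, 0x20, 0 };     /* offsets live in even elements only */
        for (unsigned e = 0; e < 4; e++) {
            printf("e=%u addr=0x%x\n", e, (unsigned)sg64_addr(0x1000, offs, e));
        }
        return 0;
    }
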
380 * one 32-bit memory access per beat. in DO_VLDR64_SG()
386 int beat, e; \ in DO_VLDR64_SG()
397 for (e = 0; e < 4; e++, data >>= 8) { \ in DO_VLDR64_SG()
398 uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \ in DO_VLDR64_SG()
471 int beat, e; \
483 for (e = 0; e < 4; e++, data >>= 8) { \
484 qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \
485 qd[H1(off[beat] + (e >> 1))] = data; \
498 int e; \
507 for (e = 0; e < 2; e++, data >>= 16) { \
508 qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \
548 int beat, e; \
559 for (e = 3; e >= 0; e--) { \
560 uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \
633 int beat, e; \
645 for (e = 3; e >= 0; e--) { \
646 qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \
647 data = (data << 8) | qd[H1(off[beat] + (e >> 1))]; \
661 int e; \
670 for (e = 1; e >= 0; e--) { \
671 qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \
775 * into the 32-bit value, so we only need to write the 32-bit in HELPER()
780 unsigned e; in HELPER()

781 for (e = 0; e < 16 / 4; e++, mask >>= 4) { in HELPER()
782 mergemask(&d[H4(e)], val, mask); in HELPER()
792 unsigned e; \
793 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
794 mergemask(&d[H##ESIZE(e)], FN(m[H##ESIZE(e)]), mask); \
799 #define DO_CLS_B(N) (clrsb32(N) - 24)
800 #define DO_CLS_H(N) (clrsb32(N) - 16)
806 #define DO_CLZ_B(N) (clz32(N) - 24)
807 #define DO_CLZ_H(N) (clz32(N) - 16)
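
The "- 24" and "- 16" in the DO_CLS/DO_CLZ macros above account for widening a byte or halfword element to 32 bits before counting: the extra upper bits are guaranteed zeros (or redundant sign bits for CLS) and must be discounted. A standalone sketch of the CLZ byte case; the portable clz32 loop here merely stands in for the helper the file uses:

    #include <stdint.h>
    #include <stdio.h>

    static int clz32(uint32_t x)                 /* count leading zeros, 32 for x == 0 */
    {
        int n = 0;
        for (uint32_t bit = 0x80000000u; bit && !(x & bit); bit >>= 1) {
            n++;
        }
        return n;
    }

    int main(void)
    {
        uint8_t b = 0x10;                        /* 0001'0000: 3 leading zeros in a byte */
        printf("clz8(0x%02x) = %d\n", (unsigned)b, clz32(b) - 24);   /* prints 3 */
        return 0;
    }
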
824 #define DO_ABS(N) ((N) < 0 ? -(N) : (N))
836 #define DO_NEG(N) (-(N))
850 * All these insns work at 64-bit widths.
857 unsigned e; \
858 for (e = 0; e < 16 / 8; e++, mask >>= 8) { \
859 mergemask(&da[H8(e)], FN(da[H8(e)], imm), mask); \
878 unsigned e; \ in DO_1OP_IMM()
879 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ in DO_1OP_IMM()
880 mergemask(&d[H##ESIZE(e)], \ in DO_1OP_IMM()
881 FN(n[H##ESIZE(e)], m[H##ESIZE(e)]), mask); \ in DO_1OP_IMM()
886 /* provide unsigned 2-op helpers for all sizes */
892 /* provide signed 2-op helpers for all sizes */
899 * "Long" operations where two half-sized inputs (taken from either the
900 * top or the bottom of the input vector) produce a double-width result.
923 unsigned e; \
925 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
927 TYPE r_ = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], &sat); \
928 mergemask(&d[H##ESIZE(e)], r_, mask); \
932 env->vfp.qc[0] = qc; \
937 /* provide unsigned 2-op helpers for all sizes */
943 /* provide signed 2-op helpers for all sizes */
962 #define DO_SUB(N, M) ((N) - (M))
1048 #define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N)) in DO_2OP_S()
1065 return ((uint64_t)n - m) >> 1; in do_vhsub_u()
1070 return ((int64_t)n - m) >> 1; in do_vhsub_s()
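
The do_vhsub lines above widen to 64 bits before subtracting so the difference cannot wrap, then halve with a single shift. A standalone sketch of the signed 32-bit case; vhsub_s32 is an illustrative name, and like the original it relies on arithmetic right shift of negative values, which the relevant compilers provide:

    #include <stdint.h>
    #include <stdio.h>

    static int32_t vhsub_s32(int32_t n, int32_t m)
    {
        return (int32_t)(((int64_t)n - m) >> 1);   /* widen, subtract, halve */
    }

    int main(void)
    {
        /* INT32_MIN - INT32_MAX would overflow in 32 bits; widening avoids that. */
        printf("%d\n", vhsub_s32(-2147483647 - 1, 2147483647));   /* prints -2147483648 */
        return 0;
    }
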
1098 unsigned e; in DO_2OP_S()

1105 for (e = 0; e < 16 / 4; e++, mask >>= 4) { in DO_2OP_S()
1107 r += n[H4(e)]; in DO_2OP_S()
1108 r += m[H4(e)] ^ inv; in DO_2OP_S()
1112 mergemask(&d[H4(e)], r, mask); in DO_2OP_S()
1117 env->vfp.fpsr &= ~FPSR_NZCV_MASK; in DO_2OP_S()
1118 env->vfp.fpsr |= carry_in * FPSR_C; in DO_2OP_S()
1125 bool carry_in = env->vfp.fpsr & FPSR_C; in HELPER()
1131 bool carry_in = env->vfp.fpsr & FPSR_C; in HELPER()
1132 do_vadc(env, vd, vn, vm, -1, carry_in, false); in HELPER()
1143 do_vadc(env, vd, vn, vm, -1, 1, true); in HELPER()
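
The do_vadc lines above chain a carry through the 32-bit lanes and write the final carry to the FPSCR C flag. A standalone sketch of the add form only, assuming the carry-out of each lane is bit 32 of a 64-bit per-lane sum; it ignores predication and the subtract variant (the `m ^ inv` seen above), and vadc_u32x4 is an illustrative name:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool vadc_u32x4(uint32_t d[4], const uint32_t n[4],
                           const uint32_t m[4], bool carry_in)
    {
        for (int e = 0; e < 4; e++) {
            uint64_t r = (uint64_t)carry_in + n[e] + m[e];
            d[e] = (uint32_t)r;
            carry_in = r >> 32;        /* carry out of this lane feeds the next */
        }
        return carry_in;               /* would be written back to FPSCR.C */
    }

    int main(void)
    {
        uint32_t n[4] = { 0xffffffffu, 0, 0, 0 }, m[4] = { 1, 0, 0, 0 }, d[4];
        bool c = vadc_u32x4(d, n, m, false);
        printf("d0=%u d1=%u carry=%d\n", d[0], d[1], c);   /* d0=0 d1=1 carry=0 */
        return 0;
    }
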
1151 unsigned e; \
1154 for (e = 0; e < 16 / ESIZE; e++) { \
1155 if (!(e & 1)) { \
1156 r[e] = FN0(n[H##ESIZE(e)], m[H##ESIZE(e + 1)]); \
1158 r[e] = FN1(n[H##ESIZE(e)], m[H##ESIZE(e - 1)]); \
1161 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
1162 mergemask(&d[H##ESIZE(e)], r[e], mask); \
1197 #define DO_SQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, INT8_MIN, INT8_MAX, s)
1198 #define DO_SQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, INT16_MIN, INT16_MAX, s)
1199 #define DO_SQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, INT32_MIN, INT32_MAX, s)
1201 #define DO_UQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT8_MAX, s)
1202 #define DO_UQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT16_MAX, s)
1203 #define DO_UQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT32_MAX, s)
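
The SQSUB/UQSUB macros above all follow the same shape: compute the difference in 64 bits, then clamp to the element's range and record saturation via a do_sat helper. A standalone sketch of that clamp; sat_range is an illustrative name with a simplified signature, not the file's exact helper:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static int64_t sat_range(int64_t val, int64_t min, int64_t max, bool *satp)
    {
        if (val < min) { *satp = true; return min; }
        if (val > max) { *satp = true; return max; }
        return val;
    }

    int main(void)
    {
        bool sat = false;
        /* UQSUB.B: 5 - 9 would go negative, so it saturates to 0 */
        int64_t r = sat_range((int64_t)5 - 9, 0, UINT8_MAX, &sat);
        printf("r=%lld sat=%d\n", (long long)r, sat);   /* r=0 sat=1 */
        return 0;
    }
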
1207 * "shift by esize-1", adjusting the QRDMULH rounding constant to match.
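
The comment at file line 1207 notes that "double and shift by esize" can be folded into "shift by esize-1" provided the QRDMULH rounding constant is halved to match. A standalone check of that identity on one byte-sized example (the surrounding macros are not reproduced here):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int8_t n = 100, m = 57;
        int64_t p = (int64_t)n * m;
        int64_t a = (2 * p + (1 << 7)) >> 8;   /* doubled product, shift by esize, round 1<<7 */
        int64_t b = (p + (1 << 6)) >> 7;       /* shift by esize-1, rounding constant halved */
        printf("%lld %lld\n", (long long)a, (long long)b);   /* both print 45 */
        return 0;
    }
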
1281 * (A * B - C * D) etc for VQDMLSDH. in DO_2OP_SAT_S()
1289 unsigned e; \ in DO_2OP_SAT_S()
1291 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ in DO_2OP_SAT_S()
1293 if ((e & 1) == XCHG) { \ in DO_2OP_SAT_S()
1294 TYPE vqdmladh_ret = FN(n[H##ESIZE(e)], \ in DO_2OP_SAT_S()
1295 m[H##ESIZE(e - XCHG)], \ in DO_2OP_SAT_S()
1296 n[H##ESIZE(e + (1 - 2 * XCHG))], \ in DO_2OP_SAT_S()
1297 m[H##ESIZE(e + (1 - XCHG))], \ in DO_2OP_SAT_S()
1299 mergemask(&d[H##ESIZE(e)], vqdmladh_ret, mask); \ in DO_2OP_SAT_S()
1304 env->vfp.qc[0] = qc; \ in DO_2OP_SAT_S()
1334 * bring it back into the non-saturated range. However, if in do_vqdmladh_w()
1353 int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 7); in do_vqdmlsdh_b()
1360 int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 15); in do_vqdmlsdh_h()
1415 unsigned e; \
1416 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
1417 mergemask(&d[H##ESIZE(e)], FN(n[H##ESIZE(e)], m), mask); \
1429 unsigned e; \
1431 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
1433 mergemask(&d[H##ESIZE(e)], FN(n[H##ESIZE(e)], m, &sat), \
1438 env->vfp.qc[0] = qc; \
1451 unsigned e; \
1452 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
1453 mergemask(&d[H##ESIZE(e)], \
1454 FN(d[H##ESIZE(e)], n[H##ESIZE(e)], m), mask); \
1466 unsigned e; \
1468 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
1470 mergemask(&d[H##ESIZE(e)], \
1471 FN(d[H##ESIZE(e)], n[H##ESIZE(e)], m, &sat), \
1476 env->vfp.qc[0] = qc; \
1481 /* provide unsigned 2-op scalar helpers for all sizes */
1546 * bring it back into the non-saturated range. However, if in do_vqdmlah_w()
1612 * whether to propagate a saturation indication into FPSCR.QC -- for in DO_2OP_ACC_SCALAR_U()
1613 * the 16x16->32 case we must check only the bit corresponding to the T or B in DO_2OP_ACC_SCALAR_U()
1614 * half that we used, but for the 32x32->64 case we propagate if the mask in DO_2OP_ACC_SCALAR_U()
1634 env->vfp.qc[0] = qc; \ in DO_2OP_ACC_SCALAR_U()
1693 env->vfp.qc[0] = qc; \
1711 n >>= 8 - m; in do_vbrsrb()
1724 n >>= 16 - m; in do_vbrsrh()
1737 n >>= 32 - m; in do_vbrsrw()
1754 unsigned e; \
1756 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
1758 if (e & 1) { \
1760 (int64_t)n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \
1763 (int64_t)n[H##ESIZE(e + 1 * XCHG)] * m[H##ESIZE(e)]; \
1779 DO_LDAV(vmlsldavsh, 2, int16_t, false, +=, -=)
1780 DO_LDAV(vmlsldavxsh, 2, int16_t, true, +=, -=)
1781 DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=)
1782 DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
1792 unsigned e; \
1794 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
1796 if (e & 1) { \
1798 n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \
1801 n[H##ESIZE(e + 1 * XCHG)] * m[H##ESIZE(e)]; \
1821 DO_DAV_S(vmlsdav, false, +=, -=)
1823 DO_DAV_S(vmlsdavx, true, +=, -=)
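
The "+=, -=" arguments to the DO_LDAV/DO_DAV expansions above select how even and odd elements accumulate: for the VMLSDAV family, products from even-numbered elements are added and those from odd-numbered elements subtracted. A standalone sketch for four 32-bit lanes that ignores predication and the exchanged (XCHG) variant; vmlsdav_s32 is an illustrative name:

    #include <stdint.h>
    #include <stdio.h>

    static int64_t vmlsdav_s32(const int32_t n[4], const int32_t m[4], int64_t ra)
    {
        for (int e = 0; e < 4; e++) {
            int64_t p = (int64_t)n[e] * m[e];
            if (e & 1) {
                ra -= p;            /* odd element: subtract the product */
            } else {
                ra += p;            /* even element: add the product */
            }
        }
        return ra;
    }

    int main(void)
    {
        int32_t n[4] = { 1, 2, 3, 4 }, m[4] = { 10, 10, 10, 10 };
        printf("%lld\n", (long long)vmlsdav_s32(n, m, 0));   /* 10 - 20 + 30 - 40 = -20 */
        return 0;
    }
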
1827 * this is implemented with a 72-bit internal accumulator value of which
1829 * use 128-bit arithmetic -- we can do this because the 74-bit accumulator
1830 * is squashed back into 64-bits after each beat.
1837 unsigned e; \
1839 for (e = 0; e < 16 / 4; e++, mask >>= 4) { \
1842 if (e & 1) { \
1843 mul = (LTYPE)n[H4(e - 1 * XCHG)] * m[H4(e)]; \
1845 mul = -mul; \
1848 mul = (LTYPE)n[H4(e + 1 * XCHG)] * m[H4(e)]; \
1872 unsigned e; \ in DO_LDAVH()
1874 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ in DO_LDAVH()
1876 ra += m[H##ESIZE(e)]; \ in DO_LDAVH()
1900 unsigned e; \
1903 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
1905 ra = FN(ra, m[H##ESIZE(e)]); \
1928 m = -m;
1936 m = -m; in do_mina()
1961 unsigned e; \ in DO_VMAXMINV_S()
1963 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ in DO_VMAXMINV_S()
1965 int64_t n0 = n[H##ESIZE(e)]; \ in DO_VMAXMINV_S()
1966 int64_t m0 = m[H##ESIZE(e)]; \ in DO_VMAXMINV_S()
1967 uint32_t r = n0 >= m0 ? (n0 - m0) : (m0 - n0); \ in DO_VMAXMINV_S()
1987 unsigned e; \
1989 for (e = 0; e < 16 / 4; e++, mask >>= 4) { \
1991 ra += (LTYPE)m[H4(e)]; \
2008 unsigned e; \
2009 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
2010 mergemask(&d[H##ESIZE(e)], \
2011 FN(m[H##ESIZE(e)], shift), mask); \
2022 unsigned e; \
2024 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
2026 mergemask(&d[H##ESIZE(e)], \
2027 FN(m[H##ESIZE(e)], shift, &sat), mask); \
2031 env->vfp.qc[0] = qc; \
2036 /* provide unsigned 2-op shift helpers for all sizes */
2065 /* Shift-and-insert; we always work with 64 bits at a time */
2073 unsigned e; \
2078 * this because it would try to shift by an out-of-range \
2087 for (e = 0; e < 16 / 8; e++, mask >>= 8) { \
2088 uint64_t r = (SHIFTFN(m[H8(e)], shift) & shiftmask) | \
2089 (d[H8(e)] & ~shiftmask); \
2090 mergemask(&d[H8(e)], r, mask); \
2098 #define SHL_MASK(EBITS, SHIFT) MAKE_64BIT_MASK((SHIFT), (EBITS) - (SHIFT))
2099 #define SHR_MASK(EBITS, SHIFT) MAKE_64BIT_MASK(0, (EBITS) - (SHIFT))
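
The shift-and-insert lines above keep only the bits the shift can produce from the shifted operand and preserve the rest from the destination, using SHL_MASK/SHR_MASK to build the merge mask. A standalone sketch of VSLI on a single 8-bit element; make_mask and vsli8 are illustrative stand-ins for MAKE_64BIT_MASK and the macro expansion:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t make_mask(unsigned start, unsigned len)      /* len bits from bit start */
    {
        return ((len < 64 ? (1ull << len) : 0) - 1) << start;
    }

    static uint8_t vsli8(uint8_t d, uint8_t m, unsigned shift)   /* 0 <= shift < 8 */
    {
        uint64_t shiftmask = make_mask(shift, 8 - shift);        /* SHL_MASK(8, shift) */
        return (uint8_t)(((uint64_t)(m << shift) & shiftmask) | (d & ~shiftmask));
    }

    int main(void)
    {
        /* Insert 0xab shifted left by 4 into 0xcd: the low nibble of d survives. */
        printf("0x%02x\n", vsli8(0xcd, 0xab, 4));   /* prints 0xbd */
        return 0;
    }
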
2109 * Long shifts taking half-sized inputs from top or bottom of the input
2110 * vector and producing a double-width result. ESIZE, TYPE are for
2113 * because the long shift is strictly left-only.
2170 return (x >> sh) + ((x >> (sh - 1)) & 1);
2181 return (x >> sh) + ((x >> (sh - 1)) & 1); in do_srshr()
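
The rounding-shift-right expression above adds back the last bit shifted out, so results round to nearest with .5 cases rounded up; the signed do_srshr uses the same formula. A standalone sketch of the unsigned flavour with an illustrative name:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t urshr(uint64_t x, unsigned sh)   /* 1 <= sh <= 63 */
    {
        return (x >> sh) + ((x >> (sh - 1)) & 1);
    }

    int main(void)
    {
        printf("%llu\n", (unsigned long long)urshr(5, 1));  /* 5/2 = 2.5 -> 3 */
        printf("%llu\n", (unsigned long long)urshr(4, 1));  /* 4/2 = 2   -> 2 */
        return 0;
    }
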
2223 env->vfp.qc[0] = qc; \
2322 env->vfp.qc[0] = qc; \
2369 unsigned e;

2373 * For each 32-bit element, we shift it left, bringing in the
2380 for (e = 0; e < 16 / 4; e++, mask >>= 4) {
2383 rdm = d[H4(e)];
2385 mergemask(&d[H4(e)], r, mask);
2390 for (e = 0; e < 16 / 4; e++, mask >>= 4) {
2391 r = (d[H4(e)] << shift) | (rdm & shiftmask);
2393 rdm = d[H4(e)] >> (32 - shift);
2395 mergemask(&d[H4(e)], r, mask);
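
The VSHLC loop above shifts each 32-bit lane left, fills the vacated low bits from rdm, and hands the bits shifted out of the lane on as the next rdm. A standalone sketch of that data flow which ignores the predicated-beat handling shown above; vshlc is an illustrative name and 1 <= shift <= 31 is assumed:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t vshlc(uint32_t d[4], uint32_t rdm, unsigned shift)
    {
        uint32_t shiftmask = (1u << shift) - 1;
        for (int e = 0; e < 4; e++) {
            uint32_t out = d[e] >> (32 - shift);          /* bits leaving this lane */
            d[e] = (d[e] << shift) | (rdm & shiftmask);   /* bring rdm bits in at the bottom */
            rdm = out;                                    /* they feed the next lane */
        }
        return rdm;                                       /* written back to the Rdm register */
    }

    int main(void)
    {
        uint32_t d[4] = { 0x80000001u, 0, 0, 0 };
        uint32_t rdm = vshlc(d, 1, 1);
        printf("d0=0x%x d1=0x%x rdm=0x%x\n", d[0], d[1], rdm);  /* d0=0x3 d1=0x1 rdm=0x0 */
        return 0;
    }
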
2404 return do_sqrshl_d(n, -(int8_t)shift, false, NULL); in HELPER()
2414 return do_sqrshl_d(n, (int8_t)shift, false, &env->QF); in HELPER()
2419 return do_uqrshl_d(n, (int8_t)shift, false, &env->QF); in HELPER()
2424 return do_sqrshl_d(n, -(int8_t)shift, true, &env->QF); in HELPER()
2429 return do_uqrshl_d(n, (int8_t)shift, true, &env->QF); in HELPER()
2432 /* Operate on 64-bit values, but saturate at 48 bits */
2438 if (shift <= -48) { in do_sqrshl48_d()
2446 src >>= -shift - 1; in do_sqrshl48_d()
2449 val = src >> -shift; in do_sqrshl48_d()
2468 /* Operate on 64-bit values, but saturate at 48 bits */
2474 if (shift <= -(48 + round)) { in do_uqrshl48_d()
2478 val = src >> (-shift - 1); in do_uqrshl48_d()
2481 val = src >> -shift; in do_uqrshl48_d()
2502 return do_sqrshl48_d(n, -(int8_t)shift, true, &env->QF); in HELPER()
2507 return do_uqrshl48_d(n, (int8_t)shift, true, &env->QF); in HELPER()
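
The do_sqrshl48_d/do_uqrshl48_d lines above work on 64-bit values but saturate at 48 bits. A standalone sketch of just the signed clamp step, omitting the shift and rounding the real helpers also perform; sat48, MAX48 and MIN48 are illustrative names:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX48  ((INT64_C(1) << 47) - 1)
    #define MIN48  (-(INT64_C(1) << 47))

    static int64_t sat48(int64_t v, bool *satp)
    {
        if (v > MAX48) { *satp = true; return MAX48; }
        if (v < MIN48) { *satp = true; return MIN48; }
        return v;
    }

    int main(void)
    {
        bool sat = false;
        int64_t r = sat48(INT64_C(1) << 50, &sat);        /* well above the 48-bit range */
        printf("%lld sat=%d\n", (long long)r, sat);       /* clamps to 2^47 - 1, sat=1 */
        return 0;
    }
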
2512 return do_uqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF); in HELPER()
2517 return do_sqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF); in HELPER()
2522 return do_uqrshl_bhs(n, (int8_t)shift, 32, true, &env->QF); in HELPER()
2527 return do_sqrshl_bhs(n, -(int8_t)shift, 32, true, &env->QF); in HELPER()
2536 unsigned e; \
2537 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
2538 mergemask(&d[H##ESIZE(e)], offset, mask); \
2552 unsigned e; \
2553 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
2554 mergemask(&d[H##ESIZE(e)], offset, mask); \
2585 offset -= imm; in do_sub_wrap()
2595 * P0 bits for non-executed beats (where eci_mask is 0) are unchanged. in DO_VIDUP_ALL()
2608 unsigned e; \ in DO_VIDUP_ALL()
2609 for (e = 0; e < 16 / ESIZE; e++) { \ in DO_VIDUP_ALL()
2610 bool r = FN(n[H##ESIZE(e)], m[H##ESIZE(e)]); \ in DO_VIDUP_ALL()
2616 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \ in DO_VIDUP_ALL()
2630 unsigned e; \
2631 for (e = 0; e < 16 / ESIZE; e++) { \
2632 bool r = FN(n[H##ESIZE(e)], (TYPE)rm); \
2638 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
2686 uint16_t p0 = FIELD_EX32(env->v7m.vpr, V7M_VPR, P0);
2687 unsigned e;

2688 for (e = 0; e < 16 / 8; e++, mask >>= 8, p0 >>= 8) {
2689 uint64_t r = m[H8(e)];
2690 mergemask(&r, n[H8(e)], p0);
2691 mergemask(&d[H8(e)], r, mask);
2703 * This insn is itself subject to predication and to beat-wise execution, in HELPER()
2708 uint16_t beatpred = ~env->v7m.vpr & mask; in HELPER()
2709 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (beatpred & eci_mask); in HELPER()
2717 * ltpmask in mve_element_mask(), but we have pre-calculated
2729 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (newmask & eci_mask); in HELPER()
2738 unsigned e; \
2740 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
2742 mergemask(&d[H##ESIZE(e)], FN(m[H##ESIZE(e)], &sat), mask); \
2746 env->vfp.qc[0] = qc; \
2758 #define DO_VQNEG_B(N, SATP) do_sat_bhs(-(int64_t)N, INT8_MIN, INT8_MAX, SATP)
2759 #define DO_VQNEG_H(N, SATP) do_sat_bhs(-(int64_t)N, INT16_MIN, INT16_MAX, SATP)
2760 #define DO_VQNEG_W(N, SATP) do_sat_bhs(-(int64_t)N, INT32_MIN, INT32_MAX, SATP)
2780 unsigned e; \
2781 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
2782 UTYPE r = DO_ABS(m[H##ESIZE(e)]); \
2783 r = FN(d[H##ESIZE(e)], r); \
2784 mergemask(&d[H##ESIZE(e)], r, mask); \
2797 * 2-operand floating point. Note that if an element is partially
2798 * predicated we must do the FP operation to update the non-predicated
2809 unsigned e; \
2812 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
2816 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
2822 r = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], fpst); \
2823 mergemask(&d[H##ESIZE(e)], r, mask); \
2880 unsigned e; \ in DO_2OP_FP_ALL()
2884 for (e = 0, tm = mask; e < 16 / ESIZE; e++, tm >>= ESIZE) { \ in DO_2OP_FP_ALL()
2886 r[e] = 0; \ in DO_2OP_FP_ALL()
2889 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ in DO_2OP_FP_ALL()
2895 if (!(e & 1)) { \ in DO_2OP_FP_ALL()
2896 r[e] = FN0(n[H##ESIZE(e)], m[H##ESIZE(e + 1)], fpst); \ in DO_2OP_FP_ALL()
2898 r[e] = FN1(n[H##ESIZE(e)], m[H##ESIZE(e - 1)], fpst); \ in DO_2OP_FP_ALL()
2901 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ in DO_2OP_FP_ALL()
2902 mergemask(&d[H##ESIZE(e)], r[e], mask); \ in DO_2OP_FP_ALL()
2919 unsigned e; \
2922 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
2926 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
2932 r = n[H##ESIZE(e)]; \
2936 r = TYPE##_muladd(r, m[H##ESIZE(e)], d[H##ESIZE(e)], \
2938 mergemask(&d[H##ESIZE(e)], r, mask); \
2955 unsigned e; \
2959 for (e = 0; e < 16 / ESIZE; e += 2, mask >>= ESIZE * 2) { \
2963 fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
2975 e1 = m[H##ESIZE(e)]; \
2976 e2 = n[H##ESIZE(e)]; \
2977 e3 = m[H##ESIZE(e + 1)]; \
2978 e4 = n[H##ESIZE(e)]; \
2981 e1 = TYPE##_chs(m[H##ESIZE(e + 1)]); \
2982 e2 = n[H##ESIZE(e + 1)]; \
2983 e3 = m[H##ESIZE(e)]; \
2984 e4 = n[H##ESIZE(e + 1)]; \
2987 e1 = TYPE##_chs(m[H##ESIZE(e)]); \
2988 e2 = n[H##ESIZE(e)]; \
2989 e3 = TYPE##_chs(m[H##ESIZE(e + 1)]); \
2990 e4 = n[H##ESIZE(e)]; \
2993 e1 = m[H##ESIZE(e + 1)]; \
2994 e2 = n[H##ESIZE(e + 1)]; \
2995 e3 = TYPE##_chs(m[H##ESIZE(e)]); \
2996 e4 = n[H##ESIZE(e + 1)]; \
3001 r0 = FN(e2, e1, d[H##ESIZE(e)], fpst0); \
3002 r1 = FN(e4, e3, d[H##ESIZE(e + 1)], fpst1); \
3003 mergemask(&d[H##ESIZE(e)], r0, mask); \
3004 mergemask(&d[H##ESIZE(e + 1)], r1, mask >> ESIZE); \
3040 unsigned e; \
3043 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
3047 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3053 r = FN(n[H##ESIZE(e)], m, fpst); \
3054 mergemask(&d[H##ESIZE(e)], r, mask); \
3074 unsigned e; \
3077 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
3081 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3087 r = FN(n[H##ESIZE(e)], m, d[H##ESIZE(e)], 0, fpst); \
3088 mergemask(&d[H##ESIZE(e)], r, mask); \
3109 unsigned e; \
3113 &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3114 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
3116 TYPE v = m[H##ESIZE(e)]; \
3155 unsigned e; \
3159 for (e = 0; e < 16 / ESIZE; e++, emask <<= ESIZE) { \
3163 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3164 if (!(mask & (1 << (e * ESIZE)))) { \
3169 r = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], fpst); \
3174 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
3188 unsigned e; \
3192 for (e = 0; e < 16 / ESIZE; e++, emask <<= ESIZE) { \
3196 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3197 if (!(mask & (1 << (e * ESIZE)))) { \
3202 r = FN(n[H##ESIZE(e)], (TYPE)rm, fpst); \
3207 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
3253 unsigned e; \
3256 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
3260 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3266 r = FN(m[H##ESIZE(e)], shift, fpst); \
3267 mergemask(&d[H##ESIZE(e)], r, mask); \
3289 unsigned e; \
3293 &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3296 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
3306 r = FN(m[H##ESIZE(e)], 0, fpst); \
3307 mergemask(&d[H##ESIZE(e)], r, mask); \
3334 bool ieee = !(env->vfp.fpcr & FPCR_AHP);
3335 unsigned e;

3338 float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
3341 for (e = 0; e < 16 / 4; e++, mask >>= 4) {
3351 r = float32_to_float16(m[H4(e)], ieee, fpst);
3352 mergemask(&d[H2(e * 2 + top)], r, mask >> (top * 2));
3364 bool ieee = !(env->vfp.fpcr & FPCR_AHP); in do_vcvt_hs()
3365 unsigned e; in do_vcvt_hs()

3368 float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; in do_vcvt_hs()
3371 for (e = 0; e < 16 / 4; e++, mask >>= 4) { in do_vcvt_hs()
3381 r = float16_to_float32(m[H2(e * 2 + top)], ieee, fpst); in do_vcvt_hs()
3382 mergemask(&d[H4(e)], r, mask); in do_vcvt_hs()
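
The half/single conversion lines above pair each 32-bit element e with 16-bit element (2 * e + top), i.e. the bottom (top = 0) or top (top = 1) half of the same 32-bit slot, leaving the other half untouched. A standalone sketch of that lane mapping only; the conversion itself is stubbed out and is not the real float32-to-float16 call, and all names here are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t f32_to_f16_stub(uint32_t f)
    {
        return (uint16_t)(f >> 16);     /* placeholder, not a real conversion */
    }

    static void narrow_f32_to_f16(uint16_t d[8], const uint32_t m[4], int top)
    {
        for (int e = 0; e < 4; e++) {
            d[2 * e + top] = f32_to_f16_stub(m[e]);   /* other half of the slot untouched */
        }
    }

    int main(void)
    {
        uint32_t m[4] = { 0x3f800000u, 0x40000000u, 0x40400000u, 0x40800000u };
        uint16_t d[8] = { 0 };
        narrow_f32_to_f16(d, m, 1);                   /* "top" form writes the odd halves */
        for (int i = 0; i < 8; i++) {
            printf("d[%d]=0x%04x\n", i, d[i]);
        }
        return 0;
    }
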
3411 unsigned e; \
3414 for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
3418 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3424 r = FN(m[H##ESIZE(e)], fpst); \
3425 mergemask(&d[H##ESIZE(e)], r, mask); \