Lines Matching +full:- +full:chs

2  * M-profile MVE Operations
24 #include "exec/helper-proto.h"
25 #include "accel/tcg/cpu-ldst.h"
39 if ((env->condexec_bits & 0xf) != 0) { in mve_eci_mask()
43 eci = env->condexec_bits >> 4; in mve_eci_mask()
66 * (3) low-overhead-branch tail predication will mask out part in mve_element_mask()
70 * We combine all these into a 16-bit result with the same semantics in mve_element_mask()
72 * 8-bit vector ops will look at all bits of the result; in mve_element_mask()
73 * 16-bit ops will look at bits 0, 2, 4, ...; in mve_element_mask()
74 * 32-bit ops will look at bits 0, 4, 8 and 12. in mve_element_mask()
76 * the 4-bit slice of the mask corresponding to a single beat. in mve_element_mask()
78 uint16_t mask = FIELD_EX32(env->v7m.vpr, V7M_VPR, P0); in mve_element_mask()
80 if (!(env->v7m.vpr & R_V7M_VPR_MASK01_MASK)) { in mve_element_mask()
83 if (!(env->v7m.vpr & R_V7M_VPR_MASK23_MASK)) { in mve_element_mask()
87 if (env->v7m.ltpsize < 4 && in mve_element_mask()
88 env->regs[14] <= (1 << (4 - env->v7m.ltpsize))) { in mve_element_mask()
95 int masklen = env->regs[14] << env->v7m.ltpsize; in mve_element_mask()
112 uint32_t vpr = env->v7m.vpr; in mve_advance_vpt()
117 if ((env->condexec_bits & 0xf) == 0) { in mve_advance_vpt()
118 env->condexec_bits = (env->condexec_bits == (ECI_A0A1A2B0 << 4)) ? in mve_advance_vpt()
147 env->v7m.vpr = vpr; in mve_advance_vpt()
268 * 64-bit accesses are slightly different: they are done as two 32-bit
270 * and with a single 32-bit offset in the first of the two Qm elements.
273 * stored in the even-beat element.
293 m[H4(e & ~1)] = addr - 4; \
319 m[H4(e & ~1)] = addr - 4; \
380 * one 32-bit memory access per beat. in DO_VLDR64_SG()
559 for (e = 3; e >= 0; e--) { \
645 for (e = 3; e >= 0; e--) { \
670 for (e = 1; e >= 0; e--) { \
775 * into the 32-bit value, so we only need to write the 32-bit in HELPER()
799 #define DO_CLS_B(N) (clrsb32(N) - 24)
800 #define DO_CLS_H(N) (clrsb32(N) - 16)
806 #define DO_CLZ_B(N) (clz32(N) - 24)
807 #define DO_CLZ_H(N) (clz32(N) - 16)
824 #define DO_ABS(N) ((N) < 0 ? -(N) : (N))
836 #define DO_NEG(N) (-(N))
850 * All these insns work at 64-bit widths.
886 /* provide unsigned 2-op helpers for all sizes */
892 /* provide signed 2-op helpers for all sizes */
899 * "Long" operations where two half-sized inputs (taken from either the
900 * top or the bottom of the input vector) produce a double-width result.
932 env->vfp.qc[0] = qc; \
937 /* provide unsigned 2-op helpers for all sizes */
943 /* provide signed 2-op helpers for all sizes */
962 #define DO_SUB(N, M) ((N) - (M))
1048 #define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N)) in DO_2OP_S()
1065 return ((uint64_t)n - m) >> 1; in do_vhsub_u()
1070 return ((int64_t)n - m) >> 1; in do_vhsub_s()
1117 env->vfp.fpsr &= ~FPSR_NZCV_MASK; in DO_2OP_S()
1118 env->vfp.fpsr |= carry_in * FPSR_C; in DO_2OP_S()
1125 bool carry_in = env->vfp.fpsr & FPSR_C; in HELPER()
1131 bool carry_in = env->vfp.fpsr & FPSR_C; in HELPER()
1132 do_vadc(env, vd, vn, vm, -1, carry_in, false); in HELPER()
1143 do_vadc(env, vd, vn, vm, -1, 1, true); in HELPER()
1158 r[e] = FN1(n[H##ESIZE(e)], m[H##ESIZE(e - 1)]); \
1197 #define DO_SQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, INT8_MIN, INT8_MAX, s)
1198 #define DO_SQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, INT16_MIN, INT16_MAX, s)
1199 #define DO_SQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, INT32_MIN, INT32_MAX, s)
1201 #define DO_UQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT8_MAX, s)
1202 #define DO_UQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT16_MAX, s)
1203 #define DO_UQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT32_MAX, s)
1207 * "shift by esize-1", adjusting the QRDMULH rounding constant to match.
1281 * (A * B - C * D) etc for VQDMLSDH. in DO_2OP_SAT_S()
1295 m[H##ESIZE(e - XCHG)], \ in DO_2OP_SAT_S()
1296 n[H##ESIZE(e + (1 - 2 * XCHG))], \ in DO_2OP_SAT_S()
1297 m[H##ESIZE(e + (1 - XCHG))], \ in DO_2OP_SAT_S()
1304 env->vfp.qc[0] = qc; \ in DO_2OP_SAT_S()
1334 * bring it back into the non-saturated range. However, if in do_vqdmladh_w()
1353 int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 7); in do_vqdmlsdh_b()
1360 int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 15); in do_vqdmlsdh_h()
1438 env->vfp.qc[0] = qc; \
1476 env->vfp.qc[0] = qc; \
1481 /* provide unsigned 2-op scalar helpers for all sizes */
1546 * bring it back into the non-saturated range. However, if in do_vqdmlah_w()
1612 * whether to propagate a saturation indication into FPSCR.QC -- for in DO_2OP_ACC_SCALAR_U()
1613 * the 16x16->32 case we must check only the bit corresponding to the T or B in DO_2OP_ACC_SCALAR_U()
1614 * half that we used, but for the 32x32->64 case we propagate if the mask in DO_2OP_ACC_SCALAR_U()
1634 env->vfp.qc[0] = qc; \ in DO_2OP_ACC_SCALAR_U()
1693 env->vfp.qc[0] = qc; \
1711 n >>= 8 - m; in do_vbrsrb()
1724 n >>= 16 - m; in do_vbrsrh()
1737 n >>= 32 - m; in do_vbrsrw()
1760 (int64_t)n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \
1779 DO_LDAV(vmlsldavsh, 2, int16_t, false, +=, -=)
1780 DO_LDAV(vmlsldavxsh, 2, int16_t, true, +=, -=)
1781 DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=)
1782 DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
1798 n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \
1821 DO_DAV_S(vmlsdav, false, +=, -=)
1823 DO_DAV_S(vmlsdavx, true, +=, -=)
1827 * this is implemented with a 72-bit internal accumulator value of which
1829  * use 128-bit arithmetic -- we can do this because the 72-bit accumulator
1830 * is squashed back into 64-bits after each beat.
1843 mul = (LTYPE)n[H4(e - 1 * XCHG)] * m[H4(e)]; \
1845 mul = -mul; \
1928 m = -m;
1936 m = -m; in do_mina()
1967 uint32_t r = n0 >= m0 ? (n0 - m0) : (m0 - n0); \ in DO_VMAXMINV_S()
2031 env->vfp.qc[0] = qc; \
2036 /* provide unsigned 2-op shift helpers for all sizes */
2065 /* Shift-and-insert; we always work with 64 bits at a time */
2078 * this because it would try to shift by an out-of-range \
2098 #define SHL_MASK(EBITS, SHIFT) MAKE_64BIT_MASK((SHIFT), (EBITS) - (SHIFT))
2099 #define SHR_MASK(EBITS, SHIFT) MAKE_64BIT_MASK(0, (EBITS) - (SHIFT))
2109 * Long shifts taking half-sized inputs from top or bottom of the input
2110 * vector and producing a double-width result. ESIZE, TYPE are for
2113 * because the long shift is strictly left-only.
2170 return (x >> sh) + ((x >> (sh - 1)) & 1);
2181 return (x >> sh) + ((x >> (sh - 1)) & 1); in do_srshr()
2223 env->vfp.qc[0] = qc; \
2322 env->vfp.qc[0] = qc; \
2373 * For each 32-bit element, we shift it left, bringing in the
2393 rdm = d[H4(e)] >> (32 - shift);
2404 return do_sqrshl_d(n, -(int8_t)shift, false, NULL); in HELPER()
2414 return do_sqrshl_d(n, (int8_t)shift, false, &env->QF); in HELPER()
2419 return do_uqrshl_d(n, (int8_t)shift, false, &env->QF); in HELPER()
2424 return do_sqrshl_d(n, -(int8_t)shift, true, &env->QF); in HELPER()
2429 return do_uqrshl_d(n, (int8_t)shift, true, &env->QF); in HELPER()
2432 /* Operate on 64-bit values, but saturate at 48 bits */
2438 if (shift <= -48) { in do_sqrshl48_d()
2446 src >>= -shift - 1; in do_sqrshl48_d()
2449 val = src >> -shift; in do_sqrshl48_d()
2468 /* Operate on 64-bit values, but saturate at 48 bits */
2474 if (shift <= -(48 + round)) { in do_uqrshl48_d()
2478 val = src >> (-shift - 1); in do_uqrshl48_d()
2481 val = src >> -shift; in do_uqrshl48_d()
2502 return do_sqrshl48_d(n, -(int8_t)shift, true, &env->QF); in HELPER()
2507 return do_uqrshl48_d(n, (int8_t)shift, true, &env->QF); in HELPER()
2512 return do_uqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF); in HELPER()
2517 return do_sqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF); in HELPER()
2522 return do_uqrshl_bhs(n, (int8_t)shift, 32, true, &env->QF); in HELPER()
2527 return do_sqrshl_bhs(n, -(int8_t)shift, 32, true, &env->QF); in HELPER()
2585 offset -= imm; in do_sub_wrap()
2595 * P0 bits for non-executed beats (where eci_mask is 0) are unchanged. in DO_VIDUP_ALL()
2616 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \ in DO_VIDUP_ALL()
2638 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
2686 uint16_t p0 = FIELD_EX32(env->v7m.vpr, V7M_VPR, P0);
2703 * This insn is itself subject to predication and to beat-wise execution, in HELPER()
2708 uint16_t beatpred = ~env->v7m.vpr & mask; in HELPER()
2709 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (beatpred & eci_mask); in HELPER()
2717 * ltpmask in mve_element_mask(), but we have pre-calculated
2729 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (newmask & eci_mask); in HELPER()
2746 env->vfp.qc[0] = qc; \
2758 #define DO_VQNEG_B(N, SATP) do_sat_bhs(-(int64_t)N, INT8_MIN, INT8_MAX, SATP)
2759 #define DO_VQNEG_H(N, SATP) do_sat_bhs(-(int64_t)N, INT16_MIN, INT16_MAX, SATP)
2760 #define DO_VQNEG_W(N, SATP) do_sat_bhs(-(int64_t)N, INT32_MIN, INT32_MAX, SATP)
2797 * 2-operand floating point. Note that if an element is partially
2798 * predicated we must do the FP operation to update the non-predicated
2816 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
2889 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ in DO_2OP_FP_ALL()
2898 r[e] = FN1(n[H##ESIZE(e)], m[H##ESIZE(e - 1)], fpst); \ in DO_2OP_FP_ALL()
2912 #define DO_VFMA(OP, ESIZE, TYPE, CHS) \ argument
2926 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
2933 if (CHS) { \
2963 fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3047 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3081 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3113 &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3163 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3174 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
3196 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3207 env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
3260 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3293 &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
3334 bool ieee = !(env->vfp.fpcr & FPCR_AHP);
3338 float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
3364 bool ieee = !(env->vfp.fpcr & FPCR_AHP); in do_vcvt_hs()
3368 float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; in do_vcvt_hs()
3418 fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \