Lines matching "post", "-", "processing"

5  *  Copyright (c) 2005-2007 CodeSourcery
25 #include "translate-a32.h"
28 #include "decode-vfp.c.inc"
29 #include "decode-vfp-uncond.c.inc"
91 * Return the offset of a 16-bit half of the specified VFP single-precision
111 * Generate code for M-profile lazy FP state preservation if needed;
116 if (s->v7m_lspact) { in gen_preserve_fp_state()
122 if (translator_io_start(&s->base)) { in gen_preserve_fp_state()
123 s->base.is_jmp = DISAS_UPDATE_EXIT; in gen_preserve_fp_state()
131 s->v7m_lspact = false; in gen_preserve_fp_state()
138 * - set s->mve_no_pred to false, so this instruction in gen_preserve_fp_state()
140 * - end the TB now, without chaining to the next TB in gen_preserve_fp_state()
142 if (skip_context_update || !s->v7m_new_fp_ctxt_needed) { in gen_preserve_fp_state()
143 s->mve_no_pred = false; in gen_preserve_fp_state()
144 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; in gen_preserve_fp_state()
150 * Generate code for M-profile FP context handling: update the
158 if (s->v8m_fpccr_s_wrong) { in gen_update_fp_context()
162 if (s->v8m_secure) { in gen_update_fp_context()
169 s->v8m_fpccr_s_wrong = false; in gen_update_fp_context()
172 if (s->v7m_new_fp_ctxt_needed) { in gen_update_fp_context()
180 fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]); in gen_update_fp_context()
195 * but those do not exist for M-profile, so are not relevant here.) in gen_update_fp_context()
197 s->mve_no_pred = dc_isar_feature(aa32_mve, s); in gen_update_fp_context()
199 if (s->v8m_secure) { in gen_update_fp_context()
206 s->v7m_new_fp_ctxt_needed = false; in gen_update_fp_context()
211 * Check that VFP access is enabled, A-profile specific version.
221 if (s->fp_excp_el) { in vfp_access_check_a()
225 * For v7, any use of a Floating-point instruction or access in vfp_access_check_a()
226 * to a Floating-point Extension register that is trapped to in vfp_access_check_a()
233 gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el); in vfp_access_check_a()
242 if (s->sme_trap_nonstreaming) { in vfp_access_check_a()
249 if (!s->vfp_enabled && !ignore_vfp_enabled) { in vfp_access_check_a()
258 * Check that VFP access is enabled, M-profile specific version.
260 * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
267 if (s->fp_excp_el) { in vfp_access_check_m()
269 * M-profile mostly catches the "FPU disabled" case early, in in vfp_access_check_m()
271 * which do coprocessor-checks are outside the large ranges of in vfp_access_check_m()
272 * the encoding space handled by the patterns in m-nocp.decode, in vfp_access_check_m()
276 syn_uncategorized(), s->fp_excp_el); in vfp_access_check_m()
280 /* Handle M-profile lazy FP state mechanics */ in vfp_access_check_m()
282 /* Trigger lazy-state preservation if necessary */ in vfp_access_check_m()
295 * FMXR/FMRX to the always-available special registers.
309 int sz = a->sz; in trans_VSEL()
323 /* UNDEF accesses to D16-D31 if they don't exist */ in trans_VSEL()
325 ((a->vm | a->vn | a->vd) & 0x10)) { in trans_VSEL()
329 rd = a->vd; in trans_VSEL()
330 rn = a->vn; in trans_VSEL()
331 rm = a->vm; in trans_VSEL()
357 switch (a->cc) { in trans_VSEL()
364 case 2: /* ge: N == V -> N ^ V == 0 */ in trans_VSEL()
388 switch (a->cc) { in trans_VSEL()
395 case 2: /* ge: N == V -> N ^ V == 0 */ in trans_VSEL()
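
The cc cases above decode VSEL's 2-bit condition field. A minimal sketch (not part of the listing) of the test being generated, assuming the NZCV flags are already available as booleans; VSEL writes Vn to Vd when the condition holds and Vm otherwise, and the C flag is never consulted:

#include <stdbool.h>

static bool vsel_cond_holds(int cc, bool n, bool z, bool v)
{
    switch (cc) {
    case 0:  return z;             /* eq: Z set */
    case 1:  return v;             /* vs: V set */
    case 2:  return n == v;        /* ge: N == V, i.e. N ^ V == 0 */
    default: return !z && n == v;  /* gt: Z clear and N == V */
    }
}
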
432 int sz = a->sz; in trans_VRINT()
435 int rounding = fp_decode_rm[a->rm]; in trans_VRINT()
449 /* UNDEF accesses to D16-D31 if they don't exist */ in trans_VRINT()
451 ((a->vm | a->vd) & 0x10)) { in trans_VRINT()
455 rd = a->vd; in trans_VRINT()
456 rm = a->vm; in trans_VRINT()
499 int sz = a->sz; in trans_VCVT()
502 int rounding = fp_decode_rm[a->rm]; in trans_VCVT()
503 bool is_signed = a->op; in trans_VCVT()
517 /* UNDEF accesses to D16-D31 if they don't exist */ in trans_VCVT()
518 if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { in trans_VCVT()
522 rd = a->vd; in trans_VCVT()
523 rm = a->vm; in trans_VCVT()
580 * In a CPU with MVE, the VMOV (vector lane to general-purpose register) in mve_skip_vmov()
581 * and VMOV (general-purpose register to vector lane) insns are not in mve_skip_vmov()
591 * Note that if PSR.ECI is non-zero then we cannot be in an IT block. in mve_skip_vmov()
593 * Return true if this VMOV scalar <-> gpreg should be skipped because in mve_skip_vmov()
604 switch (s->eci) { in mve_skip_vmov()
629 if (a->size == MO_32 in trans_VMOV_to_gp()
636 /* UNDEF accesses to D16-D31 if they don't exist */ in trans_VMOV_to_gp()
637 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { in trans_VMOV_to_gp()
651 if (!mve_skip_vmov(s, a->vn, a->index, a->size)) { in trans_VMOV_to_gp()
653 read_neon_element32(tmp, a->vn, a->index, in trans_VMOV_to_gp()
654 a->size | (a->u ? 0 : MO_SIGN)); in trans_VMOV_to_gp()
655 store_reg(s, a->rt, tmp); in trans_VMOV_to_gp()
674 if (a->size == MO_32 in trans_VMOV_from_gp()
681 /* UNDEF accesses to D16-D31 if they don't exist */ in trans_VMOV_from_gp()
682 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { in trans_VMOV_from_gp()
696 if (!mve_skip_vmov(s, a->vn, a->index, a->size)) { in trans_VMOV_from_gp()
697 tmp = load_reg(s, a->rt); in trans_VMOV_from_gp()
698 write_neon_element32(tmp, a->vn, a->index, a->size); in trans_VMOV_from_gp()
717 /* UNDEF accesses to D16-D31 if they don't exist */ in trans_VDUP()
718 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { in trans_VDUP()
722 if (a->b && a->e) { in trans_VDUP()
726 if (a->q && (a->vn & 1)) { in trans_VDUP()
730 vec_size = a->q ? 16 : 8; in trans_VDUP()
731 if (a->b) { in trans_VDUP()
733 } else if (a->e) { in trans_VDUP()
743 tmp = load_reg(s, a->rt); in trans_VDUP()
744 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn), in trans_VDUP()
755 /* M profile version was already handled in m-nocp.decode */ in trans_VMSR_VMRS()
763 switch (a->reg) { in trans_VMSR_VMRS()
814 if (a->l) { in trans_VMSR_VMRS()
816 switch (a->reg) { in trans_VMSR_VMRS()
821 if (s->current_el == 1) { in trans_VMSR_VMRS()
825 tcg_constant_i32(a->rt), in trans_VMSR_VMRS()
826 tcg_constant_i32(a->reg)); in trans_VMSR_VMRS()
832 tmp = load_cpu_field(vfp.xregs[a->reg]); in trans_VMSR_VMRS()
835 if (a->rt == 15) { in trans_VMSR_VMRS()
847 if (a->rt == 15) { in trans_VMSR_VMRS()
851 store_reg(s, a->rt, tmp); in trans_VMSR_VMRS()
855 switch (a->reg) { in trans_VMSR_VMRS()
863 tmp = load_reg(s, a->rt); in trans_VMSR_VMRS()
872 tmp = load_reg(s, a->rt); in trans_VMSR_VMRS()
874 store_cpu_field(tmp, vfp.xregs[a->reg]); in trans_VMSR_VMRS()
879 tmp = load_reg(s, a->rt); in trans_VMSR_VMRS()
880 store_cpu_field(tmp, vfp.xregs[a->reg]); in trans_VMSR_VMRS()
899 if (a->rt == 15) { in trans_VMOV_half()
908 if (a->l) { in trans_VMOV_half()
911 vfp_load_reg16(tmp, a->vn); in trans_VMOV_half()
912 store_reg(s, a->rt, tmp); in trans_VMOV_half()
915 tmp = load_reg(s, a->rt); in trans_VMOV_half()
917 vfp_store_reg32(tmp, a->vn); in trans_VMOV_half()
935 if (a->l) { in trans_VMOV_single()
938 vfp_load_reg32(tmp, a->vn); in trans_VMOV_single()
939 if (a->rt == 15) { in trans_VMOV_single()
943 store_reg(s, a->rt, tmp); in trans_VMOV_single()
947 tmp = load_reg(s, a->rt); in trans_VMOV_single()
948 vfp_store_reg32(tmp, a->vn); in trans_VMOV_single()
963 * VMOV between two general-purpose registers and two single precision in trans_VMOV_64_sp()
970 if (a->op) { in trans_VMOV_64_sp()
973 vfp_load_reg32(tmp, a->vm); in trans_VMOV_64_sp()
974 store_reg(s, a->rt, tmp); in trans_VMOV_64_sp()
976 vfp_load_reg32(tmp, a->vm + 1); in trans_VMOV_64_sp()
977 store_reg(s, a->rt2, tmp); in trans_VMOV_64_sp()
980 tmp = load_reg(s, a->rt); in trans_VMOV_64_sp()
981 vfp_store_reg32(tmp, a->vm); in trans_VMOV_64_sp()
982 tmp = load_reg(s, a->rt2); in trans_VMOV_64_sp()
983 vfp_store_reg32(tmp, a->vm + 1); in trans_VMOV_64_sp()
994 * VMOV between two general-purpose registers and one double precision in trans_VMOV_64_dp()
1002 /* UNDEF accesses to D16-D31 if they don't exist */ in trans_VMOV_64_dp()
1003 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { in trans_VMOV_64_dp()
1011 if (a->op) { in trans_VMOV_64_dp()
1014 vfp_load_reg32(tmp, a->vm * 2); in trans_VMOV_64_dp()
1015 store_reg(s, a->rt, tmp); in trans_VMOV_64_dp()
1017 vfp_load_reg32(tmp, a->vm * 2 + 1); in trans_VMOV_64_dp()
1018 store_reg(s, a->rt2, tmp); in trans_VMOV_64_dp()
1021 tmp = load_reg(s, a->rt); in trans_VMOV_64_dp()
1022 vfp_store_reg32(tmp, a->vm * 2); in trans_VMOV_64_dp()
1023 tmp = load_reg(s, a->rt2); in trans_VMOV_64_dp()
1024 vfp_store_reg32(tmp, a->vm * 2 + 1); in trans_VMOV_64_dp()
1044 offset = a->imm << 1; in trans_VLDR_VSTR_hp()
1045 if (!a->u) { in trans_VLDR_VSTR_hp()
1046 offset = -offset; in trans_VLDR_VSTR_hp()
1050 addr = add_reg_for_lit(s, a->rn, offset); in trans_VLDR_VSTR_hp()
1052 if (a->l) { in trans_VLDR_VSTR_hp()
1054 vfp_store_reg32(tmp, a->vd); in trans_VLDR_VSTR_hp()
1056 vfp_load_reg32(tmp, a->vd); in trans_VLDR_VSTR_hp()
1075 offset = a->imm << 2; in trans_VLDR_VSTR_sp()
1076 if (!a->u) { in trans_VLDR_VSTR_sp()
1077 offset = -offset; in trans_VLDR_VSTR_sp()
1081 addr = add_reg_for_lit(s, a->rn, offset); in trans_VLDR_VSTR_sp()
1083 if (a->l) { in trans_VLDR_VSTR_sp()
1085 vfp_store_reg32(tmp, a->vd); in trans_VLDR_VSTR_sp()
1087 vfp_load_reg32(tmp, a->vd); in trans_VLDR_VSTR_sp()
1104 /* UNDEF accesses to D16-D31 if they don't exist */ in trans_VLDR_VSTR_dp()
1105 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { in trans_VLDR_VSTR_dp()
1113 offset = a->imm << 2; in trans_VLDR_VSTR_dp()
1114 if (!a->u) { in trans_VLDR_VSTR_dp()
1115 offset = -offset; in trans_VLDR_VSTR_dp()
1119 addr = add_reg_for_lit(s, a->rn, offset); in trans_VLDR_VSTR_dp()
1121 if (a->l) { in trans_VLDR_VSTR_dp()
1123 vfp_store_reg64(tmp, a->vd); in trans_VLDR_VSTR_dp()
1125 vfp_load_reg64(tmp, a->vd); in trans_VLDR_VSTR_dp()
1141 n = a->imm; in trans_VLDM_VSTM_sp()
1143 if (n == 0 || (a->vd + n) > 32) { in trans_VLDM_VSTM_sp()
1150 if (a->rn == 15 && a->w) { in trans_VLDM_VSTM_sp()
1155 s->eci_handled = true; in trans_VLDM_VSTM_sp()
1162 addr = add_reg_for_lit(s, a->rn, 0); in trans_VLDM_VSTM_sp()
1163 if (a->p) { in trans_VLDM_VSTM_sp()
1164 /* pre-decrement */ in trans_VLDM_VSTM_sp()
1165 tcg_gen_addi_i32(addr, addr, -(a->imm << 2)); in trans_VLDM_VSTM_sp()
1168 if (s->v8m_stackcheck && a->rn == 13 && a->w) { in trans_VLDM_VSTM_sp()
1171 * and is either the old SP (if post-increment) or in trans_VLDM_VSTM_sp()
1172 * the new SP (if pre-decrement). For post-increment in trans_VLDM_VSTM_sp()
1183 if (a->l) { in trans_VLDM_VSTM_sp()
1186 vfp_store_reg32(tmp, a->vd + i); in trans_VLDM_VSTM_sp()
1189 vfp_load_reg32(tmp, a->vd + i); in trans_VLDM_VSTM_sp()
1194 if (a->w) { in trans_VLDM_VSTM_sp()
1196 if (a->p) { in trans_VLDM_VSTM_sp()
1197 offset = -offset * n; in trans_VLDM_VSTM_sp()
1200 store_reg(s, a->rn, addr); in trans_VLDM_VSTM_sp()
1219 n = a->imm >> 1; in trans_VLDM_VSTM_dp()
1221 if (n == 0 || (a->vd + n) > 32 || n > 16) { in trans_VLDM_VSTM_dp()
1228 if (a->rn == 15 && a->w) { in trans_VLDM_VSTM_dp()
1233 /* UNDEF accesses to D16-D31 if they don't exist */ in trans_VLDM_VSTM_dp()
1234 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) { in trans_VLDM_VSTM_dp()
1238 s->eci_handled = true; in trans_VLDM_VSTM_dp()
1245 addr = add_reg_for_lit(s, a->rn, 0); in trans_VLDM_VSTM_dp()
1246 if (a->p) { in trans_VLDM_VSTM_dp()
1247 /* pre-decrement */ in trans_VLDM_VSTM_dp()
1248 tcg_gen_addi_i32(addr, addr, -(a->imm << 2)); in trans_VLDM_VSTM_dp()
1251 if (s->v8m_stackcheck && a->rn == 13 && a->w) { in trans_VLDM_VSTM_dp()
1254 * and is either the old SP (if post-increment) or in trans_VLDM_VSTM_dp()
1255 * the new SP (if pre-decrement). For post-increment in trans_VLDM_VSTM_dp()
1266 if (a->l) { in trans_VLDM_VSTM_dp()
1269 vfp_store_reg64(tmp, a->vd + i); in trans_VLDM_VSTM_dp()
1272 vfp_load_reg64(tmp, a->vd + i); in trans_VLDM_VSTM_dp()
1277 if (a->w) { in trans_VLDM_VSTM_dp()
1279 if (a->p) { in trans_VLDM_VSTM_dp()
1280 offset = -offset * n; in trans_VLDM_VSTM_dp()
1281 } else if (a->imm & 1) { in trans_VLDM_VSTM_dp()
1290 store_reg(s, a->rn, addr); in trans_VLDM_VSTM_dp()
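
The writeback handling above (in both the single- and double-precision forms) leaves Rn adjusted by the full transfer size. A small sketch of that address arithmetic, assuming the increment-after (P=0) and decrement-before (P=1) forms and ignoring the v8M stack-limit checks; imm is the instruction's word count (nregs for the sp form; 2*nregs, or 2*nregs+1 for FLDMX/FSTMX, for the dp form):

#include <stdint.h>

static uint32_t vldm_start_addr(uint32_t rn, int p, uint32_t imm)
{
    /* decrement-before starts imm words below Rn, increment-after at Rn */
    return p ? rn - imm * 4 : rn;
}

static uint32_t vldm_writeback(uint32_t rn, int p, uint32_t imm)
{
    /* in both modes Rn moves by the whole imm * 4 bytes */
    return p ? rn - imm * 4 : rn + imm * 4;
}
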
1354 * Perform a 3-operand VFP data processing instruction. fn is the
1356 * code to handle looping around for VFP vector processing.
1363 int veclen = s->vec_len; in do_vfp_3op_sp()
1372 (veclen != 0 || s->vec_stride != 0)) { in do_vfp_3op_sp()
1386 delta_d = s->vec_stride + 1; in do_vfp_3op_sp()
1418 veclen--; in do_vfp_3op_sp()
1434 * Do a half-precision operation. Functionally this is in do_vfp_3op_hp()
1436 * - it uses the FPST_A32_F16 in do_vfp_3op_hp()
1437 * - it doesn't need the VFP vector handling (fp16 is a in do_vfp_3op_hp()
1439 * - it does the aa32_fp16_arith feature test in do_vfp_3op_hp()
1448 if (s->vec_len != 0 || s->vec_stride != 0) { in do_vfp_3op_hp()
1477 int veclen = s->vec_len; in do_vfp_3op_dp()
1485 /* UNDEF accesses to D16-D31 if they don't exist */ in do_vfp_3op_dp()
1491 (veclen != 0 || s->vec_stride != 0)) { in do_vfp_3op_dp()
1505 delta_d = (s->vec_stride >> 1) + 1; in do_vfp_3op_dp()
1536 veclen--; in do_vfp_3op_dp()
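
The veclen/delta_d loops above implement the legacy VFP "short vector" mode. A sketch of the register stepping they perform, assuming the classic banking rules (single-precision registers wrap within banks of eight, doubles within banks of four, and operands living in the first bank are treated as scalars):

/* Advance a single-precision register number by delta, wrapping within
 * its bank of eight (s0-s7, s8-s15, s16-s23, s24-s31). */
static int advance_sreg(int reg, int delta)
{
    return (reg & ~7) | ((reg + delta) & 7);
}

/* Double-precision registers bank in groups of four (d0-d3, d4-d7, ...). */
static int advance_dreg(int reg, int delta)
{
    return (reg & ~3) | ((reg + delta) & 3);
}
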
1552 int veclen = s->vec_len; in do_vfp_2op_sp()
1558 (veclen != 0 || s->vec_stride != 0)) { in do_vfp_2op_sp()
1572 delta_d = s->vec_stride + 1; in do_vfp_2op_sp()
1598 /* single source one-many */ in do_vfp_2op_sp()
1599 while (veclen--) { in do_vfp_2op_sp()
1607 veclen--; in do_vfp_2op_sp()
1618 * Do a half-precision operation. Functionally this is in do_vfp_2op_hp()
1620 * - it doesn't need the VFP vector handling (fp16 is a in do_vfp_2op_hp()
1622 * - it does the aa32_fp16_arith feature test in do_vfp_2op_hp()
1632 if (s->vec_len != 0 || s->vec_stride != 0) { in do_vfp_2op_hp()
1652 int veclen = s->vec_len; in do_vfp_2op_dp()
1657 /* UNDEF accesses to D16-D31 if they don't exist */ in do_vfp_2op_dp()
1663 (veclen != 0 || s->vec_stride != 0)) { in do_vfp_2op_dp()
1677 delta_d = (s->vec_stride >> 1) + 1; in do_vfp_2op_dp()
1703 /* single source one-many */ in do_vfp_2op_dp()
1704 while (veclen--) { in do_vfp_2op_dp()
1712 veclen--; in do_vfp_2op_dp()
1731 return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true); in trans_VMLA_hp()
1745 return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true); in trans_VMLA_sp()
1759 return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true); in trans_VMLA_dp()
1765 * VMLS: vd = vd + -(vn * vm) in gen_VMLS_hp()
1777 return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true); in trans_VMLS_hp()
1783 * VMLS: vd = vd + -(vn * vm) in gen_VMLS_sp()
1795 return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true); in trans_VMLS_sp()
1801 * VMLS: vd = vd + -(vn * vm) in gen_VMLS_dp()
1813 return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true); in trans_VMLS_dp()
1819 * VNMLS: -fd + (fn * fm) in gen_VNMLS_hp()
1820 * Note that it isn't valid to replace (-A + B) with (B - A) or similar in gen_VNMLS_hp()
1833 return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true); in trans_VNMLS_hp()
1839 * VNMLS: -fd + (fn * fm) in gen_VNMLS_sp()
1840 * Note that it isn't valid to replace (-A + B) with (B - A) or similar in gen_VNMLS_sp()
1853 return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true); in trans_VNMLS_sp()
1859 * VNMLS: -fd + (fn * fm) in gen_VNMLS_dp()
1860 * Note that it isn't valid to replace (-A + B) with (B - A) or similar in gen_VNMLS_dp()
1873 return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true); in trans_VNMLS_dp()
1878 /* VNMLA: -fd + -(fn * fm) */ in gen_VNMLA_hp()
1889 return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true); in trans_VNMLA_hp()
1894 /* VNMLA: -fd + -(fn * fm) */ in gen_VNMLA_sp()
1905 return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true); in trans_VNMLA_sp()
1910 /* VNMLA: -fd + -(fn * fm) */ in gen_VNMLA_dp()
1921 return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true); in trans_VNMLA_dp()
1926 return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false); in trans_VMUL_hp()
1931 return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false); in trans_VMUL_sp()
1936 return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false); in trans_VMUL_dp()
1941 /* VNMUL: -(fn * fm) */ in gen_VNMUL_hp()
1948 return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false); in trans_VNMUL_hp()
1953 /* VNMUL: -(fn * fm) */ in gen_VNMUL_sp()
1960 return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false); in trans_VNMUL_sp()
1965 /* VNMUL: -(fn * fm) */ in gen_VNMUL_dp()
1972 return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false); in trans_VNMUL_dp()
1977 return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false); in trans_VADD_hp()
1982 return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false); in trans_VADD_sp()
1987 return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false); in trans_VADD_dp()
1992 return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false); in trans_VSUB_hp()
1997 return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false); in trans_VSUB_sp()
2002 return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false); in trans_VSUB_dp()
2007 return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false); in trans_VDIV_hp()
2012 return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false); in trans_VDIV_sp()
2017 return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false); in trans_VDIV_dp()
2026 a->vd, a->vn, a->vm, false); in trans_VMINNM_hp()
2035 a->vd, a->vn, a->vm, false); in trans_VMAXNM_hp()
2044 a->vd, a->vn, a->vm, false); in trans_VMINNM_sp()
2053 a->vd, a->vn, a->vm, false); in trans_VMAXNM_sp()
2062 a->vd, a->vn, a->vm, false); in trans_VMINNM_dp()
2071 a->vd, a->vn, a->vm, false); in trans_VMAXNM_dp()
2077 * VFNMA : fd = muladd(-fd, -fn, fm) in do_vfm_hp()
2078 * VFNMS : fd = muladd(-fd, fn, fm) in do_vfm_hp()
2080 * VFMS : fd = muladd( fd, -fn, fm) in do_vfm_hp()
2082 * These are fused multiply-add, and must be done as one floating in do_vfm_hp()
2086 * bit flipped if it is a negated-input. in do_vfm_hp()
2094 * in a Neon-no-VFP core that ID register field will be non-zero. in do_vfm_hp()
2102 if (s->vec_len != 0 || s->vec_stride != 0) { in do_vfm_hp()
2114 vfp_load_reg16(vn, a->vn); in do_vfm_hp()
2115 vfp_load_reg16(vm, a->vm); in do_vfm_hp()
2120 vfp_load_reg16(vd, a->vd); in do_vfm_hp()
2127 vfp_store_reg32(vd, a->vd); in do_vfm_hp()
2134 * VFNMA : fd = muladd(-fd, -fn, fm) in do_vfm_sp()
2135 * VFNMS : fd = muladd(-fd, fn, fm) in do_vfm_sp()
2137 * VFMS : fd = muladd( fd, -fn, fm) in do_vfm_sp()
2139 * These are fused multiply-add, and must be done as one floating in do_vfm_sp()
2143 * bit flipped if it is a negated-input. in do_vfm_sp()
2151 * in a Neon-no-VFP core that ID register field will be non-zero. in do_vfm_sp()
2158 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from in do_vfm_sp()
2161 if (s->vec_len != 0 || s->vec_stride != 0) { in do_vfm_sp()
2173 vfp_load_reg32(vn, a->vn); in do_vfm_sp()
2174 vfp_load_reg32(vm, a->vm); in do_vfm_sp()
2179 vfp_load_reg32(vd, a->vd); in do_vfm_sp()
2186 vfp_store_reg32(vd, a->vd); in do_vfm_sp()
2193 * VFNMA : fd = muladd(-fd, -fn, fm) in do_vfm_dp()
2194 * VFNMS : fd = muladd(-fd, fn, fm) in do_vfm_dp()
2196 * VFMS : fd = muladd( fd, -fn, fm) in do_vfm_dp()
2198 * These are fused multiply-add, and must be done as one floating in do_vfm_dp()
2202 * bit flipped if it is a negated-input. in do_vfm_dp()
2210 * in a Neon-no-VFP core that ID register field will be non-zero. in do_vfm_dp()
2217 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from in do_vfm_dp()
2220 if (s->vec_len != 0 || s->vec_stride != 0) { in do_vfm_dp()
2224 /* UNDEF accesses to D16-D31 if they don't exist. */ in do_vfm_dp()
2226 ((a->vd | a->vn | a->vm) & 0x10)) { in do_vfm_dp()
2238 vfp_load_reg64(vn, a->vn); in do_vfm_dp()
2239 vfp_load_reg64(vm, a->vm); in do_vfm_dp()
2244 vfp_load_reg64(vd, a->vd); in do_vfm_dp()
2251 vfp_store_reg64(vd, a->vd); in do_vfm_dp()
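
The "must be done as one floating point operation" requirement in the comments above is observable: a separate multiply and add rounds twice and can lose the low-order part of the result. A self-contained illustration (plain C, not QEMU code) using fma():

#include <math.h>
#include <stdio.h>

int main(void)
{
    double x = 134217729.0;          /* 2^27 + 1: x*x is not exactly representable */
    double c = -(x * x);             /* the rounded product, negated */

    double fused    = fma(x, x, c);  /* one rounding: recovers the lost bit -> 1 */
    double separate = x * x + c;     /* two roundings -> 0 (unless the compiler contracts it) */

    printf("fused = %g, separate = %g\n", fused, separate);
    return 0;
}
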
2278 if (s->vec_len != 0 || s->vec_stride != 0) { in trans_VMOV_imm_hp()
2286 vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd); in trans_VMOV_imm_hp()
2293 int veclen = s->vec_len; in trans_VMOV_imm_sp()
2297 vd = a->vd; in trans_VMOV_imm_sp()
2304 (veclen != 0 || s->vec_stride != 0)) { in trans_VMOV_imm_sp()
2318 delta_d = s->vec_stride + 1; in trans_VMOV_imm_sp()
2322 fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm)); in trans_VMOV_imm_sp()
2332 veclen--; in trans_VMOV_imm_sp()
2342 int veclen = s->vec_len; in trans_VMOV_imm_dp()
2346 vd = a->vd; in trans_VMOV_imm_dp()
2352 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VMOV_imm_dp()
2358 (veclen != 0 || s->vec_stride != 0)) { in trans_VMOV_imm_dp()
2372 delta_d = (s->vec_stride >> 1) + 1; in trans_VMOV_imm_dp()
2376 fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm)); in trans_VMOV_imm_dp()
2386 veclen--; in trans_VMOV_imm_dp()
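
vfp_expand_imm() above expands the instruction's 8-bit immediate into a full floating-point constant. A sketch of the single-precision rule from the architectural VFPExpandImm() pseudocode (the half- and double-precision cases differ only in exponent and fraction widths):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static float vfp_expand_imm32(uint8_t imm8)
{
    /* imm8 = abcdefgh: sign = a, exponent = NOT(b):bbbbb:cd, fraction = efgh:Zeros(19) */
    uint32_t sign = (imm8 >> 7) & 1;
    uint32_t b    = (imm8 >> 6) & 1;
    uint32_t cd   = (imm8 >> 4) & 3;
    uint32_t efgh = imm8 & 0xf;
    uint32_t exp  = ((b ^ 1) << 7) | (b ? 0x7c : 0) | cd;
    uint32_t bits = (sign << 31) | (exp << 23) | (efgh << 19);
    float f;

    memcpy(&f, &bits, sizeof(f));
    return f;
}

int main(void)
{
    printf("%f %f\n", vfp_expand_imm32(0x70), vfp_expand_imm32(0x00)); /* 1.0 2.0 */
    return 0;
}
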
2400 return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \
2411 return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \
2453 if (a->z && a->vm != 0) { in trans_VCMP_hp()
2464 vfp_load_reg16(vd, a->vd); in trans_VCMP_hp()
2465 if (a->z) { in trans_VCMP_hp()
2468 vfp_load_reg16(vm, a->vm); in trans_VCMP_hp()
2471 if (a->e) { in trans_VCMP_hp()
2488 if (a->z && a->vm != 0) { in trans_VCMP_sp()
2499 vfp_load_reg32(vd, a->vd); in trans_VCMP_sp()
2500 if (a->z) { in trans_VCMP_sp()
2503 vfp_load_reg32(vm, a->vm); in trans_VCMP_sp()
2506 if (a->e) { in trans_VCMP_sp()
2523 if (a->z && a->vm != 0) { in trans_VCMP_dp()
2527 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VCMP_dp()
2528 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { in trans_VCMP_dp()
2539 vfp_load_reg64(vd, a->vd); in trans_VCMP_dp()
2540 if (a->z) { in trans_VCMP_dp()
2543 vfp_load_reg64(vm, a->vm); in trans_VCMP_dp()
2546 if (a->e) { in trans_VCMP_dp()
2572 tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t)); in trans_VCVT_f32_f16()
2574 vfp_store_reg32(tmp, a->vd); in trans_VCVT_f32_f16()
2593 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VCVT_f64_f16()
2594 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { in trans_VCVT_f64_f16()
2606 tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t)); in trans_VCVT_f64_f16()
2609 vfp_store_reg64(vd, a->vd); in trans_VCVT_f64_f16()
2629 vfp_load_reg32(tmp, a->vm); in trans_VCVT_b16_f32()
2631 tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t)); in trans_VCVT_b16_f32()
2653 vfp_load_reg32(tmp, a->vm); in trans_VCVT_f16_f32()
2655 tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t)); in trans_VCVT_f16_f32()
2674 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VCVT_f16_f64()
2675 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { in trans_VCVT_f16_f64()
2688 vfp_load_reg64(vm, a->vm); in trans_VCVT_f16_f64()
2690 tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t)); in trans_VCVT_f16_f64()
2708 vfp_load_reg16(tmp, a->vm); in trans_VRINTR_hp()
2711 vfp_store_reg32(tmp, a->vd); in trans_VRINTR_hp()
2729 vfp_load_reg32(tmp, a->vm); in trans_VRINTR_sp()
2732 vfp_store_reg32(tmp, a->vd); in trans_VRINTR_sp()
2749 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VRINTR_dp()
2750 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { in trans_VRINTR_dp()
2759 vfp_load_reg64(tmp, a->vm); in trans_VRINTR_dp()
2762 vfp_store_reg64(tmp, a->vd); in trans_VRINTR_dp()
2781 vfp_load_reg16(tmp, a->vm); in trans_VRINTZ_hp()
2786 vfp_store_reg32(tmp, a->vd); in trans_VRINTZ_hp()
2805 vfp_load_reg32(tmp, a->vm); in trans_VRINTZ_sp()
2810 vfp_store_reg32(tmp, a->vd); in trans_VRINTZ_sp()
2828 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VRINTZ_dp()
2829 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { in trans_VRINTZ_dp()
2838 vfp_load_reg64(tmp, a->vm); in trans_VRINTZ_dp()
2843 vfp_store_reg64(tmp, a->vd); in trans_VRINTZ_dp()
2861 vfp_load_reg16(tmp, a->vm); in trans_VRINTX_hp()
2864 vfp_store_reg32(tmp, a->vd); in trans_VRINTX_hp()
2882 vfp_load_reg32(tmp, a->vm); in trans_VRINTX_sp()
2885 vfp_store_reg32(tmp, a->vd); in trans_VRINTX_sp()
2902 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VRINTX_dp()
2903 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { in trans_VRINTX_dp()
2912 vfp_load_reg64(tmp, a->vm); in trans_VRINTX_dp()
2915 vfp_store_reg64(tmp, a->vd); in trans_VRINTX_dp()
2928 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VCVT_sp()
2929 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { in trans_VCVT_sp()
2939 vfp_load_reg32(vm, a->vm); in trans_VCVT_sp()
2941 vfp_store_reg64(vd, a->vd); in trans_VCVT_sp()
2954 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VCVT_dp()
2955 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { in trans_VCVT_dp()
2965 vfp_load_reg64(vm, a->vm); in trans_VCVT_dp()
2967 vfp_store_reg32(vd, a->vd); in trans_VCVT_dp()
2985 vfp_load_reg32(vm, a->vm); in trans_VCVT_int_hp()
2987 if (a->s) { in trans_VCVT_int_hp()
2988 /* i32 -> f16 */ in trans_VCVT_int_hp()
2991 /* u32 -> f16 */ in trans_VCVT_int_hp()
2994 vfp_store_reg32(vm, a->vd); in trans_VCVT_int_hp()
3012 vfp_load_reg32(vm, a->vm); in trans_VCVT_int_sp()
3014 if (a->s) { in trans_VCVT_int_sp()
3015 /* i32 -> f32 */ in trans_VCVT_int_sp()
3018 /* u32 -> f32 */ in trans_VCVT_int_sp()
3021 vfp_store_reg32(vm, a->vd); in trans_VCVT_int_sp()
3035 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VCVT_int_dp()
3036 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { in trans_VCVT_int_dp()
3046 vfp_load_reg32(vm, a->vm); in trans_VCVT_int_dp()
3048 if (a->s) { in trans_VCVT_int_dp()
3049 /* i32 -> f64 */ in trans_VCVT_int_dp()
3052 /* u32 -> f64 */ in trans_VCVT_int_dp()
3055 vfp_store_reg64(vd, a->vd); in trans_VCVT_int_dp()
3072 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VJCVT()
3073 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { in trans_VJCVT()
3083 vfp_load_reg64(vm, a->vm); in trans_VJCVT()
3085 vfp_store_reg32(vd, a->vd); in trans_VJCVT()
3103 frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm); in trans_VCVT_fix_hp()
3106 vfp_load_reg32(vd, a->vd); in trans_VCVT_fix_hp()
3112 switch (a->opc) { in trans_VCVT_fix_hp()
3141 vfp_store_reg32(vd, a->vd); in trans_VCVT_fix_hp()
3159 frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm); in trans_VCVT_fix_sp()
3162 vfp_load_reg32(vd, a->vd); in trans_VCVT_fix_sp()
3168 switch (a->opc) { in trans_VCVT_fix_sp()
3197 vfp_store_reg32(vd, a->vd); in trans_VCVT_fix_sp()
3212 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VCVT_fix_dp()
3213 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { in trans_VCVT_fix_dp()
3221 frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm); in trans_VCVT_fix_dp()
3224 vfp_load_reg64(vd, a->vd); in trans_VCVT_fix_dp()
3230 switch (a->opc) { in trans_VCVT_fix_dp()
3259 vfp_store_reg64(vd, a->vd); in trans_VCVT_fix_dp()
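
The frac_bits value above is the number of fraction bits in the fixed-point operand (the encoded imm holds size - frac_bits). A sketch of the scaling VCVT applies, assuming that fraction count and ignoring saturation, NaN handling and the FPSCR rounding mode (the to-fixed direction architecturally rounds toward zero):

#include <stdint.h>
#include <math.h>

static float fixed_to_float(int32_t fx, int frac_bits)
{
    return ldexpf((float)fx, -frac_bits);   /* fx * 2^-frac_bits */
}

static int32_t float_to_fixed(float f, int frac_bits)
{
    return (int32_t)ldexpf(f, frac_bits);   /* the cast truncates toward zero */
}
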
3278 vfp_load_reg16(vm, a->vm); in trans_VCVT_hp_int()
3280 if (a->s) { in trans_VCVT_hp_int()
3281 if (a->rz) { in trans_VCVT_hp_int()
3287 if (a->rz) { in trans_VCVT_hp_int()
3293 vfp_store_reg32(vm, a->vd); in trans_VCVT_hp_int()
3312 vfp_load_reg32(vm, a->vm); in trans_VCVT_sp_int()
3314 if (a->s) { in trans_VCVT_sp_int()
3315 if (a->rz) { in trans_VCVT_sp_int()
3321 if (a->rz) { in trans_VCVT_sp_int()
3327 vfp_store_reg32(vm, a->vd); in trans_VCVT_sp_int()
3341 /* UNDEF accesses to D16-D31 if they don't exist. */ in trans_VCVT_dp_int()
3342 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { in trans_VCVT_dp_int()
3353 vfp_load_reg64(vm, a->vm); in trans_VCVT_dp_int()
3355 if (a->s) { in trans_VCVT_dp_int()
3356 if (a->rz) { in trans_VCVT_dp_int()
3362 if (a->rz) { in trans_VCVT_dp_int()
3368 vfp_store_reg32(vd, a->vd); in trans_VCVT_dp_int()
3380 if (s->vec_len != 0 || s->vec_stride != 0) { in trans_VINS()
3391 vfp_load_reg16(rm, a->vm); in trans_VINS()
3392 vfp_load_reg16(rd, a->vd); in trans_VINS()
3394 vfp_store_reg32(rd, a->vd); in trans_VINS()
3406 if (s->vec_len != 0 || s->vec_stride != 0) { in trans_VMOVX()
3416 vfp_load_reg32(rm, a->vm); in trans_VMOVX()
3418 vfp_store_reg32(rm, a->vd); in trans_VMOVX()