/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
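     *
     * Roughly speaking, PSR.ECI records which beats of a partially
     * executed MVE instruction (and possibly of the following one)
     * have already completed: e.g. ECI_A0A1 means beats A0 and A1
     * are already done and must not be re-executed when we resume.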
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
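
/*
 * For illustration: the widening forms above load a narrow element and
 * extend it, e.g. VLDRB.S16 reads bytes from memory (msize MO_8) and
 * sign-extends each one into a halfword element of Qd; the narrowing
 * stores go the other way.
 */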

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * signextended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL, F(vldrh_sg_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, F(vldrh_sg_os_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL, NULL, F(vldrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL, F(vldrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL, NULL, F(vstrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL, F(vstrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
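
/*
 * The "_wb_" helpers below are the writeback forms (selected by a->w):
 * as well as doing the vector load or store they write the incremented
 * addresses back into the offset vector Qm.
 */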

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}
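
/*
 * Note that the VLD4x/VST4x forms touch Qd..Qd+3 (hence the tighter
 * qd > 4 bound) and advance the address by 64 bytes on writeback,
 * while the VLD2x/VST2x forms touch only Qd and Qd+1 and advance
 * by 32 bytes.
 */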

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(cpu_env, qd, rt);
        tcg_temp_free_ptr(qd);
    }
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}

#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)
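
/*
 * For example, DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf) above expands into a
 * trans_VCVT_SF() that wraps the half- or single-precision fixed-point
 * helper with a shift count of zero, i.e. a plain int-to-float convert.
 */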

static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          enum arm_fprounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }                                                           \

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }                                                           \

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)

#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)
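
/*
 * For the B ("bottom") narrowing forms the results go into the
 * even-numbered elements of Qd and the odd-numbered ones are left
 * alone; the T ("top") forms write the odd-numbered elements, so a
 * B/T pair narrows two source vectors into a single destination.
 */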

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qn);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}

#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}

#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)
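
/*
 * For example, DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add) below defines a
 * trans_VADD() that emits the inline gvec add when the whole 16-byte
 * vector is written unconditionally, and otherwise calls the out-of-line
 * vaddb/vaddh/vaddw helpers which honour predication.
 */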

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
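 *
 * For example, VADC.I32 Qd, Qn, Qm adds lane 0 of Qn and Qm using FPSCR.C
 * as the carry-in, feeds that addition's carry-out into the lane 1 add,
 * and so on, finally writing the carry-out of the last lane to FPSCR.C.
 *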
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }
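
/*
 * For example, the scalar form VADD.I16 Qd, Qn, Rm adds the low 16 bits
 * of Rm to each halfword lane of Qn; the helpers replicate the scalar
 * across the lanes themselves, so we simply pass Rm in a TCGv_i32.
 */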

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
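     *
     * For example, if PSR.ECI says beat A0 has already been executed,
     * an A=0 VMLALDAV must accumulate onto whatever that beat already
     * left in RdaHi:RdaLo rather than restarting from zero.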
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}
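
/*
 * VMLADAV and friends below are the 32-bit-accumulator counterparts of
 * the "long" insns above: the running sum lives in a single
 * general-purpose register Rda instead of the RdaHi:RdaLo pair.
 */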

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}
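
/*
 * (The mask passed to gen_vpst() is the VPT-block mask from the insn
 * encoding; it works much like the IT-block mask, encoding the
 * then/else pattern for the predicated insns that follow.)
 */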

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}
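
/*
 * For example, VADDVA.U8 Rda, Qm (the A form, a->a == 1) adds all sixteen
 * byte lanes of Qm into the existing value of Rda; the plain VADDV form
 * starts the sum at zero instead.
 */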

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}
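
/*
 * For example (one of the standard AdvSIMD modified-immediate encodings):
 * cmode 0b1110 with op 0 is the "replicate imm8 into every byte" VMOV,
 * which asimd_imm_const() expands into the 64-bit constant passed to
 * the vmovi helper above.
 */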

static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                          bool negateshift, GVecGen2iFn vecfn)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
              shift, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm, tcg_constant_i32(shift));
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    return do_2shift_vec(s, a, fn, negateshift, NULL);
}

#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
    }

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)

static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_sari() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        shift--;
    }
    tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
}

static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_shri() does not do.
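     * An unsigned element shifted right by its full width is simply
     * zero, so in that case we just write zeroes with
     * tcg_gen_gvec_dup_imm() instead of emitting a shift.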
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
    } else {
        tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
    }
}

DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)

#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                              \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN);  \
    }

/*
 * For the VSHLL vector helpers, the vece is the size of the input
 * (ie MO_8 or MO_16); the helpers want to work in the output size.
 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
 */
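
/*
 * For example, VSHLL.S8 with shift 3 widens each byte to a halfword and
 * shifts it left by 3; the "bottom" helper below does this by shifting
 * the halfword left by 8 and then arithmetic-shifting right by (8 - 3),
 * which also sign-extends the value.
 */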

static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
    tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
}

static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    tcg_gen_gvec_andi(ovece, dofs, aofs,
                      ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
    tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
}

static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
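     *
     * For example (roughly): VSHLC Qd, Rdm, #8 shifts the whole 128-bit
     * Qd left by 8 bits, fills the bottom 8 bits from the low bits of
     * Rdm, and returns the 8 bits shifted out of the top in Rdm.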
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
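
/*
 * For example (roughly): VIDUP.U16 Qd, Rn, #2 writes Rn, Rn+2, Rn+4, ...
 * into the halfword lanes of Qd and leaves Rn pointing just past the
 * last element, ready to be used as the next starting value.
 */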

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is VIWDUP, VDWDUP with no wrap
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}
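
/*
 * For example, a VCMPEQ sets the per-byte predicate bits in VPR.P0 for
 * the lanes where Qn == Qm (or Qn == Rm for the scalar form); when the
 * mask field is nonzero the insn is the VPT form, which additionally
 * starts a predicated block via gen_vpst().
 */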

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)
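
/*
 * Illustrative sketch only, not QEMU code (vmaxvuw_sketch is a
 * hypothetical name used for illustration): with no predication, the
 * unsigned word variant used above (gen_helper_mve_vmaxvuw) reduces to
 * something like the loop below. The signed and minimum variants change
 * only the comparison, and the VMAXAV/VMINAV forms fold in the absolute
 * values of the signed elements. The real helpers in mve_helper.c also
 * honour the VPT mask and partially-executed beats.
 *
 *   uint32_t vmaxvuw_sketch(uint32_t rda, const uint32_t q[4])
 *   {
 *       for (int e = 0; e < 4; e++) {
 *           rda = q[e] > rda ? q[e] : rda;   // fold each element into Rda
 *       }
 *       return rda;                          // written back to Rda
 *   }
 */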

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}
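
/*
 * Note on the Qreg-to-Dreg conversion above (a sketch, not a QEMU API):
 * a 32-bit lane of a Q register lives in its D register pair as
 *
 *   int dreg = 2 * qreg + (lane >> 1);   // which Dreg holds the lane
 *   int didx = lane & 1;                 // 32-bit element within that Dreg
 *
 * so reading element a->idx from vd and from vd + 1 fetches Q lanes
 * a->idx and a->idx + 2, which go to Rt and Rt2 respectively; the same
 * mapping applies to the stores in trans_VMOV_from_2gp() below.
 */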

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}