/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

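/*
 * An MVE insn is executed as a series of up to four "beats", each
 * operating on one 32-bit portion of the 128-bit vector. If an
 * exception or interrupt is taken part way through, PSR.ECI records
 * which beats have already completed so that the insn can be resumed
 * later. The helpers below check that recorded state and advance it
 * as we translate each beatwise insn.
 */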
bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

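/*
 * Gather loads and scatter stores: the vector Qm supplies a per-element
 * offset which is added to the base address taken from Rn to form the
 * address used for each element of Qd.
 */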
static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * sign-extended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
        { NULL, NULL, F(vldrh_sg_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, F(vldrh_sg_os_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
        { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
        { NULL, NULL, F(vldrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
        { NULL, NULL, F(vldrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
    }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
        { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
        { NULL, NULL, F(vstrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
        { NULL, NULL, F(vstrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
    }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

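/*
 * Vector-plus-immediate gather/scatter forms: the base address of each
 * element comes from the corresponding element of Qm, with the same
 * immediate offset applied to all of them. When W=1 the incremented
 * addresses are written back to Qm, which is what the _wb_ helpers
 * selected below implement.
 */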
static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

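/*
 * The interleaving loads and stores above access a group of consecutive
 * Q registers starting at Qd (two for VLD2/VST2, four for VLD4/VST4),
 * hence the a->qd > 6 and a->qd > 4 checks; the addrinc passed to
 * do_vldst_il() is the total number of bytes transferred (32 or 64),
 * added to Rn on writeback.
 */
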
#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)

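/*
 * VCVTA/VCVTN/VCVTP/VCVTM convert float to integer using an explicitly
 * specified rounding mode (ties-away, ties-to-even, towards +Inf,
 * towards -Inf) rather than the mode currently set in FPSCR;
 * arm_rmode_to_sf() turns the arm_fprounding value into the softfloat
 * constant the helper expects.
 */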
static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          enum arm_fprounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }                                                           \

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }                                                           \

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)

#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

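/*
 * VREV16/VREV32/VREV64 reverse the order of the elements within each
 * 16-bit, 32-bit or 64-bit group; only element sizes smaller than the
 * group size are valid, hence the NULL entries in the fns[] arrays.
 */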
static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

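/*
 * "Scalar" variants of the 2-operand insns: the second operand is a
 * general-purpose register rather than a vector, and its value is
 * effectively duplicated across all lanes of the operation.
 */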
DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}


#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)

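/*
 * Long dual-accumulate insns (VMLALDAV and friends): the accumulator is
 * a 64-bit value that is passed in and returned via the RdaLo/RdaHi
 * general-purpose register pair.
 */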
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)   \
    {                                                           \
        static MVEGenDualAccOpFn * const fns[4][2] = {          \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \
            { NULL, NULL },                                     \
        };                                                      \
        return do_dual_acc(s, a, fns[a->size][a->x]);           \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                               \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
    {                                                            \
        static MVEGenTwoOpShiftFn * const fns[] = {              \
            gen_helper_mve_##FN##b,                              \
            gen_helper_mve_##FN##h,                              \
            gen_helper_mve_##FN##w,                              \
            NULL,                                                \
        };                                                       \
        return do_2shift_scalar(s, a, fns[a->size]);             \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

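/*
 * Narrowing shifts produce results half the width of their inputs, so
 * as with VSHLL above only two element sizes are valid and the fns[]
 * arrays need only two entries.
 */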
#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

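/*
 * For example, VIDUP.32 Qd, Rn, #2 with Rn containing 10 writes
 * { 10, 12, 14, 16 } to Qd and leaves 18 in Rn. The VIWDUP/VDWDUP
 * forms handled below additionally take a limit in Rm at which the
 * offset wraps back around to 0.
 */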
static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is VIDUP, VDDUP.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

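/*
 * Each DO_VCMP expansion provides both the vector/vector and the
 * vector/scalar forms of the compare; the results update the predicate
 * bits in VPR.P0, and for the VPT variants gen_vpst() also sets the
 * new VPT mask.
 */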
DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}