/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                   TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32,
                            TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

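/*
 * For reference: PSR.ECI records which beats of a partially-executed
 * beatwise insn have already completed, so that execution can resume
 * in the middle of the insn after an exception. ECI_A0A1, for example,
 * means that beats 0 and 1 have already been performed and only the
 * remaining beats should take effect when the insn is re-executed;
 * mve_skip_first_beat() below makes exactly that decision for beat 0.
 */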

static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

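/*
 * Widening loads and narrowing stores: for these, msize is the size of
 * the element in memory, which is narrower than the vector element; for
 * instance VLDRB.S16 loads bytes and sign-extends each one into a 16-bit
 * vector element. The helper functions (vldrb_sh and friends) encode both
 * sizes, so do_ldst() only needs msize to scale the immediate offset.
 */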

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

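/*
 * VMULL (polynomial): VMULLB.P8/P16 and VMULLT.P8/P16 are carry-less
 * (polynomial) multiplies of the even-numbered (bottom) or odd-numbered
 * (top) elements; only the element-size selection is handled here, the
 * polynomial arithmetic itself is done in the helpers.
 */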

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLAS, vmlas)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

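/*
 * The rounding variants of the long dual accumulate insns only exist
 * for 32-bit elements, so the fns[] arrays below are indexed only by
 * a->x (the "exchange" bit), not by element size.
 */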

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

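/*
 * For reference, asimd_imm_const() implements the standard AdvSIMD/MVE
 * modified-immediate expansion: for example, cmode 0b0000 with op 0 and
 * an imm8 of 0xab produces 0x000000ab in each 32-bit element, and cmode
 * 0b1110 with op 0 produces 0xab in every byte. The one unallocated
 * cmode/op combination (cmode 0b1111 with op 1) is rejected in
 * trans_Vimm_1r() above.
 */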

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)    \
    {                                                               \
        static MVEGenTwoOpShiftFn * const fns[] = {                 \
            gen_helper_mve_##FN##b,                                 \
            gen_helper_mve_##FN##h,                                 \
            gen_helper_mve_##FN##w,                                 \
            NULL,                                                   \
        };                                                          \
        return do_2shift_scalar(s, a, fns[a->size]);                \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

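/*
 * An illustrative example of the wrapping forms handled below (the exact
 * element sequence is produced by the helpers, not here): VIWDUP.8 with
 * Rn = 4, Rm = 8 and an immediate of 2 writes the elements
 * 4, 6, 0, 2, 4, 6, ... because the offset wraps back to 0 when it
 * reaches the value in Rm; the final offset is then written back to Rn.
 */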

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is the related VIDUP, VDDUP encoding.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)