/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

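/*
 * Decode helper for the generated decoder included below: the VIDUP/VDDUP
 * family encodes its immediate as a power of two, so x = 0..3 here yields
 * an element step of 1, 2, 4 or 8.
 */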
static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

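/*
 * Callers pass the OR of all the Q register numbers the insn uses as
 * qmask; since each number is a small field, checking that the OR is
 * below 8 confirms that every operand lies in Q0..Q7.
 */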
static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

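/*
 * Broadly, the ECI_* values name the beats that have already been executed
 * when an insn is (re)started: ECI_A0A1A2B0, for instance, means beats 0..2
 * of this insn plus beat 0 of the next insn in the overlapping pair are
 * done. So once this insn completes, the state to record for the following
 * insn can only be ECI_A0 or ECI_NONE.
 */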
static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

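    /*
     * The offset is specified in units of the memory element size being
     * accessed, so scale it up to a byte offset here: e.g. an imm of 3
     * with msize == MO_16 is a byte offset of 6.
     */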
    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

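/*
 * Widening loads and narrowing stores: here the memory element size (MSIZE)
 * is narrower than the vector element size, so e.g. VLDRB.S16 loads bytes
 * and sign-extends each one into a halfword lane. The fn table is indexed
 * [a->u][a->l]; the unsigned-store slot is NULL since (as I read the
 * encodings) a narrowing store just truncates and has no U bit.
 */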
#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

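/*
 * "Two operand scalar" insns combine a vector Qn with a general-purpose
 * register Rm; the helpers duplicate the Rm value across every lane before
 * applying the operation, so e.g. a VADD of this form adds Rm to each
 * element of Qn.
 */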
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

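/*
 * Long dual-accumulate insns produce a 64-bit result in RdaHi:RdaLo.
 * Internally rda holds that value as a single i64, with RdaLo supplying
 * bits [31:0] and RdaHi bits [63:32], matching the concat/extract pairs
 * below.
 */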
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

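/*
 * asimd_imm_const() expands the 8-bit immediate and cmode/op fields using
 * the usual AdvSIMD "modified immediate" rules into a 64-bit constant that
 * is already replicated as required; the decode below only picks which
 * helper (VMOV/VMVN vs VORR vs VBIC-as-AND) consumes it.
 */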
static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                            \
        static MVEGenTwoOpShiftFn * const fns[] = {              \
            gen_helper_mve_##FN##b,                              \
            gen_helper_mve_##FN##h,                              \
            gen_helper_mve_##FN##w,                              \
            NULL,                                                \
        };                                                       \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);       \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)  \
    {                                                             \
        static MVEGenTwoOpShiftFn * const fns[] = {               \
            gen_helper_mve_##FN##b,                               \
            gen_helper_mve_##FN##h,                               \
            gen_helper_mve_##FN##w,                               \
            NULL,                                                 \
        };                                                        \
        return do_2shift_scalar(s, a, fns[a->size]);              \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

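/*
 * As an illustration of the semantics (values picked arbitrarily):
 * VIDUP.U16 Qd, Rn, #2 with Rn == 10 fills the halfword lanes with
 * 10, 12, 14, ... and writes the incremented offset back to Rn.
 */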
static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is the VIDUP, VDDUP encoding
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

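/*
 * VCMP and VPT share these code paths: a->mask is zero for a plain VCMP,
 * while a non-zero mask means this is a VPT insn, which must additionally
 * set up the VPR mask fields via gen_vpst().
 */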
static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)