/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}
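/*
 * Informal note on ECI (a summary, not a restatement of the Arm ARM
 * pseudocode): a beatwise MVE insn can be interrupted part-way through,
 * and PSR.ECI then records which beats have already been completed so
 * that they are skipped when the insn is re-executed on exception return.
 * ECI_A0A1A2B0 additionally records that beat 0 of the *following* insn
 * has completed, which is why mve_update_eci() below maps it to ECI_A0
 * rather than ECI_NONE.
 */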
static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}
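/*
 * Most of the trans_* functions below follow the same shape, sketched
 * here for reference (this is a summary of the existing code, not new
 * policy):
 *   - refuse the insn (UNDEF) if MVE is absent, a Q register is outside
 *     the small register bank, or no helper exists for this size;
 *   - mve_eci_check()/vfp_access_check() may themselves generate an
 *     exception, in which case we return true because the insn has been
 *     handled;
 *   - otherwise build pointers to the Q registers, call the helper
 *     (which also advances VPT state), free the temps and update ECI.
 */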
static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
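/*
 * As an illustration of how the DO_LOGIC/DO_2OP style macros below work,
 * DO_LOGIC(VAND, gen_helper_mve_vand) expands to roughly:
 *
 *     static bool trans_VAND(DisasContext *s, arg_2op *a)
 *     {
 *         return do_2op(s, a, gen_helper_mve_vand);
 *     }
 *
 * The per-size macros additionally index a helper table by a->size,
 * with NULL entries marking element sizes the insn does not provide.
 */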
#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)
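/*
 * VQDMULL is a widening (long) multiply: a->size names the input element
 * size and only the halfword and word forms exist, so the size 0 and
 * size 3 table entries below are NULL. For the 32-bit form the 64-bit
 * results overlap the source elements differently, which (as we read the
 * architecture) is why Qd == Qm or Qd == Qn at MO_32 is UNPREDICTABLE
 * and we choose to UNDEF.
 */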
static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
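/*
 * In the scalar forms above, the second operand comes from a general
 * purpose register rather than a Q register: the helper receives the
 * 32-bit Rm value and uses its low 8/16/32 bits for every element, so
 * e.g. VADD.I16 Qd, Qn, Rm adds the low halfword of Rm to each halfword
 * element of Qn. (Illustrative summary: the per-element replication is
 * done in the helpers, not here.)
 */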
static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}
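/*
 * The tables below are indexed [a->size][a->x]: only the halfword and
 * word input element sizes exist, and x selects the "exchanged" variant
 * of the insn, handled by the helpers with an 'x' in their name. The
 * unsigned insns have no exchanged form, hence the NULL second column
 * in trans_VMLALDAV_U and trans_VRMLALDAVH_U.
 */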
static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)   \
    {                                                           \
        static MVEGenDualAccOpFn * const fns[4][2] = {          \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },\
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },\
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },\
            { NULL, NULL },                                     \
        };                                                      \
        return do_dual_acc(s, a, fns[a->size][a->x]);           \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}
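/*
 * Worked example for the ECI handling in gen_vpst() below: the mask
 * fields are architecturally updated on the odd-numbered beats, so if we
 * are continuing an insn with ECI_A0A1 (beats 0 and 1 already executed)
 * only the beat 3 update remains and we must write MASK23 but leave
 * MASK01 untouched; when no odd beat has been skipped we can write both
 * fields with a single deposit because they are adjacent.
 */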
static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}
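/*
 * VADDLV produces a 64-bit sum, but general purpose registers are 32 bits
 * wide, so the accumulator lives in an RdaHi:RdaLo register pair. The
 * pattern used below (and in do_long_dual_acc() above) is to build the
 * 64-bit input with tcg_gen_concat_i32_i64(), call the helper, and then
 * split the result back out with tcg_gen_extrl_i64_i32() and
 * tcg_gen_extrh_i64_i32(). RdaLo is encoded with bit 0 clear, which is
 * why only RdaHi needs the 13/15 checks.
 */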
static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}
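/*
 * Rough summary of the cmode/op decode above (the details live in
 * asimd_imm_const(), shared with the Neon decoder): odd cmode values
 * below 12 are the "ORR/BIC immediate" forms, and for op=1 the expanded
 * immediate is returned inverted so that the BIC can be implemented as
 * an AND. Everything else is a VMOV or VMVN of an immediate; for
 * instance cmode=0b1110 with op=0 replicates an 8-bit immediate into
 * every byte of the vector. cmode=0b1111 with op=1 is the single
 * unallocated combination.
 */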
static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift_scalar(s, a, fns[a->size]);            \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
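/*
 * The widening (VSHLL) and narrowing (VSHRN and friends) shifts below
 * change the element size, so only two of the four sizes are valid and
 * their helper tables have just two entries. As an example of the
 * negated-shift convention used earlier, VSHRI_S with a shift of 3 is
 * translated by calling the vshli_s helper with a shift count of -3.
 */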
#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
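/*
 * Illustrative example of the non-wrapping form handled above (based on
 * our reading of the insn semantics rather than anything in this file):
 * VIDUP.u8 with Rn = 4 and imm = 2 writes 4, 6, 8, ... 34 into the
 * sixteen byte elements of Qd and writes the final offset, 36, back to
 * Rn. VDDUP decrements instead, which is why trans_VDDUP() below simply
 * negates a->imm and reuses the VIDUP helpers.
 */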
static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 and Rm == 15
         * are UNPREDICTABLE: we choose to UNDEF
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }
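/*
 * Each DO_VCMP invocation below defines both the vector form (VCMP Qn, Qm)
 * and the scalar form (VCMP Qn, Rm) of one condition. Informally: the
 * comparison helpers record their per-lane results in the VPR predicate
 * bits, and when the insn carries a non-zero mask it is really a VPT, so
 * gen_vpst() is then used to set up the then/else mask for the following
 * predicated block.
 */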
DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)