/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

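/*
 * Generic handler for the one-operand vector insns (Qd = op(Qm)):
 * do the usual feature, Q-register-bank, ECI and FP-access checks and
 * then call the size-specific helper, which applies the predicate mask
 * to the beats it executes and advances the VPT state via
 * mve_advance_vpt().
 */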
static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

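/*
 * The bitwise logical ops (and VPSEL) don't care about the element
 * size, so each of them uses a single helper rather than one helper
 * per element size.
 */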
#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLAS, vmlas)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)       \
    {                                                               \
        static MVEGenDualAccOpFn * const fns[4][2] = {              \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },    \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },    \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },    \
            { NULL, NULL },                                         \
        };                                                          \
        return do_dual_acc(s, a, fns[a->size][a->x]);               \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)    \
    {                                                               \
        static MVEGenTwoOpShiftFn * const fns[] = {                 \
            gen_helper_mve_##FN##b,                                 \
            gen_helper_mve_##FN##h,                                 \
            gen_helper_mve_##FN##w,                                 \
            NULL,                                                   \
        };                                                          \
        return do_2shift_scalar(s, a, fns[a->size]);                \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

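/*
 * Shift-right-and-narrow insns: as with the narrowing moves, only
 * sizes 0 and 1 are valid here, so the helper tables have just the
 * byte and halfword entries.
 */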
#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 and Rm == 15 are
         * UNPREDICTABLE, so we choose to UNDEF
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

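/*
 * Each DO_VCMP expansion provides both the vector/vector and the
 * vector/scalar form of the comparison; the helpers write the per-lane
 * result of the comparison into the VPR.P0 predicate bits.
 */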
#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)