/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
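     *
     * If ECI was ECI_A0A1A2B0, beat 0 of the following insn has also
     * already been executed, so after this insn completes the
     * remaining state is ECI_A0; for any other non-zero value only
     * beats of this insn were outstanding, so the following insn
     * starts clean with ECI_NONE.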
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
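    /*
     * Common handling for one-operand vector insns (Qd = op(Qm)):
     * run the decode-time checks, then pass pointers to the two
     * Q registers to the size-specific helper.
     */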
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
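     * (Hence the fns[] table below is indexed at MO_16 and MO_32,
     * with the MO_8 and MO_64 slots left NULL.)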
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
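     * (Beat 1 updates MASK01 and beat 3 updates MASK23, so for
     * ECI_NONE and ECI_A0 both fields are still due to be written,
     * while from ECI_A0A1 onwards only the MASK23 update remains.)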
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
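     * (The 64-bit accumulator is RdaHi:RdaLo, with RdaLo in the
     * even-numbered register of the pair; hence the concat below and
     * the extrl/extrh pair when the result is written back.)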
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
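     * (For example, a VSHRI.S8 by 3 ends up calling the vshli_s
     * byte helper with a shift count of -3.)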
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                               \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
    {                                                            \
        static MVEGenTwoOpShiftFn * const fns[] = {              \
            gen_helper_mve_##FN##b,                              \
            gen_helper_mve_##FN##h,                              \
            gen_helper_mve_##FN##w,                              \
            NULL,                                                \
        };                                                       \
        return do_2shift_scalar(s, a, fns[a->size]);             \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                              \
            gen_helper_mve_##FN##h,                              \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                              \
            gen_helper_mve_##FN##h,                              \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is VIDUP, VDDUP.
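         * (fn is NULL for the size 0b11 case because the fns[] tables
         * in trans_VIWDUP and trans_VDWDUP leave that entry NULL.)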
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)