/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
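    /*
     * ECI_A0A1A2B0 also covers beat 0 of the following insn, so once
     * this insn completes it becomes ECI_A0 for that next insn; every
     * other ECI value is fully consumed here and becomes ECI_NONE.
     */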
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

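/*
 * The bitwise ops (and VPSEL) ignore the element size, so each has a
 * single helper rather than a per-size fns[] table.
 */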
#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
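    /*
     * VRMLALDAVH only exists for 32-bit elements, so the fns[] table
     * is indexed only by the X (exchange) bit.
     */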
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
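    /*
     * VPR layout: P0 is bits [15:0], MASK01 bits [19:16] and MASK23
     * bits [23:20], so a single deposit of width 8 can write both masks.
     */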
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
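    /* e.g. VSHRI #n is implemented as a vshli helper call with count -n */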
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)  \
    {                                                             \
        static MVEGenTwoOpShiftFn * const fns[] = {               \
            gen_helper_mve_##FN##b,                               \
            gen_helper_mve_##FN##h,                               \
            gen_helper_mve_##FN##w,                               \
            NULL,                                                 \
        };                                                        \
        return do_2shift_scalar(s, a, fns[a->size]);              \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is the VIDUP, VDDUP encoding.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

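/*
 * Signedness is implied by the condition: EQ/NE ignore it, CS/HI are
 * the unsigned compares and GE/LT/GT/LE the signed ones, so there are
 * no separate _S/_U trans functions here.
 */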
DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}