/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"
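
/*
 * Typedefs describing the signatures of the generated MVE helper
 * functions: the translate functions below pick an entry of the
 * right shape out of per-insn tables of these.
 */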
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * sign-extended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL, F(vldrh_sg_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, F(vldrh_sg_os_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL, NULL, F(vldrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL, F(vldrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL, NULL, F(vstrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL, F(vstrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
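
/*
 * Logical operations (and VPSEL) operate on the vector as a whole,
 * so unlike DO_2OP there is no per-element-size table of helpers:
 * each of these insns uses a single helper.
 */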
#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

DO_LOGIC(VPSEL, gen_helper_mve_vpsel)

#define DO_2OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)    \
    {                                                            \
        static MVEGenDualAccOpFn * const fns[4][2] = {           \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \
            { NULL, NULL },                                      \
        };                                                       \
        return do_dual_acc(s, a, fns[a->size][a->x]);            \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                        \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)  \
    {                                                             \
        static MVEGenTwoOpShiftFn * const fns[] = {               \
            gen_helper_mve_##FN##b,                               \
            gen_helper_mve_##FN##h,                               \
            gen_helper_mve_##FN##w,                               \
            NULL,                                                 \
        };                                                        \
        return do_2shift_scalar(s, a, fns[a->size]);              \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)
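
/*
 * Narrowing shifts: as with the narrowing moves, only byte and
 * halfword result element sizes exist, so these tables have just
 * two entries.
 */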
#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 and Rm == 15
         * are UNPREDICTABLE.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}